mirror of
https://github.com/davidhalter/jedi.git
synced 2026-05-20 07:19:40 +08:00
Simplified the line splitting and, with that, a few other things in the fast parser.
This commit is contained in:
@@ -307,7 +307,7 @@ class Parser(object):
|
|||||||
|
|
||||||
def _tokenize(self, tokenizer):
|
def _tokenize(self, tokenizer):
|
||||||
for typ, value, start_pos, prefix in tokenizer:
|
for typ, value, start_pos, prefix in tokenizer:
|
||||||
print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
|
#print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
|
||||||
if self._omit_dedent and typ == token.DEDENT:
|
if self._omit_dedent and typ == token.DEDENT:
|
||||||
self._omit_dedent -= 1
|
self._omit_dedent -= 1
|
||||||
continue
|
continue
|
||||||
|
|||||||
+13
-26
@@ -337,24 +337,21 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
not everything.
|
not everything.
|
||||||
"""
|
"""
|
||||||
def gen_part():
|
def gen_part():
|
||||||
text = '\n'.join(current_lines)
|
text = ''.join(current_lines)
|
||||||
del current_lines[:]
|
del current_lines[:]
|
||||||
self.number_of_splits += 1
|
self.number_of_splits += 1
|
||||||
if i == len(self._lines) - 1:
|
return text
|
||||||
return text
|
|
||||||
else:
|
|
||||||
return text + '\n'
|
|
||||||
|
|
||||||
def just_newlines(current_lines):
|
def just_newlines(current_lines):
|
||||||
for line in current_lines:
|
for line in current_lines:
|
||||||
line = line.lstrip('\t ')
|
line = line.lstrip('\t \n\r')
|
||||||
if line and line[0] not in ('#', '\r'):
|
if line and line[0] != '#':
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Split only new lines. Distinction between \r\n is the tokenizer's
|
# Split only new lines. Distinction between \r\n is the tokenizer's
|
||||||
# job.
|
# job.
|
||||||
self._lines = source.split('\n')
|
self._lines = source.splitlines(keepends=True)
|
||||||
current_lines = []
|
current_lines = []
|
||||||
is_decorator = False
|
is_decorator = False
|
||||||
current_indent = 0
|
current_indent = 0
|
||||||
@@ -364,9 +361,9 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
# All things within flows are simply being ignored.
|
# All things within flows are simply being ignored.
|
||||||
for i, l in enumerate(self._lines):
|
for i, l in enumerate(self._lines):
|
||||||
# check for dedents
|
# check for dedents
|
||||||
s = l.lstrip('\t ')
|
s = l.lstrip('\t \n\r')
|
||||||
indent = len(l) - len(s)
|
indent = len(l) - len(s)
|
||||||
if not s or s[0] in ('#', '\r'):
|
if not s or s[0] == '#':
|
||||||
current_lines.append(l) # just ignore comments and blank lines
|
current_lines.append(l) # just ignore comments and blank lines
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -388,7 +385,6 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
in_flow = m.group(1) in FLOWS
|
in_flow = m.group(1) in FLOWS
|
||||||
if not is_decorator and not in_flow:
|
if not is_decorator and not in_flow:
|
||||||
if not just_newlines(current_lines):
|
if not just_newlines(current_lines):
|
||||||
print('GEN', current_lines)
|
|
||||||
yield gen_part()
|
yield gen_part()
|
||||||
is_decorator = '@' == m.group(1)
|
is_decorator = '@' == m.group(1)
|
||||||
if not is_decorator:
|
if not is_decorator:
|
||||||
@@ -424,7 +420,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
|
|
||||||
for code_part in self._split_parts(source):
|
for code_part in self._split_parts(source):
|
||||||
if not is_first:
|
if not is_first:
|
||||||
print('OFF', line_offset, self.current_node.parser.module.end_pos)
|
#print('OFF', line_offset, self.current_node.parser.module.end_pos)
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
pass # TODO remove
|
pass # TODO remove
|
||||||
if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
|
if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
|
||||||
@@ -470,7 +466,6 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
start += len(code_part)
|
start += len(code_part)
|
||||||
|
|
||||||
if added_newline:
|
if added_newline:
|
||||||
print('REMOVE NL', self.current_node)
|
|
||||||
self.current_node.remove_last_newline()
|
self.current_node.remove_last_newline()
|
||||||
|
|
||||||
# Now that the for loop is finished, we still want to close all nodes.
|
# Now that the for loop is finished, we still want to close all nodes.
|
||||||
@@ -498,10 +493,9 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
"""
|
"""
|
||||||
Side effect: Alters the list of nodes.
|
Side effect: Alters the list of nodes.
|
||||||
"""
|
"""
|
||||||
print('r', repr(source))
|
|
||||||
h = hash(source)
|
h = hash(source)
|
||||||
for index, node in enumerate(nodes):
|
for index, node in enumerate(nodes):
|
||||||
print('EQ', node, repr(node.source), repr(source))
|
#print('EQ', node, repr(node.source), repr(source))
|
||||||
if node.hash == h and node.source == source:
|
if node.hash == h and node.source == source:
|
||||||
node.reset_node()
|
node.reset_node()
|
||||||
nodes.remove(node)
|
nodes.remove(node)
|
||||||
@@ -509,19 +503,13 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
else:
|
else:
|
||||||
tokenizer = FastTokenizer(parser_code, 0)
|
tokenizer = FastTokenizer(parser_code, 0)
|
||||||
self.number_parsers_used += 1
|
self.number_parsers_used += 1
|
||||||
print('CODE', repr(source))
|
#print('CODE', repr(source))
|
||||||
p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
|
p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
|
||||||
node = ParserNode(self.module)
|
node = ParserNode(self.module)
|
||||||
|
|
||||||
end = line_offset + p.module.end_pos[0]
|
end = line_offset + p.module.end_pos[0]
|
||||||
if not (len(self._lines) == end):
|
used_lines = self._lines[line_offset:end - 1]
|
||||||
# We don't keep the last line, except if we're done. A newline
|
code_part_actually_used = ''.join(used_lines)
|
||||||
# ends on the next line, which is part of the next parser. But
|
|
||||||
# the last parser includes the last new line.
|
|
||||||
end -= 1
|
|
||||||
print(line_offset, end)
|
|
||||||
used_lines = self._lines[line_offset:end]
|
|
||||||
code_part_actually_used = '\n'.join(used_lines)
|
|
||||||
node.set_parser(p, code_part_actually_used)
|
node.set_parser(p, code_part_actually_used)
|
||||||
|
|
||||||
self.current_node.add_node(node, line_offset)
|
self.current_node.add_node(node, line_offset)
|
||||||
@@ -589,7 +577,7 @@ class FastTokenizer(object):
|
|||||||
if self.previous[0] in (NEWLINE, INDENT, DEDENT) \
|
if self.previous[0] in (NEWLINE, INDENT, DEDENT) \
|
||||||
and not self._parentheses_level and typ != INDENT:
|
and not self._parentheses_level and typ != INDENT:
|
||||||
# Check for NEWLINE, which symbolizes the indent.
|
# Check for NEWLINE, which symbolizes the indent.
|
||||||
print('X', repr(value), tokenize.tok_name[typ])
|
# print('X', repr(value), tokenize.tok_name[typ])
|
||||||
if not self._in_flow:
|
if not self._in_flow:
|
||||||
self._in_flow = value in FLOWS
|
self._in_flow = value in FLOWS
|
||||||
if self._in_flow:
|
if self._in_flow:
|
||||||
@@ -606,7 +594,6 @@ class FastTokenizer(object):
|
|||||||
self._first_stmt = False
|
self._first_stmt = False
|
||||||
self._expect_indent = True
|
self._expect_indent = True
|
||||||
elif self._expect_indent:
|
elif self._expect_indent:
|
||||||
print('EXP', self._first_stmt)
|
|
||||||
return self._close()
|
return self._close()
|
||||||
else:
|
else:
|
||||||
self._first_stmt = False
|
self._first_stmt = False
|
||||||
|
|||||||
Reference in New Issue
Block a user