Account for code parts that were not parsed in the fast parser.

Author: Dave Halter
Date: 2015-02-19 01:42:13 +01:00
parent 39bf9f426b
commit 0e73bf7d80
2 changed files with 47 additions and 13 deletions

View File

@@ -333,8 +333,9 @@ class FastParser(use_metaclass(CachedFastParser)):
             elif is_decorator:
                 is_decorator = False
 
-            parentheses_level += (l.count('(') + l.count('[') + l.count('{')
-                                  - l.count(')') - l.count(']') - l.count('}'))
+            parentheses_level = \
+                max(0, (l.count('(') + l.count('[') + l.count('{')
+                        - l.count(')') - l.count(']') - l.count('}')))
 
             current_lines.append(l)
         if current_lines:
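Why the clamp at zero matters: the split heuristic counts brackets with plain str.count(), so a ')' inside a string literal is subtracted like a real closing bracket and can push the level negative, which then hides later split points. A minimal sketch of that failure mode (illustrative only, not jedi's actual splitter):

# Hypothetical helper, not part of jedi: count bracket depth per line the same
# naive way the splitter does, with and without the max(0, ...) clamp.
def bracket_levels(source, clamp=True):
    level = 0
    for line in source.splitlines(True):
        delta = (line.count('(') + line.count('[') + line.count('{')
                 - line.count(')') - line.count(']') - line.count('}'))
        level = max(0, level + delta) if clamp else level + delta
        yield level

src = "def x():\n    a = (')'\n        if 1 else 2)\n"
print(list(bracket_levels(src, clamp=False)))  # [0, 0, -1] -> stuck below zero
print(list(bracket_levels(src, clamp=True)))   # [0, 0, 0]  -> back at top level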
@@ -351,32 +352,44 @@ class FastParser(use_metaclass(CachedFastParser)):
             source += '\n'
             added_newline = True
 
-        line_offset = 0
+        next_line_offset = line_offset = 0
         start = 0
-        is_first = True
         nodes = list(self.current_node.all_sub_nodes())
         # Now we can reset the node, because we have all the old nodes.
         self.current_node.reset_node()
 
+        last_end_line = 1
         for code_part in self._split_parts(source):
+            next_line_offset += code_part.count('\n')
             # If the last code part parsed isn't equal to the current end_pos,
             # we know that the parser went further (`def` start in a
             # docstring). So just parse the next part.
-            if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
-                indent = len(code_part) - len(code_part.lstrip('\t '))
-                self.current_node = self.current_node.parent_until_indent(indent)
-                # check if code_part has already been parsed
-                self.current_node = self._get_node(code_part, source[start:],
-                                                   line_offset, nodes, not is_first)
-                is_first = False
+            if line_offset + 1 == last_end_line:
+                self.current_node = self._get_node(code_part, source[start:],
+                                                   line_offset, nodes)
             else:
+                # Means that some lines were not fully parsed. Parse them now.
+                # This is a very rare case. It should only happen with very
+                # strange code bits.
+                while last_end_line < next_line_offset + 1:
+                    line_offset = last_end_line - 1
+                    # We could calculate the src in a more complicated way to
+                    # make caching here possible as well. However, this is
+                    # complicated and error-prone. Since this is not called
+                    # very often - just ignore it.
+                    src = ''.join(self._lines[line_offset:])
+                    self.current_node = self._get_node(code_part, src,
+                                                       line_offset, nodes)
+                    last_end_line = self.current_node.parser.module.end_pos[0]
+
                 debug.dbg('While parsing %s, line %s slowed down the fast parser',
                           self.module_path, line_offset)
 
-            line_offset += code_part.count('\n')
+            line_offset = next_line_offset
            start += len(code_part)
 
+            last_end_line = self.current_node.parser.module.end_pos[0]
+
         if added_newline:
             self.current_node.remove_last_newline()
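The new bookkeeping is easier to see stripped of the node cache: line_offset is where the current code part starts, next_line_offset is where it ends, and last_end_line is where the previous parser run actually stopped. When those disagree, the else branch keeps re-parsing from last_end_line until the end positions catch up. A rough model under those assumed semantics (parse() stands in for _get_node plus the module.end_pos lookup; this is not the FastParser itself):

# Simplified control-flow sketch. Assumption: parse(src, line_offset) returns
# the last line number the parser consumed, mirroring end_pos[0] in the diff.
def parse_parts(parts, lines, parse):
    line_offset = next_line_offset = 0
    last_end_line = 1
    for part in parts:
        next_line_offset += part.count('\n')
        if line_offset + 1 == last_end_line:
            # Normal case: the previous parse stopped exactly where this part starts.
            last_end_line = parse(part, line_offset)
        else:
            # Rare case from the diff: some lines were never parsed, so re-feed
            # the source from the last parsed line until the positions line up.
            while last_end_line < next_line_offset + 1:
                line_offset = last_end_line - 1
                last_end_line = parse(''.join(lines[line_offset:]), line_offset)
        line_offset = next_line_offset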
@@ -388,10 +401,13 @@ class FastParser(use_metaclass(CachedFastParser)):
                   % (self.module_path, self.number_parsers_used,
                      self.number_of_splits))
 
-    def _get_node(self, source, parser_code, line_offset, nodes, no_docstr):
+    def _get_node(self, source, parser_code, line_offset, nodes):
         """
         Side effect: Alters the list of nodes.
         """
+        indent = len(source) - len(source.lstrip('\t '))
+        self.current_node = self.current_node.parent_until_indent(indent)
+
         h = hash(source)
         for index, node in enumerate(nodes):
             #print('EQ', node, repr(node.source), repr(source))
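The indent handling that moved into _get_node is just the length of the leading whitespace on the code part's first line, so every caller now goes through the same dedent logic. A quick standalone check of what that expression yields (not jedi code):

# The indentation measure used in the diff: length of leading tabs/spaces.
code_part = "    def method(self):\n        pass\n"
indent = len(code_part) - len(code_part.lstrip('\t '))
print(indent)  # 4 -- passed to parent_until_indent() to pick the enclosing
               # node; the exact node-tree behaviour is assumed here.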

View File

@@ -368,6 +368,24 @@ def test_backslash():
     check_fp(src, 2)
 
 
+def test_fake_parentheses():
+    """
+    The fast parser splitting counts parentheses, but not as correct tokens.
+    Therefore parentheses in string tokens are included as well. This needs to
+    be accounted for.
+    """
+    src = dedent(r"""
+    def x():
+        a = (')'
+    if 1 else 2)
+    def y():
+        pass
+    def z():
+        pass
+    """)
+    check_fp(src, 3, 2)
+
+
 def test_incomplete_function():
     source = '''return ImportErr'''
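The new test's source is valid Python even though it confuses the line-based splitter: the '(' on the assignment line is still open when the dedented continuation appears, so only the bracket-counting heuristic is fooled. A quick sanity check with the stdlib parser (independent of jedi and of check_fp's exact semantics):

# Standalone check with ast, not part of the jedi test suite: the snippet
# parses to three top-level functions, so any trouble in the fast parser comes
# from its line-splitting heuristic, not from invalid syntax.
import ast
from textwrap import dedent

src = dedent(r"""
def x():
    a = (')'
if 1 else 2)
def y():
    pass
def z():
    pass
""")
print([n.name for n in ast.parse(src).body])  # ['x', 'y', 'z']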