Fix for diff parser: Rewrite prefix logic and don't mutate prematurely

This commit is contained in:
Dave Halter
2019-01-02 17:27:47 +01:00
parent 96ae6a078b
commit 430f13af5e
2 changed files with 45 additions and 16 deletions

View File

@@ -403,7 +403,7 @@ class DiffParser(object):
class _NodesTreeNode(object): class _NodesTreeNode(object):
_ChildrenGroup = namedtuple('_ChildrenGroup', 'children line_offset last_line_offset_leaf') _ChildrenGroup = namedtuple('_ChildrenGroup', 'prefix children line_offset last_line_offset_leaf')
def __init__(self, tree_node, parent=None): def __init__(self, tree_node, parent=None):
self.tree_node = tree_node self.tree_node = tree_node
@@ -413,7 +413,9 @@ class _NodesTreeNode(object):
def finish(self): def finish(self):
children = [] children = []
for children_part, line_offset, last_line_offset_leaf in self._children_groups: for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
first_leaf = children_part[0].get_first_leaf()
first_leaf.prefix = prefix + first_leaf.prefix
if line_offset != 0: if line_offset != 0:
try: try:
_update_positions( _update_positions(
@@ -432,10 +434,10 @@ class _NodesTreeNode(object):
def add_child_node(self, child_node): def add_child_node(self, child_node):
self._node_children.append(child_node) self._node_children.append(child_node)
def add_tree_nodes(self, children, line_offset=0, last_line_offset_leaf=None): def add_tree_nodes(self, prefix, children, line_offset=0, last_line_offset_leaf=None):
if last_line_offset_leaf is None: if last_line_offset_leaf is None:
last_line_offset_leaf = children[-1].get_last_leaf() last_line_offset_leaf = children[-1].get_last_leaf()
group = self._ChildrenGroup(children, line_offset, last_line_offset_leaf) group = self._ChildrenGroup(prefix, children, line_offset, last_line_offset_leaf)
self._children_groups.append(group) self._children_groups.append(group)
def get_last_line(self, suffix): def get_last_line(self, suffix):
@@ -450,6 +452,7 @@ class _NodesTreeNode(object):
if _ends_with_newline(last_leaf, suffix): if _ends_with_newline(last_leaf, suffix):
line -= 1 line -= 1
line += suffix.count('\n') line += suffix.count('\n')
if suffix and not suffix.endswith('\n'): if suffix and not suffix.endswith('\n'):
# This is the end of a file (that doesn't end with a newline). # This is the end of a file (that doesn't end with a newline).
line += 1 line += 1
@@ -466,7 +469,7 @@ class _NodesTree(object):
self._base_node = _NodesTreeNode(module) self._base_node = _NodesTreeNode(module)
self._working_stack = [self._base_node] self._working_stack = [self._base_node]
self._module = module self._module = module
self._last_prefix = '' self._prefix_remainder = ''
self.prefix = '' self.prefix = ''
@property @property
@@ -496,22 +499,24 @@ class _NodesTree(object):
self._working_stack.pop() self._working_stack.pop()
def add_parsed_nodes(self, tree_nodes): def add_parsed_nodes(self, tree_nodes):
old_prefix = self.prefix
tree_nodes = self._remove_endmarker(tree_nodes) tree_nodes = self._remove_endmarker(tree_nodes)
if not tree_nodes: if not tree_nodes:
self.prefix = old_prefix + self.prefix
return return
assert tree_nodes[0].type != 'newline' assert tree_nodes[0].type != 'newline'
node = self._get_insertion_node(tree_nodes[0]) node = self._get_insertion_node(tree_nodes[0])
assert node.tree_node.type in ('suite', 'file_input') assert node.tree_node.type in ('suite', 'file_input')
node.add_tree_nodes(tree_nodes) node.add_tree_nodes(old_prefix, tree_nodes)
# tos = Top of stack # tos = Top of stack
self._update_tos(tree_nodes[-1]) self._update_tos(tree_nodes[-1])
def _update_tos(self, tree_node): def _update_tos(self, tree_node):
if tree_node.type in ('suite', 'file_input'): if tree_node.type in ('suite', 'file_input'):
new_tos = _NodesTreeNode(tree_node) new_tos = _NodesTreeNode(tree_node)
new_tos.add_tree_nodes(list(tree_node.children)) new_tos.add_tree_nodes('', list(tree_node.children))
self._working_stack[-1].add_child_node(new_tos) self._working_stack[-1].add_child_node(new_tos)
self._working_stack.append(new_tos) self._working_stack.append(new_tos)
@@ -526,7 +531,7 @@ class _NodesTree(object):
""" """
last_leaf = tree_nodes[-1].get_last_leaf() last_leaf = tree_nodes[-1].get_last_leaf()
is_endmarker = last_leaf.type == self.endmarker_type is_endmarker = last_leaf.type == self.endmarker_type
self._last_prefix = '' self._prefix_remainder = ''
if is_endmarker: if is_endmarker:
try: try:
separation = last_leaf.prefix.rindex('\n') + 1 separation = last_leaf.prefix.rindex('\n') + 1
@@ -536,11 +541,9 @@ class _NodesTree(object):
# Remove the whitespace part of the prefix after a newline. # Remove the whitespace part of the prefix after a newline.
# That is not relevant if parentheses were opened. Always parse # That is not relevant if parentheses were opened. Always parse
# until the end of a line. # until the end of a line.
last_leaf.prefix, self._last_prefix = \ last_leaf.prefix, self._prefix_remainder = \
last_leaf.prefix[:separation], last_leaf.prefix[separation:] last_leaf.prefix[:separation], last_leaf.prefix[separation:]
first_leaf = tree_nodes[0].get_first_leaf()
first_leaf.prefix = self.prefix + first_leaf.prefix
self.prefix = '' self.prefix = ''
if is_endmarker: if is_endmarker:
@@ -561,13 +564,15 @@ class _NodesTree(object):
list(self._working_stack), list(self._working_stack),
tree_nodes, tree_nodes,
until_line, until_line,
line_offset line_offset,
self.prefix,
) )
return new_nodes return new_nodes
def _copy_nodes(self, working_stack, nodes, until_line, line_offset): def _copy_nodes(self, working_stack, nodes, until_line, line_offset, prefix=''):
new_nodes = [] new_nodes = []
new_prefix = ''
for node in nodes: for node in nodes:
if node.start_pos[0] > until_line: if node.start_pos[0] > until_line:
break break
@@ -577,7 +582,7 @@ class _NodesTree(object):
# remove the newline at the end of the line, otherwise it's # remove the newline at the end of the line, otherwise it's
# going to be missing. # going to be missing.
try: try:
self.prefix = node.prefix[:node.prefix.rindex('\n') + 1] new_prefix = node.prefix[:node.prefix.rindex('\n') + 1]
except ValueError: except ValueError:
pass pass
# Endmarkers just distort all the checks below. Remove them. # Endmarkers just distort all the checks below. Remove them.
@@ -645,7 +650,10 @@ class _NodesTree(object):
assert last_line_offset_leaf == ':' assert last_line_offset_leaf == ':'
else: else:
last_line_offset_leaf = new_nodes[-1].get_last_leaf() last_line_offset_leaf = new_nodes[-1].get_last_leaf()
tos.add_tree_nodes(new_nodes, line_offset, last_line_offset_leaf) tos.add_tree_nodes(prefix, new_nodes, line_offset, last_line_offset_leaf)
self.prefix = new_prefix
self._prefix_remainder = ''
return new_nodes, working_stack return new_nodes, working_stack
def close(self): def close(self):
@@ -665,6 +673,6 @@ class _NodesTree(object):
end_pos[0] += len(lines) - 1 end_pos[0] += len(lines) - 1
end_pos[1] = len(lines[-1]) end_pos[1] = len(lines[-1])
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix) endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
endmarker.parent = self._module endmarker.parent = self._module
self._module.children.append(endmarker) self._module.children.append(endmarker)

View File

@@ -701,3 +701,24 @@ def test_docstring_removal(differ):
differ.initialize(code1) differ.initialize(code1)
differ.parse(code2, parsers=1, copies=2) differ.parse(code2, parsers=1, copies=2)
differ.parse(code1, parsers=2, copies=1) differ.parse(code1, parsers=2, copies=1)
def test_paren_in_strange_position(differ):
    """Check the diff parser when a lone ``)`` line is added and removed.

    ``code2`` is ``code1`` with one extra line holding only a closing
    parenthesis, placed between the class docstring and ``__init__``.
    The test parses the broken variant first and then the original again.

    NOTE(review): the leading whitespace inside the two literals below is
    not visible in this view; confirm their indentation against the
    repository before relying on them.
    """
    # Baseline source: a class with a docstring and one method.
    code1 = dedent('''\
class C:
""" ha """
def __init__(self, message):
self.message = message
''')
    # Same source plus a stray ``)`` line after the docstring.
    code2 = dedent('''\
class C:
""" ha """
)
def __init__(self, message):
self.message = message
''')
    differ.initialize(code1)
    # The unmatched parenthesis is invalid syntax, hence
    # expect_error_leaves=True for this step only.
    # presumably `parsers`/`copies` are the expected counts of parser runs
    # and copied node ranges -- confirm against the `differ` fixture.
    differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True)
    # Going back to the original source; no error-leaf expectation is passed.
    differ.parse(code1, parsers=1, copies=1)