Fix indentation error tokens

This commit is contained in:
Dave Halter
2019-01-16 23:21:31 +01:00
parent 20fd32b45d
commit dac4c445a7
3 changed files with 42 additions and 4 deletions

View File

@@ -19,6 +19,8 @@ from parso.python.token import PythonTokenTypes
LOG = logging.getLogger(__name__)
DEBUG_DIFF_PARSER = False
_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
def _assert_valid_graph(node):
"""
@@ -33,11 +35,12 @@ def _assert_valid_graph(node):
# Ignore INDENT is necessary, because indent/dedent tokens don't
# contain value/prefix and are just around, because of the tokenizer.
error_tokens = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
if node.type == 'error_leaf' and node.token_type in error_tokens:
if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
assert not node.value
assert not node.prefix
return
while previous_leaf and previous_leaf.type == 'error_leaf' \
and previous_leaf.token_type in error_tokens:
and previous_leaf.token_type in _INDENTATION_TOKENS:
assert previous_leaf.end_pos <= node.start_pos, \
(previous_leaf, node)
previous_leaf = previous_leaf.get_previous_leaf()
@@ -428,6 +431,11 @@ class _NodesTreeNode(object):
children = []
for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
first_leaf = children_part[0].get_first_leaf()
while first_leaf.type == 'error_leaf' \
and first_leaf.token_type in _INDENTATION_TOKENS:
first_leaf = first_leaf.get_next_leaf()
first_leaf.prefix = prefix + first_leaf.prefix
if line_offset != 0:
try:

View File

@@ -169,7 +169,9 @@ class EndMarker(_LeafWithoutNewlines):
# Debug representation of the leaf: the class name plus repr() of its prefix.
# NOTE(review): this is a diff view with the +/- markers stripped, so both the
# old and the new return statement appear below.
@utf8_repr
def __repr__(self):
# Pre-change line (removed by this commit): reports only the prefix.
return "<%s: prefix=%s>" % (type(self).__name__, repr(self.prefix))
# Replacement (added by this commit): additionally reports self.end_pos.
return "<%s: prefix=%s end_pos=%s>" % (
type(self).__name__, repr(self.prefix), self.end_pos
)
class Newline(PythonLeaf):

View File

@@ -1069,3 +1069,31 @@ def test_opening_bracket_at_end(differ):
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
# Test added by this commit: feeds the differ a well-formed snippet, then a
# corrupted variant of it, then the well-formed snippet again.
# NOTE(review): leading whitespace is not preserved in this rendering, so the
# indentation inside the two string literals (which the test name refers to)
# cannot be seen here -- consult the real file before editing the literals.
def test_all_sorts_of_indentation(differ):
# Baseline source: a class with a statement, a function and an `if` block.
code1 = dedent('''\
class C:
1
def f():
'same'
if foo:
a = b
end
''')
# Corrupted variant: the function header is replaced by an unparsable one and
# a line of control characters (\x02\x06\x0f\x1c\x11) is inserted.
code2 = dedent('''\
class C:
1
def f(yield await %|(
'same'
\x02\x06\x0f\x1c\x11
if foo:
a = b
end
''')
# Start from the baseline, switch to the broken source (error leaves are
# expected there), then switch back to the baseline.
# `copies` / `parsers` are presumably the expected numbers of copied nodes and
# parser runs checked by the differ fixture -- TODO confirm against the fixture.
differ.initialize(code1)
differ.parse(code2, copies=2, parsers=3, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=3)