mirror of
https://github.com/davidhalter/parso.git
synced 2026-03-05 06:34:31 +08:00
Diff parser: Rewrite tokenizer modifications a bit
This commit is contained in:
@@ -134,7 +134,7 @@ class BaseParser(object):
|
|||||||
# However, the error recovery might have added the token again, if
|
# However, the error recovery might have added the token again, if
|
||||||
# the stack is empty, we're fine.
|
# the stack is empty, we're fine.
|
||||||
raise InternalParseError(
|
raise InternalParseError(
|
||||||
"incomplete input", token.type, token.value, token.start_pos
|
"incomplete input", token.type, token.string, token.start_pos
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(self.stack) > 1:
|
if len(self.stack) > 1:
|
||||||
|
|||||||
@@ -397,6 +397,8 @@ class DiffParser(object):
|
|||||||
def _diff_tokenize(self, lines, until_line, line_offset=0):
|
def _diff_tokenize(self, lines, until_line, line_offset=0):
|
||||||
is_first_token = True
|
is_first_token = True
|
||||||
omitted_first_indent = False
|
omitted_first_indent = False
|
||||||
|
was_newline = False
|
||||||
|
base_indentation = 0
|
||||||
indents = []
|
indents = []
|
||||||
tokens = self._tokenizer(lines, (1, 0))
|
tokens = self._tokenizer(lines, (1, 0))
|
||||||
stack = self._active_parser.stack
|
stack = self._active_parser.stack
|
||||||
@@ -405,6 +407,7 @@ class DiffParser(object):
|
|||||||
if typ == PythonTokenTypes.INDENT:
|
if typ == PythonTokenTypes.INDENT:
|
||||||
indents.append(start_pos[1])
|
indents.append(start_pos[1])
|
||||||
if is_first_token:
|
if is_first_token:
|
||||||
|
base_indentation = start_pos[1]
|
||||||
omitted_first_indent = True
|
omitted_first_indent = True
|
||||||
# We want to get rid of indents that are only here because
|
# We want to get rid of indents that are only here because
|
||||||
# we only parse part of the file. These indents would only
|
# we only parse part of the file. These indents would only
|
||||||
@@ -436,18 +439,18 @@ class DiffParser(object):
|
|||||||
)
|
)
|
||||||
break
|
break
|
||||||
elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
|
elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
|
||||||
yield PythonToken(typ, string, start_pos, prefix)
|
was_newline = True
|
||||||
# Check if the parser is actually in a valid suite state.
|
elif was_newline:
|
||||||
if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
|
was_newline = False
|
||||||
start_pos = start_pos[0] + 1, 0
|
if start_pos[1] <= base_indentation:
|
||||||
while len(indents) > int(omitted_first_indent):
|
# Check if the parser is actually in a valid suite state.
|
||||||
indents.pop()
|
if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
|
||||||
yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
|
start_pos = start_pos[0] + 1, 0
|
||||||
|
while len(indents) > int(omitted_first_indent):
|
||||||
yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
|
indents.pop()
|
||||||
break
|
yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
|
||||||
else:
|
yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
|
||||||
continue
|
break
|
||||||
|
|
||||||
yield PythonToken(typ, string, start_pos, prefix)
|
yield PythonToken(typ, string, start_pos, prefix)
|
||||||
|
|
||||||
@@ -479,10 +482,7 @@ class _NodesTreeNode(object):
|
|||||||
except _PositionUpdatingFinished:
|
except _PositionUpdatingFinished:
|
||||||
pass
|
pass
|
||||||
if add_error_leaf is not None:
|
if add_error_leaf is not None:
|
||||||
if add_error_leaf == 'INDENT':
|
pos = children_part[0].start_pos
|
||||||
pos = children_part[0].start_pos
|
|
||||||
else:
|
|
||||||
pos = children_part[0].start_pos[0], 0
|
|
||||||
children.append(PythonErrorLeaf(add_error_leaf, '', pos))
|
children.append(PythonErrorLeaf(add_error_leaf, '', pos))
|
||||||
children += children_part
|
children += children_part
|
||||||
self.tree_node.children = children
|
self.tree_node.children = children
|
||||||
|
|||||||
@@ -397,7 +397,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
def dedent_if_necessary(start):
|
def dedent_if_necessary(start):
|
||||||
while start < indents[-1]:
|
while start < indents[-1]:
|
||||||
if start > indents[-2]:
|
if start > indents[-2]:
|
||||||
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
|
yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
|
||||||
indents[-1] = start
|
indents[-1] = start
|
||||||
break
|
break
|
||||||
yield PythonToken(DEDENT, '', spos, '')
|
yield PythonToken(DEDENT, '', spos, '')
|
||||||
|
|||||||
@@ -138,7 +138,7 @@ def test_if_simple(differ):
|
|||||||
differ.initialize(src + 'a')
|
differ.initialize(src + 'a')
|
||||||
differ.parse(src + else_ + "a", copies=0, parsers=1)
|
differ.parse(src + else_ + "a", copies=0, parsers=1)
|
||||||
|
|
||||||
differ.parse(else_, parsers=1, copies=1, expect_error_leaves=True)
|
differ.parse(else_, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(src + else_, parsers=1)
|
differ.parse(src + else_, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
@@ -239,7 +239,7 @@ def test_backslash(differ):
|
|||||||
def y():
|
def y():
|
||||||
pass
|
pass
|
||||||
""")
|
""")
|
||||||
differ.parse(src, parsers=2)
|
differ.parse(src, parsers=1)
|
||||||
|
|
||||||
src = dedent(r"""
|
src = dedent(r"""
|
||||||
def first():
|
def first():
|
||||||
@@ -250,7 +250,7 @@ def test_backslash(differ):
|
|||||||
def second():
|
def second():
|
||||||
pass
|
pass
|
||||||
""")
|
""")
|
||||||
differ.parse(src, parsers=1)
|
differ.parse(src, parsers=2)
|
||||||
|
|
||||||
|
|
||||||
def test_full_copy(differ):
|
def test_full_copy(differ):
|
||||||
@@ -282,7 +282,7 @@ def test_issues_with_error_leaves(differ):
|
|||||||
str
|
str
|
||||||
''')
|
''')
|
||||||
differ.initialize(code)
|
differ.initialize(code)
|
||||||
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
|
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
def test_unfinished_nodes(differ):
|
def test_unfinished_nodes(differ):
|
||||||
@@ -368,7 +368,7 @@ def test_totally_wrong_whitespace(differ):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=4, copies=0, expect_error_leaves=True)
|
differ.parse(code2, parsers=2, copies=0, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
def test_node_insertion(differ):
|
def test_node_insertion(differ):
|
||||||
@@ -442,7 +442,7 @@ def test_in_class_movements(differ):
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=2, copies=1)
|
differ.parse(code2, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
def test_in_parentheses_newlines(differ):
|
def test_in_parentheses_newlines(differ):
|
||||||
@@ -487,7 +487,7 @@ def test_indentation_issue(differ):
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=1)
|
differ.parse(code2, parsers=2)
|
||||||
|
|
||||||
|
|
||||||
def test_endmarker_newline(differ):
|
def test_endmarker_newline(differ):
|
||||||
@@ -588,7 +588,7 @@ def test_if_removal_and_reappearence(differ):
|
|||||||
la
|
la
|
||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=1, copies=4, expect_error_leaves=True)
|
differ.parse(code2, parsers=1, copies=3, expect_error_leaves=True)
|
||||||
differ.parse(code1, parsers=1, copies=1)
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
differ.parse(code3, parsers=1, copies=1)
|
differ.parse(code3, parsers=1, copies=1)
|
||||||
|
|
||||||
@@ -621,8 +621,8 @@ def test_differing_docstrings(differ):
|
|||||||
''')
|
''')
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=3, copies=1)
|
differ.parse(code2, parsers=2, copies=1)
|
||||||
differ.parse(code1, parsers=3, copies=1)
|
differ.parse(code1, parsers=2, copies=1)
|
||||||
|
|
||||||
|
|
||||||
def test_one_call_in_function_change(differ):
|
def test_one_call_in_function_change(differ):
|
||||||
@@ -827,10 +827,10 @@ def test_indentation_issues(differ):
|
|||||||
''')
|
''')
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=3, copies=2, expect_error_leaves=True)
|
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, copies=2, parsers=2)
|
differ.parse(code1, copies=1, parsers=2)
|
||||||
differ.parse(code3, parsers=1, copies=1)
|
differ.parse(code3, parsers=1, copies=1)
|
||||||
differ.parse(code1, parsers=1, copies=2)
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
def test_error_dedent_issues(differ):
|
def test_error_dedent_issues(differ):
|
||||||
@@ -863,7 +863,7 @@ def test_error_dedent_issues(differ):
|
|||||||
''')
|
''')
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=6, copies=2, expect_error_leaves=True)
|
differ.parse(code2, parsers=2, copies=0, expect_error_leaves=True)
|
||||||
differ.parse(code1, parsers=1, copies=0)
|
differ.parse(code1, parsers=1, copies=0)
|
||||||
|
|
||||||
|
|
||||||
@@ -949,7 +949,7 @@ def test_with_and_funcdef_in_call(differ, prefix):
|
|||||||
code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
|
code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=3, expect_error_leaves=True)
|
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, parsers=1)
|
differ.parse(code1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
@@ -964,7 +964,7 @@ def test_wrong_backslash(differ):
|
|||||||
code2 = insert_line_into_code(code1, 3, '\\.whl$\n')
|
code2 = insert_line_into_code(code1, 3, '\\.whl$\n')
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
|
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, parsers=1, copies=1)
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
@@ -1043,7 +1043,7 @@ def test_random_character_insertion(differ):
|
|||||||
# 4
|
# 4
|
||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, copies=1, parsers=3, expect_error_leaves=True)
|
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
|
||||||
differ.parse(code1, copies=1, parsers=1)
|
differ.parse(code1, copies=1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
@@ -1105,7 +1105,7 @@ def test_all_sorts_of_indentation(differ):
|
|||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, copies=1, parsers=3, expect_error_leaves=True)
|
differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
|
||||||
code3 = dedent('''\
|
code3 = dedent('''\
|
||||||
if 1:
|
if 1:
|
||||||
@@ -1152,7 +1152,7 @@ def test_dont_copy_error_leaves(differ):
|
|||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, parsers=2)
|
differ.parse(code1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
def test_error_dedent_in_between(differ):
|
def test_error_dedent_in_between(differ):
|
||||||
@@ -1203,7 +1203,7 @@ def test_some_other_indentation_issues(differ):
|
|||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, copies=0, parsers=1, expect_error_leaves=True)
|
differ.parse(code2, copies=0, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, copies=2, parsers=1)
|
differ.parse(code1, copies=1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
def test_open_bracket_case1(differ):
|
def test_open_bracket_case1(differ):
|
||||||
@@ -1269,7 +1269,7 @@ def test_some_weird_removals(differ):
|
|||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code3, copies=1, parsers=2, expect_error_leaves=True)
|
differ.parse(code3, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, copies=1)
|
differ.parse(code1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
@@ -1310,7 +1310,7 @@ def test_parent_on_decorator(differ):
|
|||||||
def a_test(self):
|
def a_test(self):
|
||||||
pass''')
|
pass''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
module_node = differ.parse(code2, copies=2, parsers=1)
|
module_node = differ.parse(code2, parsers=1)
|
||||||
cls = module_node.children[0]
|
cls = module_node.children[0]
|
||||||
cls_suite = cls.children[-1]
|
cls_suite = cls.children[-1]
|
||||||
assert len(cls_suite.children) == 3
|
assert len(cls_suite.children) == 3
|
||||||
@@ -1330,5 +1330,5 @@ def test_wrong_indent_in_def(differ):
|
|||||||
c
|
c
|
||||||
''')
|
''')
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
|
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
||||||
differ.parse(code1, parsers=2)
|
differ.parse(code1, parsers=1)
|
||||||
|
|||||||
Reference in New Issue
Block a user