mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-06 21:04:29 +08:00
Fix tokenizer: dedents before error tokens are now emitted properly.
This commit is contained in:
@@ -364,6 +364,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
token. This idea comes from lib2to3. The prefix contains all information
|
||||
that is irrelevant for the parser like newlines in parentheses or comments.
|
||||
"""
|
||||
def dedent_if_necessary(start):
    """Yield the dedent tokens needed before a token at column ``start``.

    Uses ``indents``, ``lnum`` and ``spos``, which are defined outside this
    function. One entry is popped from ``indents`` for every DEDENT token
    that is yielded.
    """
    while indents[-1] > start:
        if indents[-2] < start:
            # ``start`` lies strictly between the two innermost indentation
            # levels, so it matches neither of them: yield an ERROR_DEDENT
            # and stop without popping.
            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
            return
        yield PythonToken(DEDENT, '', spos, '')
        indents.pop()
|
||||
|
||||
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
|
||||
fstring_pattern_map, always_break_tokens, = \
|
||||
_get_token_collection(version_info)
|
||||
@@ -450,6 +458,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
if not pseudomatch: # scan for tokens
|
||||
match = whitespace.match(line, pos)
|
||||
pos = match.end()
|
||||
new_line = False
|
||||
for t in dedent_if_necessary(pos):
|
||||
yield t
|
||||
yield PythonToken(
|
||||
ERRORTOKEN, line[pos], (lnum, pos),
|
||||
additional_prefix + match.group(0)
|
||||
@@ -482,12 +493,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
if start > indents[-1]:
|
||||
yield PythonToken(INDENT, '', spos, '')
|
||||
indents.append(start)
|
||||
while start < indents[-1]:
|
||||
if start > indents[-2]:
|
||||
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
|
||||
break
|
||||
yield PythonToken(DEDENT, '', spos, '')
|
||||
indents.pop()
|
||||
for t in dedent_if_necessary(start):
|
||||
yield t
|
||||
|
||||
if fstring_stack:
|
||||
fstring_index, end = _check_fstring_ending(fstring_stack, token)
|
||||
|
||||
@@ -935,5 +935,20 @@ def test_with_and_funcdef_in_call(differ, prefix):
|
||||
code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
|
||||
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
|
||||
differ.parse(code1, parsers=1, copies=0)
|
||||
differ.parse(code2, parsers=3, expect_error_leaves=True)
|
||||
differ.parse(code1, parsers=1)
|
||||
|
||||
|
||||
def test_wrong_backslash(differ):
    """Diff-parse code after a line starting with a backslash is inserted."""
    code1 = dedent('''\
        def y():
            1
            for x in y:
                continue
        ''')

    # The backslash is escaped explicitly: '\.' is not a recognized escape
    # sequence and makes newer Python versions emit a warning. The resulting
    # string is unchanged (backslash, '.whl$', newline).
    code2 = insert_line_into_code(code1, 3, '\\.whl$\n')

    differ.initialize(code1)
    # The code with the inserted line is parsed with error leaves expected.
    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
    # Then switch back to the original code.
    differ.parse(code1, parsers=1, copies=1)
|
||||
|
||||
@@ -197,11 +197,12 @@ def test_ur_literals():
|
||||
|
||||
|
||||
def test_error_literal():
|
||||
error_token, endmarker = _get_token_list('"\n')
|
||||
error_token, newline, endmarker = _get_token_list('"\n')
|
||||
assert error_token.type == ERRORTOKEN
|
||||
assert error_token.string == '"'
|
||||
assert newline.type == NEWLINE
|
||||
assert endmarker.type == ENDMARKER
|
||||
assert endmarker.prefix == '\n'
|
||||
assert endmarker.prefix == ''
|
||||
|
||||
bracket, error_token, endmarker = _get_token_list('( """')
|
||||
assert error_token.type == ERRORTOKEN
|
||||
@@ -240,11 +241,12 @@ def test_indentation(code, types):
|
||||
|
||||
|
||||
def test_error_string():
    """Tokenize a line holding only an unterminated string quote.

    Three tokens are expected: the error token for the quote, a NEWLINE
    token and the end marker.
    """
    # Only the three-token unpacking is kept; a two-token unpacking of the
    # same call cannot succeed together with it.
    t1, newline, endmarker = _get_token_list(' "\n')
    assert t1.type == ERRORTOKEN
    # The leading space is the error token's prefix, not part of its string.
    assert t1.prefix == ' '
    assert t1.string == '"'
    assert newline.type == NEWLINE
    # With a NEWLINE token in between, the end marker's prefix is empty.
    assert endmarker.prefix == ''
    assert endmarker.string == ''
|
||||
|
||||
|
||||
@@ -268,3 +270,18 @@ def test_indent_error_recovery():
|
||||
# `b`
|
||||
NAME, NEWLINE, ENDMARKER]
|
||||
assert [t.type for t in lst] == expected
|
||||
|
||||
|
||||
def test_error_token_after_dedent():
    """A DEDENT token is expected before an error token that follows a block."""
    source = dedent("""\
        class C:
            pass
        $foo
        """)
    token_types = [token.type for token in _get_token_list(source)]
    assert token_types == [
        # class C:\n
        NAME, NAME, OP, NEWLINE,
        #     pass\n
        INDENT, NAME, NEWLINE,
        # $foo\n
        DEDENT, ERRORTOKEN, NAME, NEWLINE,
        ENDMARKER,
    ]
|
||||
|
||||
Reference in New Issue
Block a user