Fix tokenizer: dedents that precede error tokens are now emitted properly.

This commit is contained in:
Dave Halter
2019-01-06 19:26:49 +01:00
parent edbceba4f8
commit 94bd48bae1
3 changed files with 51 additions and 12 deletions

View File

@@ -364,6 +364,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
token. This idea comes from lib2to3. The prefix contains all information
that is irrelevant for the parser like newlines in parentheses or comments.
"""
def dedent_if_necessary(start):
    # Close indentation levels until the column `start` no longer sits
    # below the innermost recorded level, yielding one DEDENT per level.
    # If `start` falls strictly between the two topmost levels, the
    # indentation is inconsistent: emit a single ERROR_DEDENT (without
    # popping the stack) and stop.
    while True:
        if start >= indents[-1]:
            break
        if start > indents[-2]:
            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
            break
        yield PythonToken(DEDENT, '', spos, '')
        indents.pop()
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
fstring_pattern_map, always_break_tokens, = \
_get_token_collection(version_info)
@@ -450,6 +458,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if not pseudomatch: # scan for tokens
match = whitespace.match(line, pos)
pos = match.end()
new_line = False
for t in dedent_if_necessary(pos):
yield t
yield PythonToken(
ERRORTOKEN, line[pos], (lnum, pos),
additional_prefix + match.group(0)
@@ -482,12 +493,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
for t in dedent_if_necessary(start):
yield t
if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token)

View File

@@ -935,5 +935,20 @@ def test_with_and_funcdef_in_call(differ, prefix):
code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
differ.initialize(code1)
differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=0)
differ.parse(code2, parsers=3, expect_error_leaves=True)
differ.parse(code1, parsers=1)
def test_wrong_backslash(differ):
    """Diff-parse around a stray backslash line.

    Inserts a bogus ``\\.whl$`` line into otherwise valid code, expecting
    error leaves while it is present, then checks the parser recovers
    cleanly once the original code is restored.
    """
    code1 = dedent('''\
        def y():
            1
        for x in y:
            continue
        ''')
    # '\\.' (escaped backslash) — the unescaped form '\.' produces the
    # same string only via the "unknown escapes are left alone" rule and
    # raises a SyntaxWarning on modern CPython.
    code2 = insert_line_into_code(code1, 3, '\\.whl$\n')
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)

View File

@@ -197,11 +197,12 @@ def test_ur_literals():
def test_error_literal():
error_token, endmarker = _get_token_list('"\n')
error_token, newline, endmarker = _get_token_list('"\n')
assert error_token.type == ERRORTOKEN
assert error_token.string == '"'
assert newline.type == NEWLINE
assert endmarker.type == ENDMARKER
assert endmarker.prefix == '\n'
assert endmarker.prefix == ''
bracket, error_token, endmarker = _get_token_list('( """')
assert error_token.type == ERRORTOKEN
@@ -240,11 +241,12 @@ def test_indentation(code, types):
def test_error_string():
t1, endmarker = _get_token_list(' "\n')
t1, newline, endmarker = _get_token_list(' "\n')
assert t1.type == ERRORTOKEN
assert t1.prefix == ' '
assert t1.string == '"'
assert endmarker.prefix == '\n'
assert newline.type == NEWLINE
assert endmarker.prefix == ''
assert endmarker.string == ''
@@ -268,3 +270,18 @@ def test_indent_error_recovery():
# `b`
NAME, NEWLINE, ENDMARKER]
assert [t.type for t in lst] == expected
def test_error_token_after_dedent():
    """An error token after a closed block must be preceded by a DEDENT.

    Regression test: the tokenizer must emit the pending DEDENT before
    the ERRORTOKEN for the ``$`` character, not swallow it.
    """
    # NOTE(review): the body of `pass` must be indented relative to
    # `class C:` or the expected INDENT/DEDENT tokens are never produced.
    code = dedent("""\
        class C:
            pass
        $foo
        """)
    lst = _get_token_list(code)
    expected = [
        # `class C:` header and the indented `pass` body
        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
        # `$foo\n`
        ERRORTOKEN, NAME, NEWLINE, ENDMARKER,
    ]
    assert [t.type for t in lst] == expected