Fix tokenizer: dedents before error tokens are now emitted properly.

Author: Dave Halter
Date:   2019-01-06 19:26:49 +01:00
Parent: edbceba4f8
Commit: 94bd48bae1

3 changed files with 51 additions and 12 deletions

parso/python/tokenize.py

@@ -364,6 +364,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
+    def dedent_if_necessary(start):
+        while start < indents[-1]:
+            if start > indents[-2]:
+                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                break
+            yield PythonToken(DEDENT, '', spos, '')
+            indents.pop()
+
     pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
         fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
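
The new helper closes over `indents`, `lnum` and `spos` from the enclosing
`tokenize_lines`. A minimal standalone sketch of its logic, with explicit
parameters instead of closure variables and simple stand-ins for
`PythonToken` and the token types (illustrative only; the real definitions
live in parso.python.tokenize):

    from collections import namedtuple

    PythonToken = namedtuple('PythonToken', 'type string start_pos prefix')
    DEDENT, ERROR_DEDENT = 'DEDENT', 'ERROR_DEDENT'

    def dedent_if_necessary(indents, start, lnum, spos):
        # Unwind the indentation stack until `start` fits onto it again.
        while start < indents[-1]:
            if start > indents[-2]:
                # `start` sits strictly between two known levels, so the
                # dedent does not line up with any open block.
                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
                break
            yield PythonToken(DEDENT, '', spos, '')
            indents.pop()

    indents = [0, 4, 8]
    tokens = list(dedent_if_necessary(indents, 0, 3, (3, 0)))
    print([t.type for t in tokens])  # ['DEDENT', 'DEDENT']; indents == [0]
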
@@ -450,6 +458,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            if not pseudomatch:                             # scan for tokens
                match = whitespace.match(line, pos)
                pos = match.end()
+                new_line = False
+                for t in dedent_if_necessary(pos):
+                    yield t
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
                    additional_prefix + match.group(0)
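
This is the actual bug fix: when the tokenizer falls into this error branch
on a dedented line, pending DEDENT tokens are now emitted before the
ERRORTOKEN instead of being dropped. A rough way to observe the stream
through parso's internal tokenizer; the `tokenize`/`parse_version_string`
calls match parso's internals around the time of this commit and may differ
in other versions:

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    code = 'class C:\n    pass\n$foo\n'  # `$` cannot start any Python token
    tokens = list(tokenize(code, parse_version_string('3.6')))
    print([t.type for t in tokens])
    # With this fix the DEDENT now precedes the ERRORTOKEN produced by `$`
    # (see test_error_token_after_dedent below).
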
@@ -482,12 +493,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    if start > indents[-1]:
                        yield PythonToken(INDENT, '', spos, '')
                        indents.append(start)
-                    while start < indents[-1]:
-                        if start > indents[-2]:
-                            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
-                            break
-                        yield PythonToken(DEDENT, '', spos, '')
-                        indents.pop()
+                    for t in dedent_if_necessary(start):
+                        yield t

            if fstring_stack:
                fstring_index, end = _check_fstring_ending(fstring_stack, token)
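
The removed loop was identical to the body of the new helper, so both call
sites now share one implementation. Continuing the sketch above: in the
inconsistent case the helper stops after a single ERROR_DEDENT instead of
unwinding the rest of the stack.

    indents = [0, 4]
    tokens = list(dedent_if_necessary(indents, 2, 3, (3, 2)))
    print([t.type for t in tokens])  # ['ERROR_DEDENT']; indents == [0, 4]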

test/test_diff_parser.py

@@ -935,5 +935,20 @@ def test_with_and_funcdef_in_call(differ, prefix):
     code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
     differ.initialize(code1)
-    differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
-    differ.parse(code1, parsers=1, copies=0)
+    differ.parse(code2, parsers=3, expect_error_leaves=True)
+    differ.parse(code1, parsers=1)
+
+
+def test_wrong_backslash(differ):
+    code1 = dedent('''\
+        def y():
+            1
+            for x in y:
+                continue
+        ''')
+    code2 = insert_line_into_code(code1, 3, '\.whl$\n')
+    differ.initialize(code1)
+    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
+    differ.parse(code1, parsers=1, copies=1)
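
For reference, `insert_line_into_code` is a small helper defined elsewhere in
this test module; an assumed equivalent (not part of this diff) simply
splices the raw line in at the given index:

    from parso.utils import split_lines

    def insert_line_into_code(code, index, line):
        lines = split_lines(code, keepends=True)
        lines.insert(index, line)
        return ''.join(lines)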

test/test_tokenize.py

@@ -197,11 +197,12 @@ def test_ur_literals():
 def test_error_literal():
-    error_token, endmarker = _get_token_list('"\n')
+    error_token, newline, endmarker = _get_token_list('"\n')
     assert error_token.type == ERRORTOKEN
     assert error_token.string == '"'
+    assert newline.type == NEWLINE
     assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == '\n'
+    assert endmarker.prefix == ''

     bracket, error_token, endmarker = _get_token_list('( """')
     assert error_token.type == ERRORTOKEN
@@ -240,11 +241,12 @@ def test_indentation(code, types):
 def test_error_string():
-    t1, endmarker = _get_token_list(' "\n')
+    t1, newline, endmarker = _get_token_list(' "\n')
     assert t1.type == ERRORTOKEN
     assert t1.prefix == ' '
     assert t1.string == '"'
-    assert endmarker.prefix == '\n'
+    assert newline.type == NEWLINE
+    assert endmarker.prefix == ''
     assert endmarker.string == ''
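
Both tests change for the same reason: a lone quote followed by a newline now
yields an explicit NEWLINE token, so the trailing '\n' becomes that token's
string instead of ending up in the endmarker's prefix. Roughly, under the
same internal-API assumptions as above:

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    error_token, newline, endmarker = list(
        tokenize('"\n', parse_version_string('3.6')))
    assert newline.string == '\n'  # the newline is its own token now
    assert endmarker.prefix == ''  # and no longer hides in the prefix
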
@@ -268,3 +270,18 @@ def test_indent_error_recovery():
         # `b`
         NAME, NEWLINE, ENDMARKER]
     assert [t.type for t in lst] == expected
+
+
+def test_error_token_after_dedent():
+    code = dedent("""\
+        class C:
+            pass
+        $foo
+        """)
+    lst = _get_token_list(code)
+    expected = [
+        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
+        # $foo\n
+        ERRORTOKEN, NAME, NEWLINE, ENDMARKER
+    ]
+    assert [t.type for t in lst] == expected