Mirror of https://github.com/davidhalter/parso.git
Fix tokenizer: dedents before error tokens are now emitted properly.
@@ -364,6 +364,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
+    def dedent_if_necessary(start):
+        while start < indents[-1]:
+            if start > indents[-2]:
+                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                break
+            yield PythonToken(DEDENT, '', spos, '')
+            indents.pop()
+
     pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
         fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
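The new dedent_if_necessary helper (added near the top of tokenize_lines in parso/python/tokenize.py) walks the tokenizer's indents stack: every enclosing indentation level larger than the new column produces a DEDENT, and a column that matches no enclosing level produces a single ERROR_DEDENT and stops. A minimal sketch of that control flow outside the tokenizer, with plain strings standing in for parso's PythonToken objects:

    def dedents(start, indents):
        # Same logic as dedent_if_necessary above.
        while start < indents[-1]:
            if start > indents[-2]:
                yield 'ERROR_DEDENT'  # column matches no enclosing level
                break
            yield 'DEDENT'
            indents.pop()

    print(list(dedents(4, [0, 4, 8])))  # ['DEDENT']
    print(list(dedents(2, [0, 4, 8])))  # ['DEDENT', 'ERROR_DEDENT']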
@@ -450,6 +458,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             if not pseudomatch:  # scan for tokens
                 match = whitespace.match(line, pos)
                 pos = match.end()
+                new_line = False
+                for t in dedent_if_necessary(pos):
+                    yield t
                 yield PythonToken(
                     ERRORTOKEN, line[pos], (lnum, pos),
                     additional_prefix + match.group(0)
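This hunk is the heart of the fix: when pseudo_token fails to match (the error path), the tokenizer previously yielded an ERRORTOKEN without ever unwinding the indents stack. Now it first emits any pending dedents for the current column. For input like the one in the new test at the bottom of this diff, the stream of token types becomes (a sketch grounded in test_error_token_after_dedent below):

    code = 'class C:\n    pass\n$foo\n'
    # Before the fix, the DEDENT closing the class body was missing in
    # front of the ERRORTOKEN for '$'; after the fix the stream is:
    expected = ['NAME', 'NAME', 'OP', 'NEWLINE', 'INDENT', 'NAME', 'NEWLINE',
                'DEDENT', 'ERRORTOKEN', 'NAME', 'NEWLINE', 'ENDMARKER']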
@@ -482,12 +493,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 if start > indents[-1]:
                     yield PythonToken(INDENT, '', spos, '')
                     indents.append(start)
-                while start < indents[-1]:
-                    if start > indents[-2]:
-                        yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
-                        break
-                    yield PythonToken(DEDENT, '', spos, '')
-                    indents.pop()
+                for t in dedent_if_necessary(start):
+                    yield t

             if fstring_stack:
                 fstring_index, end = _check_fstring_ending(fstring_stack, token)
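With both call sites delegating to dedent_if_necessary, the indent bookkeeping lives in one place, so the normal indentation path and the error path can no longer drift apart. The re-yield loop is the pre-`yield from` spelling of generator delegation; a sketch of the same pattern on Python 3.3+ (the loop form in the diff was presumably kept for Python 2 compatibility at the time):

    def tokens(start, indents):
        def dedent_if_necessary(start):
            while start < indents[-1]:
                yield ('DEDENT', indents.pop())

        # Equivalent to: for t in dedent_if_necessary(start): yield t
        yield from dedent_if_necessary(start)

    print(list(tokens(0, [0, 4, 8])))  # [('DEDENT', 8), ('DEDENT', 4)]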
@@ -935,5 +935,20 @@ def test_with_and_funcdef_in_call(differ, prefix):
     code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')

     differ.initialize(code1)
-    differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
-    differ.parse(code1, parsers=1, copies=0)
+    differ.parse(code2, parsers=3, expect_error_leaves=True)
+    differ.parse(code1, parsers=1)
+
+
+def test_wrong_backslash(differ):
+    code1 = dedent('''\
+        def y():
+            1
+        for x in y:
+            continue
+        ''')
+    code2 = insert_line_into_code(code1, 3, '\.whl$\n')
+
+    differ.initialize(code1)
+    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
+    differ.parse(code1, parsers=1, copies=1)
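The new test_wrong_backslash splices a line of regex noise (`\.whl$`) into otherwise valid code and checks how much of the old tree the diff parser can still copy over. The insert_line_into_code helper is defined elsewhere in test_diff_parser.py; a plausible implementation, read off the call sites rather than the real source (the indexing convention is an assumption):

    def insert_line_into_code(code, index, line):
        # Hypothetical sketch: splice `line` in at line `index`,
        # keeping the original line endings intact.
        lines = code.splitlines(True)
        lines.insert(index, line)
        return ''.join(lines)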
@@ -197,11 +197,12 @@ def test_ur_literals():


 def test_error_literal():
-    error_token, endmarker = _get_token_list('"\n')
+    error_token, newline, endmarker = _get_token_list('"\n')
     assert error_token.type == ERRORTOKEN
     assert error_token.string == '"'
+    assert newline.type == NEWLINE
     assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == '\n'
+    assert endmarker.prefix == ''

     bracket, error_token, endmarker = _get_token_list('( """')
     assert error_token.type == ERRORTOKEN
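The unpacking in test_tokenize.py gains a `newline` slot because the trailing '\n' after the unterminated string is now a NEWLINE token of its own instead of riding along as the endmarker's prefix. Side by side as (type, string, prefix) triples; only the fields asserted in the test are certain, the rest are assumptions:

    before = [('ERRORTOKEN', '"', ''), ('ENDMARKER', '', '\n')]
    after = [('ERRORTOKEN', '"', ''), ('NEWLINE', '\n', ''), ('ENDMARKER', '', '')]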
@@ -240,11 +241,12 @@ def test_indentation(code, types):


 def test_error_string():
-    t1, endmarker = _get_token_list(' "\n')
+    t1, newline, endmarker = _get_token_list(' "\n')
     assert t1.type == ERRORTOKEN
     assert t1.prefix == ' '
     assert t1.string == '"'
-    assert endmarker.prefix == '\n'
+    assert newline.type == NEWLINE
+    assert endmarker.prefix == ''
     assert endmarker.string == ''


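test_error_string makes the same point with leading whitespace: the space travels as the error token's prefix rather than becoming a token of its own. In the same (type, string, prefix) notation, again with only the asserted fields certain:

    tokens = [('ERRORTOKEN', '"', ' '), ('NEWLINE', '\n', ''), ('ENDMARKER', '', '')]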
@@ -268,3 +270,18 @@ def test_indent_error_recovery():
         # `b`
         NAME, NEWLINE, ENDMARKER]
     assert [t.type for t in lst] == expected
+
+
+def test_error_token_after_dedent():
+    code = dedent("""\
+        class C:
+            pass
+        $foo
+        """)
+    lst = _get_token_list(code)
+    expected = [
+        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
+        # $foo\n
+        ERRORTOKEN, NAME, NEWLINE, ENDMARKER
+    ]
+    assert [t.type for t in lst] == expected
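The new test pins down the fixed ordering: the DEDENT that closes the class suite now comes before the ERRORTOKEN for `$`. The same input can also be checked through parso's public API, where error recovery keeps the stray `$` as an error leaf instead of failing the whole parse (a sketch; exact node types may vary by parso version):

    import parso

    module = parso.parse('class C:\n    pass\n$foo\n')
    leaf = module.get_first_leaf()
    while leaf is not None:
        print(leaf.type, repr(leaf.value))
        leaf = leaf.get_next_leaf()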