Error recovery should not match the whole line in case of an invalid token, fixes #40

This commit is contained in:
Dave Halter
2018-07-03 01:31:02 +02:00
parent 7f964c26f2
commit e05d7fd59f
3 changed files with 32 additions and 9 deletions

View File

@@ -448,16 +448,15 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
pseudomatch = pseudo_token.match(line, pos) pseudomatch = pseudo_token.match(line, pos)
if not pseudomatch: # scan for tokens if not pseudomatch: # scan for tokens
if line.endswith('\n'):
new_line = True
match = whitespace.match(line, pos) match = whitespace.match(line, pos)
pos = match.end() pos = match.end()
yield PythonToken( yield PythonToken(
ERRORTOKEN, line[pos:], (lnum, pos), ERRORTOKEN, line[pos], (lnum, pos),
additional_prefix + match.group(0) additional_prefix + match.group(0)
) )
additional_prefix = '' additional_prefix = ''
break pos += 1
continue
prefix = additional_prefix + pseudomatch.group(1) prefix = additional_prefix + pseudomatch.group(1)
additional_prefix = '' additional_prefix = ''

View File

@@ -1,4 +1,4 @@
from parso import parse from parso import parse, load_grammar
def test_with_stmt(): def test_with_stmt():
@@ -59,3 +59,27 @@ def test_if_stmt():
assert in_else_stmt.type == 'error_node' assert in_else_stmt.type == 'error_node'
assert in_else_stmt.children[0].value == 'g' assert in_else_stmt.children[0].value == 'g'
assert in_else_stmt.children[1].value == '(' assert in_else_stmt.children[1].value == '('
def test_invalid_token():
    """Check how the tree is split around a single invalid ``?`` token.

    The code before the ``?`` must form its own error node, the ``?`` itself
    must be an error leaf holding just that one character, and the code after
    it must still be parsed as a regular node.
    """
    tree = parse('a + ? + b')
    before, bad_leaf, after, _endmarker = tree.children

    # Everything up to the invalid character is grouped into one node.
    assert before.get_code() == 'a +'

    # Only the offending character ends up in the error leaf.
    assert (bad_leaf.value, bad_leaf.type) == ('?', 'error_leaf')

    # The remainder of the line is parsed normally again.
    assert (after.type, after.get_code()) == ('factor', ' + b')
def test_invalid_token_in_fstr():
    """Check the tree produced for an invalid ``?`` token inside an f-string.

    Uses the 3.6 grammar. The ``?`` must be a one-character error leaf, and
    the closing ``}`` and ``"`` must each become their own error leaf.
    """
    grammar = load_grammar(version='3.6')
    tree = grammar.parse('f"{a + ? + b}"')
    before, bad_leaf, after, closing_brace, closing_quote, _endmarker = tree.children

    # The f-string start and the expression up to the invalid character.
    assert before.get_code() == 'f"{a +'

    # Only the offending character ends up in the error leaf.
    assert (bad_leaf.value, bad_leaf.type) == ('?', 'error_leaf')

    # The rest of the expression is an error node here.
    assert (after.type, after.get_code()) == ('error_node', ' + b')

    # The f-string terminators are reported as individual error leaves.
    for leaf, expected_value in ((closing_brace, '}'), (closing_quote, '"')):
        assert leaf.value == expected_value
        assert leaf.type == 'error_leaf'

View File

@@ -199,10 +199,9 @@ def test_ur_literals():
def test_error_literal(): def test_error_literal():
error_token, endmarker = _get_token_list('"\n') error_token, endmarker = _get_token_list('"\n')
assert error_token.type == ERRORTOKEN assert error_token.type == ERRORTOKEN
assert endmarker.prefix == '' assert error_token.string == '"'
assert error_token.string == '"\n'
assert endmarker.type == ENDMARKER assert endmarker.type == ENDMARKER
assert endmarker.prefix == '' assert endmarker.prefix == '\n'
bracket, error_token, endmarker = _get_token_list('( """') bracket, error_token, endmarker = _get_token_list('( """')
assert error_token.type == ERRORTOKEN assert error_token.type == ERRORTOKEN
@@ -244,5 +243,6 @@ def test_error_string():
t1, endmarker = _get_token_list(' "\n') t1, endmarker = _get_token_list(' "\n')
assert t1.type == ERRORTOKEN assert t1.type == ERRORTOKEN
assert t1.prefix == ' ' assert t1.prefix == ' '
assert t1.string == '"\n' assert t1.string == '"'
assert endmarker.prefix == '\n'
assert endmarker.string == '' assert endmarker.string == ''