diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index bceb8ee..e44f320 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -448,16 +448,15 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
             pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:                             # scan for tokens
-                if line.endswith('\n'):
-                    new_line = True
                 match = whitespace.match(line, pos)
                 pos = match.end()
                 yield PythonToken(
-                    ERRORTOKEN, line[pos:], (lnum, pos),
+                    ERRORTOKEN, line[pos], (lnum, pos),
                     additional_prefix + match.group(0)
                 )
                 additional_prefix = ''
-                break
+                pos += 1
+                continue
 
             prefix = additional_prefix + pseudomatch.group(1)
             additional_prefix = ''
diff --git a/test/test_error_recovery.py b/test/test_error_recovery.py
index f8dcd94..b7d897d 100644
--- a/test/test_error_recovery.py
+++ b/test/test_error_recovery.py
@@ -1,4 +1,4 @@
-from parso import parse
+from parso import parse, load_grammar
 
 
 def test_with_stmt():
@@ -59,3 +59,27 @@ def test_if_stmt():
     assert in_else_stmt.type == 'error_node'
     assert in_else_stmt.children[0].value == 'g'
     assert in_else_stmt.children[1].value == '('
+
+
+def test_invalid_token():
+    module = parse('a + ? + b')
+    error_node, q, plus_b, endmarker = module.children
+    assert error_node.get_code() == 'a +'
+    assert q.value == '?'
+    assert q.type == 'error_leaf'
+    assert plus_b.type == 'factor'
+    assert plus_b.get_code() == ' + b'
+
+
+def test_invalid_token_in_fstr():
+    module = load_grammar(version='3.6').parse('f"{a + ? + b}"')
+    error_node, q, plus_b, error1, error2, endmarker = module.children
+    assert error_node.get_code() == 'f"{a +'
+    assert q.value == '?'
+    assert q.type == 'error_leaf'
+    assert plus_b.type == 'error_node'
+    assert plus_b.get_code() == ' + b'
+    assert error1.value == '}'
+    assert error1.type == 'error_leaf'
+    assert error2.value == '"'
+    assert error2.type == 'error_leaf'
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index bab6439..31d33e2 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -199,10 +199,9 @@ def test_ur_literals():
 def test_error_literal():
     error_token, endmarker = _get_token_list('"\n')
     assert error_token.type == ERRORTOKEN
-    assert endmarker.prefix == ''
-    assert error_token.string == '"\n'
+    assert error_token.string == '"'
     assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == ''
+    assert endmarker.prefix == '\n'
 
     bracket, error_token, endmarker = _get_token_list('( """')
     assert error_token.type == ERRORTOKEN
@@ -244,5 +243,6 @@ def test_error_string():
     t1, endmarker = _get_token_list(' "\n')
     assert t1.type == ERRORTOKEN
     assert t1.prefix == ' '
-    assert t1.string == '"\n'
+    assert t1.string == '"'
+    assert endmarker.prefix == '\n'
     assert endmarker.string == ''