diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 101b6a6..4d6a71e 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -364,6 +364,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
+    def dedent_if_necessary(start):
+        while start < indents[-1]:
+            if start > indents[-2]:
+                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                break
+            yield PythonToken(DEDENT, '', spos, '')
+            indents.pop()
+
     pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
         fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
@@ -450,6 +458,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             if not pseudomatch:  # scan for tokens
                 match = whitespace.match(line, pos)
                 pos = match.end()
+                new_line = False
+                for t in dedent_if_necessary(pos):
+                    yield t
                 yield PythonToken(
                     ERRORTOKEN, line[pos], (lnum, pos),
                     additional_prefix + match.group(0)
@@ -482,12 +493,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 if start > indents[-1]:
                     yield PythonToken(INDENT, '', spos, '')
                     indents.append(start)
-                while start < indents[-1]:
-                    if start > indents[-2]:
-                        yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
-                        break
-                    yield PythonToken(DEDENT, '', spos, '')
-                    indents.pop()
+                for t in dedent_if_necessary(start):
+                    yield t
 
             if fstring_stack:
                 fstring_index, end = _check_fstring_ending(fstring_stack, token)
diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py
index 117220c..f90c1c1 100644
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -935,5 +935,20 @@ def test_with_and_funcdef_in_call(differ, prefix):
     code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
 
     differ.initialize(code1)
-    differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
-    differ.parse(code1, parsers=1, copies=0)
+    differ.parse(code2, parsers=3, expect_error_leaves=True)
+    differ.parse(code1, parsers=1)
+
+
+def test_wrong_backslash(differ):
+    code1 = dedent('''\
+        def y():
+            1
+        for x in y:
+            continue
+        ''')
+
+    code2 = insert_line_into_code(code1, 3, '\.whl$\n')
+
+    differ.initialize(code1)
+    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
+    differ.parse(code1, parsers=1, copies=1)
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index 8d8275f..3909214 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -197,11 +197,12 @@ def test_ur_literals():
 
 
 def test_error_literal():
-    error_token, endmarker = _get_token_list('"\n')
+    error_token, newline, endmarker = _get_token_list('"\n')
     assert error_token.type == ERRORTOKEN
     assert error_token.string == '"'
+    assert newline.type == NEWLINE
     assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == '\n'
+    assert endmarker.prefix == ''
 
     bracket, error_token, endmarker = _get_token_list('( """')
     assert error_token.type == ERRORTOKEN
@@ -240,11 +241,12 @@ def test_indentation(code, types):
 
 
 def test_error_string():
-    t1, endmarker = _get_token_list(' "\n')
+    t1, newline, endmarker = _get_token_list(' "\n')
     assert t1.type == ERRORTOKEN
     assert t1.prefix == ' '
     assert t1.string == '"'
-    assert endmarker.prefix == '\n'
+    assert newline.type == NEWLINE
+    assert endmarker.prefix == ''
     assert endmarker.string == ''
 
 
@@ -268,3 +270,18 @@ def test_indent_error_recovery():
         # `b`
         NAME, NEWLINE, ENDMARKER]
     assert [t.type for t in lst] == expected
+
+
+def test_error_token_after_dedent():
+    code = dedent("""\
+        class C:
+            pass
+        $foo
+        """)
+    lst = _get_token_list(code)
+    expected = [
+        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
+        # $foo\n
+        ERRORTOKEN, NAME, NEWLINE, DEDENT,
+        ERRORTOKEN, NAME, NEWLINE, ENDMARKER
+    ]
+    assert [t.type for t in lst] == expected
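For reference, a minimal sketch of the behavior this patch pins down (not part of the diff itself; it assumes the patched parso checkout is importable and uses parso's public `tokenize()` helper and `parso.utils.parse_version_string()` as they exist at this revision). With the `dedent_if_necessary()` call in the error-token branch, the stray `$` at column 0 is now preceded by a DEDENT:

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    # "$foo" sits at column 0 after an indented block; the fixed tokenizer
    # emits DEDENT before the ERRORTOKEN for the stray "$", matching the
    # token sequence asserted in test_error_token_after_dedent above.
    code = "class C:\n    pass\n$foo\n"
    for token in tokenize(code, version_info=parse_version_string('3.6')):
        print(token.type, repr(token.string))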