diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index e44f320..101b6a6 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -554,13 +554,15 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if token in always_break_tokens: fstring_stack[:] = [] paren_level = 0 - while True: - indent = indents.pop() - if indent > start: - yield PythonToken(DEDENT, '', spos, '') - else: - indents.append(indent) - break + # We only want to dedent if the token is on a new line. + if re.match(r'[ \f\t]*$', line[:start]): + while True: + indent = indents.pop() + if indent > start: + yield PythonToken(DEDENT, '', spos, '') + else: + indents.append(indent) + break yield PythonToken(NAME, token, spos, prefix) elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt additional_prefix += prefix + line[start:] diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py index 1e97e0f..46d51ef 100644 --- a/test/test_diff_parser.py +++ b/test/test_diff_parser.py @@ -722,3 +722,26 @@ def test_paren_in_strange_position(differ): differ.initialize(code1) differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True) differ.parse(code1, parsers=1, copies=1) + + +def insert_line_into_code(code, index, line): + lines = split_lines(code, keepends=True) + lines.insert(index, line) + return ''.join(lines) + + +def test_paren_before_docstring(differ): + code1 = dedent('''\ + # comment + """ + The + """ + from parso import tree + from parso import python + ''') + + code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 31d33e2..8d8275f 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -246,3 +246,25 @@ def test_error_string(): assert t1.string == '"' assert endmarker.prefix == '\n' 
assert endmarker.string == '' + + +def test_indent_error_recovery(): + code = dedent("""\ + str( + from x import a + def + """) + lst = _get_token_list(code) + expected = [ + # `str(` + INDENT, NAME, OP, + # `from x` + NAME, NAME, + # `import a` on same line as the previous from x + NAME, NAME, NEWLINE, + # Dedent happens, because there's an import now and the import + # statement "breaks" out of the opening paren on the first line. + DEDENT, + # `def` + NAME, NEWLINE, ENDMARKER] + assert [t.type for t in lst] == expected