Fix tokenizer: Dedents should only happen after newlines

This commit is contained in:
Dave Halter
2019-01-03 11:44:17 +01:00
parent fde64d0eae
commit 5da51720cd
3 changed files with 54 additions and 7 deletions

View File

@@ -554,6 +554,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if token in always_break_tokens:
fstring_stack[:] = []
paren_level = 0
# We only want to dedent if the token is on a new line.
if re.match(r'[ \f\t]*$', line[:start]):
while True:
indent = indents.pop()
if indent > start:

View File

@@ -722,3 +722,26 @@ def test_paren_in_strange_position(differ):
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1)
def insert_line_into_code(code, index, line):
lines = split_lines(code, keepends=True)
lines.insert(index, line)
return ''.join(lines)
def test_paren_before_docstring(differ):
code1 = dedent('''\
# comment
"""
The
"""
from parso import tree
from parso import python
''')
code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
differ.parse(code1, parsers=2, copies=1)

View File

@@ -246,3 +246,25 @@ def test_error_string():
assert t1.string == '"'
assert endmarker.prefix == '\n'
assert endmarker.string == ''
def test_indent_error_recovery():
code = dedent("""\
str(
from x import a
def
""")
lst = _get_token_list(code)
expected = [
# `str(`
INDENT, NAME, OP,
# `from parso`
NAME, NAME,
# `import a` on same line as the previous from parso
NAME, NAME, NEWLINE,
# Dedent happens, because there's an import now and the import
# statement "breaks" out of the opening paren on the first line.
DEDENT,
# `b`
NAME, NEWLINE, ENDMARKER]
assert [t.type for t in lst] == expected