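Tokenizer: It's now clearer when an error dedent appears

After yielding an ERROR_DEDENT the tokenizer now stores the offending column as the current indentation level, so following lines at that same column no longer produce another ERROR_DEDENT.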

2025-12-06 21:04:29 +08:00 · 2020-03-29 13:50:36 +02:00
parent a950b82066
commit 9f8a68677d
4 changed files with 20 additions and 14 deletions
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -93,9 +93,9 @@ def _assert_nodes_are_equal(node1, node2):
            children2 = node2.children
        except AttributeError:
            assert False, (node1, node2)
-    assert len(children1) == len(children2)
    for n1, n2 in zip(children1, children2):
        _assert_nodes_are_equal(n1, n2)
+    assert len(children1) == len(children2)


 def _get_debug_error_message(module, old_lines, new_lines):
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -398,6 +398,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
        while start < indents[-1]:
            if start > indents[-2]:
                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                indents[-1] = start
                break
            yield PythonToken(DEDENT, '', spos, '')
            indents.pop()
@@ -554,14 +555,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    fstring_stack[:] = []
                    paren_level = 0
                    # We only want to dedent if the token is on a new line.
-                    if re.match(r'[ \f\t]*$', line[:start]):
-                        while True:
-                            indent = indents.pop()
-                            if indent > start:
-                                yield PythonToken(DEDENT, '', spos, '')
-                            else:
-                                indents.append(indent)
-                                break
+                    m = re.match(r'[ \f\t]*$', line[:start])
+                    if m is not None:
+                        for t in dedent_if_necessary(m.end()):
+                            yield t
                if is_identifier(token):
                    yield PythonToken(NAME, token, spos, prefix)
                else:
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -827,7 +827,7 @@ def test_indentation_issues(differ):
        ''')

    differ.initialize(code1)
-    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
+    differ.parse(code2, parsers=3, copies=2, expect_error_leaves=True)
    differ.parse(code1, copies=2, parsers=2)
    differ.parse(code3, parsers=1, copies=1)
    differ.parse(code1, parsers=1, copies=2)
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -4,7 +4,6 @@ import sys
 from textwrap import dedent

 import pytest
-import sys

 from parso.utils import split_lines, parse_version_string
 from parso.python.token import PythonTokenTypes
@@ -239,7 +238,7 @@ xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Pyth
        (' foo', [INDENT, NAME, DEDENT]),
        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
-                                NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
+                                NEWLINE, NAME, DEDENT]),
        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),

        # Name stuff
@@ -250,6 +249,17 @@ xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Pyth
        pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
        pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
        pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
+        (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]),
+        (dedent('''\
+            class BaseCache:
+                    a
+                def
+                    b
+                def
+                    c
+            '''), [NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE,
+                   ERROR_DEDENT, NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
+                   NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, DEDENT])
    ]
 )
 def test_token_types(code, types):
@@ -339,7 +349,6 @@ def test_backslash():

@pytest.mark.parametrize(
    ('code', 'types'), [
-        (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]),
        # f-strings
        ('f"', [FSTRING_START]),
        ('f""', [FSTRING_START, FSTRING_END]),
@@ -396,7 +405,7 @@ def test_backslash():
        ]),
    ]
 )
-def test_token_types(code, types, version_ge_py36):
+def test_fstring_token_types(code, types, version_ge_py36):
    actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
    assert types + [ENDMARKER] == actual_types