Use ERROR_DEDENT instead of ERRORTOKEN for wrong dedents.

2025-12-09 22:25:53 +08:00 · 2017-07-19 20:13:51 +02:00
parent 569cb99ca7
commit 6876e1b7c0
9 changed files with 20 additions and 31 deletions
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -13,8 +13,8 @@ import logging
 from parso.utils import splitlines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, TokenInfo,
-                                   ENDMARKER, INDENT, DEDENT, ERRORTOKEN)
+from parso.python.tokenize import (NEWLINE, TokenInfo, ERROR_DEDENT,
+                                   ENDMARKER, INDENT, DEDENT)


 def _get_last_line(node_or_leaf):
@@ -305,8 +305,7 @@ class DiffParser(object):
            # In case of omitted_first_indent, it might not be dedented fully.
            # However this is a sign for us that a dedent happened.
            if typ == DEDENT \
-                    or typ == ERRORTOKEN and not string \
-                    and omitted_first_indent and len(indents) == 1:
+                    or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
--- a/parso/python/normalizer.py
+++ b/parso/python/normalizer.py
@@ -67,13 +67,13 @@ class ErrorFinder(Normalizer):

    def visit_leaf(self, leaf):
        if leaf.type == 'error_leaf':
-            if leaf.original_type == 'indent':
+            if leaf.original_type in ('indent', 'error_dedent'):
                # Indents/Dedents itself never have a prefix. They are just
                # "pseudo" tokens that get removed by the syntax tree later.
                # Therefore in case of an error we also have to check for this.
+                print(repr(leaf.prefix), leaf.get_next_leaf())
                spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
                self._add_indentation_error("Indentation Error", spacing)
-                print(leaf, repr(leaf.prefix), repr(leaf.value), leaf.get_previous_leaf())
            else:
                self._add_syntax_error("Syntax Error", leaf)

--- a/parso/python/token.py
+++ b/parso/python/token.py
@@ -29,6 +29,11 @@ if not py_version >= 35:
    tok_name[ATEQUAL] = 'ATEQUAL'
    N_TOKENS += 1

+ERROR_DEDENT = N_TOKENS
+tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
+N_TOKENS += 1
+
+


 # Map from operator to number (since tokenize doesn't do this)
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -19,7 +19,8 @@ import itertools as _itertools
 from codecs import BOM_UTF8

 from parso.python.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
-                                NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
+                                NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
+                                ERROR_DEDENT)
 from parso._compatibility import py_version
 from parso.utils import splitlines

@@ -41,10 +42,6 @@ else:
    is_identifier = lambda s: s in namechars


-COMMENT = N_TOKENS
-tok_name[COMMENT] = 'COMMENT'
-
-
 def group(*choices, **kwargs):
    capture = kwargs.pop('capture', False)  # Python 2, arrghhhhh :(
    assert not kwargs
@@ -318,8 +315,7 @@ def tokenize_lines(lines, version_info):
                        indents.append(start)
                    while start < indents[-1]:
                        if start > indents[-2]:
-                            yield TokenInfo(ERRORTOKEN, '', spos, '')
-                            print(spos, repr(line))
+                            yield TokenInfo(ERROR_DEDENT, '', (lnum, 0), '')
                            break
                        yield TokenInfo(DEDENT, '', spos, '')
                        indents.pop()
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
@@ -108,7 +108,7 @@ class PythonLeaf(PythonMixin, Leaf):
        #   indent error leafs somehow? No idea how, though.
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
-                and previous_leaf.original_type == 'indent':
+                and previous_leaf.original_type in ('indent', 'error_dedent'):
            previous_leaf = previous_leaf.get_previous_leaf()

        if previous_leaf is None:
--- a/test/normalizer_issue_files/E10.py
+++ b/test/normalizer_issue_files/E10.py
@@ -1,7 +1,7 @@
 for a in 'abc':
    for b in 'xyz':
        hello(a)  # indented with 8 spaces
-        #: E901:1
+        #: E903:0
 	hello(b)  # indented with 1 tab
 if True:
    #: E101:0
--- a/test/test_parser_tree.py
+++ b/test/test_parser_tree.py
@@ -112,7 +112,6 @@ def test_ellipsis_py2(each_py2_version):
    module = parse('[0][...]', version=each_py2_version, error_recovery=False)
    expr = module.children[0]
    trailer = expr.children[-1]
-    print(expr)
    subscript = trailer.children[1]
    assert subscript.type == 'subscript'
    assert [leaf.value for leaf in subscript.children] == ['.', '.', '.']
--- a/test/test_python_errors.py
+++ b/test/test_python_errors.py
@@ -45,6 +45,7 @@ def test_syntax_errors(code, positions):
@pytest.mark.parametrize(
    ('code', 'positions'), [
        (' 1', [(1, 0)]),
+        ('def x():\n    1\n 2', [(3, 0)]),
    ]
 )
 def test_indentation_errors(code, positions):
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -7,7 +7,7 @@ import pytest
 from parso._compatibility import py_version
 from parso.utils import splitlines, parse_version_string
 from parso.python.token import (
-    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER)
+    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import TokenInfo
@@ -212,23 +212,12 @@ def test_endmarker_end_pos():
    check('a\\')


-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        (' foo', ['error_leaf', 'name'])
-    ]
-)
-def test_indentation(code, types):
-    return
-    actual_types = [t.type for t in _get_token_list(code)]
-    print(actual_types)
-    assert False
-
@pytest.mark.parametrize(
    ('code', 'types'), [
        (' foo', [INDENT, NAME, DEDENT]),
-        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERRORTOKEN, NAME, DEDENT]),
-        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERRORTOKEN, NAME,
-                                NEWLINE, ERRORTOKEN, NAME, DEDENT]),
+        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
+        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
+                                NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
    ]
 )