Generate error tokens in the tokenizer when a dedent does not match any enclosing indentation level.

2025-12-21 03:42:00 +08:00 · 2017-07-09 10:44:24 +02:00
parent 8ad37f6036
commit 5b5e4a0616
3 changed files with 35 additions and 3 deletions
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -277,6 +277,9 @@ def tokenize_lines(lines):
                        yield TokenInfo(INDENT, '', spos, '')
                        indents.append(start)
                    while start < indents[-1]:
+                        if start > indents[-2]:
+                            yield TokenInfo(ERRORTOKEN, '', spos, '')
+                            break
                        yield TokenInfo(DEDENT, '', spos, '')
                        indents.pop()

--- a/test/normalizer_issue_files/E10.py
+++ b/test/normalizer_issue_files/E10.py
@@ -1,10 +1,10 @@
 for a in 'abc':
    for b in 'xyz':
        print a  # indented with 8 spaces
-        #: W101:0
+        # TODO currently not an error, because the indentation matches.
 	print(b)  # indented with 1 tab
 if True:
-    #: W101:0
+    #: E101:0
 	pass

 #: E122+1
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -2,13 +2,17 @@

 from textwrap import dedent

+import pytest
+
 from parso._compatibility import py_version
 from parso.utils import splitlines
-from parso.python.token import NAME, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
+from parso.python.token import (
+    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER)
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import TokenInfo

+
 def _get_token_list(string):
    return list(tokenize.tokenize(string))

@@ -211,3 +215,28 @@ def test_endmarker_end_pos():
    check('a')
    check(r'a\\n')
    check('a\\')
+
+
+@pytest.mark.parametrize(
+    ('code', 'types'), [
+        (' foo', ['error_leaf', 'name'])
+    ]
+)
+def test_indentation(code, types):
+    return
+    actual_types = [t.type for t in _get_token_list(code)]
+    print(actual_types)
+    assert False
+
+@pytest.mark.parametrize(
+    ('code', 'types'), [
+        (' foo', [INDENT, NAME, DEDENT]),
+        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERRORTOKEN, NAME, DEDENT]),
+        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERRORTOKEN, NAME,
+                                NEWLINE, ERRORTOKEN, NAME, DEDENT]),
+        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
+    ]
+)
+def test_indentation(code, types):
+    actual_types = [t.type for t in _get_token_list(code)]
+    assert actual_types == types + [ENDMARKER]