diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 6c6011c..2dffa73 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -277,6 +277,9 @@ def tokenize_lines(lines):
                         yield TokenInfo(INDENT, '', spos, '')
                         indents.append(start)
                     while start < indents[-1]:
+                        if start > indents[-2]:
+                            yield TokenInfo(ERRORTOKEN, '', spos, '')
+                            break
                         yield TokenInfo(DEDENT, '', spos, '')
                         indents.pop()
 
diff --git a/test/normalizer_issue_files/E10.py b/test/normalizer_issue_files/E10.py
index f49bd93..2dbed78 100644
--- a/test/normalizer_issue_files/E10.py
+++ b/test/normalizer_issue_files/E10.py
@@ -1,10 +1,10 @@
 for a in 'abc':
     for b in 'xyz':
         print a  # indented with 8 spaces
-        #: W101:0
+        # TODO currently not an error, because the indentation matches.
 	print(b)  # indented with 1 tab
 if True:
-    #: W101:0
+    #: E101:0
 	pass
 
 #: E122+1
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index c5f4bb3..9ae56e9 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -2,13 +2,17 @@
 from textwrap import dedent
 
+import pytest
+
 from parso._compatibility import py_version
 from parso.utils import splitlines
-from parso.python.token import NAME, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
+from parso.python.token import (
+    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER)
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import TokenInfo
 
+
 def _get_token_list(string):
     return list(tokenize.tokenize(string))
 
 
@@ -211,3 +215,17 @@ def test_endmarker_end_pos():
     check('a')
     check(r'a\\n')
     check('a\\')
+
+
+@pytest.mark.parametrize(
+    ('code', 'types'), [
+        (' foo', [INDENT, NAME, DEDENT]),
+        (' foo\n bar', [INDENT, NAME, NEWLINE, ERRORTOKEN, NAME, DEDENT]),
+        (' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERRORTOKEN, NAME,
+                               NEWLINE, ERRORTOKEN, NAME, DEDENT]),
+        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
+    ]
+)
+def test_indentation(code, types):
+    actual_types = [t.type for t in _get_token_list(code)]
+    assert actual_types == types + [ENDMARKER]