Fix another tokenizer issue

Dave Halter
2019-01-09 00:55:54 +01:00
parent 574e1c63e8
commit 57320af6eb
2 changed files with 7 additions and 2 deletions

@@ -457,10 +457,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            pseudomatch = pseudo_token.match(line, pos)
            if not pseudomatch:  # scan for tokens
                match = whitespace.match(line, pos)
                if pos == 0:
                    for t in dedent_if_necessary(match.end()):
                        yield t
                pos = match.end()
                new_line = False
                for t in dedent_if_necessary(pos):
                    yield t
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
                    additional_prefix + match.group(0)
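
The hunk appears to replace the pos == 0 guard around the dedent handling with a dedent check that runs after the leading whitespace before an error token has been consumed. A rough sketch for poking at that branch outside the test suite, assuming the module layout parso had around this release (tokenize_lines in parso.python.tokenize, split_lines and parse_version_string in parso.utils) and an arbitrary 3.6 target version; the input is one of the fuzz strings from the test change below:

from parso.python.tokenize import tokenize_lines
from parso.utils import parse_version_string, split_lines

# One of the fuzz inputs added in the test file below; the control characters
# are not valid Python tokens, so the tokenizer falls back to the
# error-token branch shown above.
code = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
lines = split_lines(code, keepends=True)
for token in tokenize_lines(lines, parse_version_string('3.6')):
    print(token)  # PythonToken(type, string, start_pos, prefix)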

@@ -961,4 +961,8 @@ def test_random_unicode_characters(differ):
    differ.parse('\r\r', parsers=1)
    differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
    differ.parse('a\ntaǁ\rGĒōns__\n\nb', parsers=1)
    s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
    differ.parse(s, parsers=1, expect_error_leaves=True)
    differ.parse('')
    differ.parse(s + '\n', parsers=1, expect_error_leaves=True)
    differ.parse('')
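
The differ fixture here comes from parso's test suite and drives the diff parser; outside that harness, a hedged approximation is to feed the same inputs to the public API with error recovery on (the default) and look for recovered error nodes. has_error_leaves below is a hypothetical helper, not part of parso:

import parso

def has_error_leaves(node):
    # Error recovery marks unparsable regions as error_leaf / error_node.
    if node.type in ('error_leaf', 'error_node'):
        return True
    return any(has_error_leaves(c) for c in getattr(node, 'children', []))

s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
for code in (s, s + '\n'):
    module = parso.parse(code)
    print(repr(code), '->', has_error_leaves(module))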