Fix a prefix issue with error leafs.

Dave Halter
2018-04-22 19:28:30 +02:00
parent 579146b501
commit f20106d88e
2 changed files with 21 additions and 6 deletions
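What changed: when the pseudo-token regex cannot match anything at the current position, the rest of the line is emitted as an ERRORTOKEN. Before this commit the leading whitespace was swallowed into that token's string; now it is stripped off first and reported as the token's prefix (together with any accumulated additional_prefix). A rough sketch of the visible effect, mirroring the test added below; the import paths and the tokenize/parse_version_string signatures are assumptions about parso's internals, not part of this diff:

    # Sketch only, not from the commit.
    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    error_token, endmarker = tokenize(' "\n', parse_version_string())
    print(error_token.type)           # ERRORTOKEN
    print(repr(error_token.prefix))   # ' '    (previously '')
    print(repr(error_token.string))   # '"\n'  (previously ' "\n')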

parso/python/tokenize.py View File

@@ -28,7 +28,8 @@ from parso.utils import split_lines
 TokenCollection = namedtuple(
     'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens',
+    'pseudo_token single_quoted triple_quoted endpats whitespace '
+    'fstring_pattern_map always_break_tokens',
 )
 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
@@ -114,6 +115,7 @@ def _create_token_collection(version_info)
     # Note: we use unicode matching for names ("\w") but ascii matching for
     # number literals.
     Whitespace = r'[ \f\t]*'
+    whitespace = _compile(Whitespace)
     Comment = r'#[^\r\n]*'
     Name = r'\w+'
@@ -225,7 +227,7 @@ def _create_token_collection(version_info)
     pseudo_token_compiled = _compile(PseudoToken)
     return TokenCollection(
         pseudo_token_compiled, single_quoted, triple_quoted, endpats,
-        fstring_pattern_map, ALWAYS_BREAK_TOKENS
+        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
     )
@@ -354,7 +356,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0))
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
-    pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \
+    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
+        fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
     paren_level = 0  # count parentheses
     indents = [0]
@@ -435,10 +438,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0))
             pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:                             # scan for tokens
-                txt = line[pos:]
-                if txt.endswith('\n'):
+                if line.endswith('\n'):
                     new_line = True
-                yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                match = whitespace.match(line, pos)
+                pos = match.end()
+                yield PythonToken(
+                    ERRORTOKEN, line[pos:], (lnum, pos),
+                    additional_prefix + match.group(0)
+                )
                 additional_prefix = ''
                 break
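The last hunk above is the heart of the fix: rather than yielding line[pos:] wholesale, the tokenizer first consumes the run of blanks with the precompiled whitespace pattern and moves it into the prefix, so the error token starts where the unparsable text actually starts. A self-contained sketch of that splitting step (the helper name is made up for illustration, it is not a function in parso):

    import re

    # Same pattern as the Whitespace regex in the hunk above.
    _whitespace = re.compile(r'[ \f\t]*')

    def _split_error_text(line, pos, additional_prefix=''):
        # Hypothetical helper: peel leading blanks off the error text so
        # they end up in the prefix, like the new yield above does.
        match = _whitespace.match(line, pos)
        pos = match.end()
        prefix = additional_prefix + match.group(0)
        return prefix, line[pos:], pos

    prefix, text, pos = _split_error_text(' "\n', 0)
    assert (prefix, text, pos) == (' ', '"\n', 1)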

test/test_tokenize.py View File

@@ -227,3 +227,11 @@ def test_endmarker_end_pos()
 def test_indentation(code, types):
     actual_types = [t.type for t in _get_token_list(code)]
     assert actual_types == types + [ENDMARKER]
+
+
+def test_error_string():
+    t1, endmarker = _get_token_list(' "\n')
+    assert t1.type == ERRORTOKEN
+    assert t1.prefix == ' '
+    assert t1.string == '"\n'
+    assert endmarker.string == ''
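Assuming the test file path labelled above is right, the new test can be run on its own with pytest:

    pytest test/test_tokenize.py -k test_error_string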