Mirror of https://github.com/davidhalter/parso.git (synced 2025-12-10 22:51:59 +08:00)
Fix a prefix issue with error leafs.
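Summary of the change below: previously, when the tokenizer could not match a pseudo token, it emitted the rest of the line, including any leading whitespace, as the ERRORTOKEN's string. This commit compiles the existing Whitespace pattern, threads the compiled regex through TokenCollection into tokenize_lines, and strips leading whitespace off the error token so that it lands in the token's prefix instead. A regression test for an unterminated string covers the new behavior.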
@@ -28,7 +28,8 @@ from parso.utils import split_lines
 
 TokenCollection = namedtuple(
     'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens',
+    'pseudo_token single_quoted triple_quoted endpats whitespace '
+    'fstring_pattern_map always_break_tokens',
 )
 
 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
@@ -114,6 +115,7 @@ def _create_token_collection(version_info):
     # Note: we use unicode matching for names ("\w") but ascii matching for
     # number literals.
     Whitespace = r'[ \f\t]*'
+    whitespace = _compile(Whitespace)
     Comment = r'#[^\r\n]*'
     Name = r'\w+'
 
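The Whitespace pattern matches runs of spaces, form feeds, and tabs (newlines are handled separately by the tokenizer). A quick standalone check of what the newly compiled regex splits off, using plain re and the input from the new test at the bottom of this diff:

    import re

    # Same pattern as the Whitespace string in the hunk above.
    whitespace = re.compile(r'[ \f\t]*')

    # For ' "\n' the match covers only the leading space, so everything
    # from the quote onwards stays in the error token's string.
    match = whitespace.match(' "\n', 0)
    assert match.group(0) == ' '
    assert match.end() == 1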
@@ -225,7 +227,7 @@ def _create_token_collection(version_info):
     pseudo_token_compiled = _compile(PseudoToken)
     return TokenCollection(
         pseudo_token_compiled, single_quoted, triple_quoted, endpats,
-        fstring_pattern_map, ALWAYS_BREAK_TOKENS
+        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
     )
 
 
@@ -354,7 +356,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
-    pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \
+    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
+        fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
     paren_level = 0  # count parentheses
     indents = [0]
@@ -435,10 +438,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
             pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:  # scan for tokens
-                txt = line[pos:]
-                if txt.endswith('\n'):
+                if line.endswith('\n'):
                     new_line = True
-                yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                match = whitespace.match(line, pos)
+                pos = match.end()
+                yield PythonToken(
+                    ERRORTOKEN, line[pos:], (lnum, pos),
+                    additional_prefix + match.group(0)
+                )
                 additional_prefix = ''
                 break
 
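Taken together, the new branch records whether the whole line ends in a newline, then advances pos past any leading whitespace and prepends that whitespace to the token's prefix. A minimal self-contained sketch of the new yield, assuming a simplified PythonToken stand-in (names mirror the diff, not parso's real classes):

    import re
    from collections import namedtuple

    PythonToken = namedtuple('PythonToken', 'type string start_pos prefix')
    ERRORTOKEN = 'errortoken'
    whitespace = re.compile(r'[ \f\t]*')

    def error_token(line, lnum, pos, additional_prefix):
        # Mirrors the '+' lines above: whitespace moves into the prefix.
        match = whitespace.match(line, pos)
        pos = match.end()
        return PythonToken(ERRORTOKEN, line[pos:], (lnum, pos),
                           additional_prefix + match.group(0))

    token = error_token(' "\n', 1, 0, '')
    assert token.prefix == ' '     # previously part of token.string
    assert token.string == '"\n'
    assert token.start_pos == (1, 1)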
@@ -227,3 +227,11 @@ def test_endmarker_end_pos():
 def test_indentation(code, types):
     actual_types = [t.type for t in _get_token_list(code)]
     assert actual_types == types + [ENDMARKER]
+
+
+def test_error_string():
+    t1, endmarker = _get_token_list(' "\n')
+    assert t1.type == ERRORTOKEN
+    assert t1.prefix == ' '
+    assert t1.string == '"\n'
+    assert endmarker.string == ''
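Why the prefix placement matters: parso rebuilds source text from the tree by concatenating each leaf's prefix and string, so misplaced whitespace breaks exact round-tripping. A hedged end-to-end check, assuming a parso version that contains this fix:

    import parso

    # Error recovery keeps the unterminated string as an error leaf; with
    # the fix, the leading space lives in that leaf's prefix.
    module = parso.parse(' "\n')
    assert module.get_code() == ' "\n'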