Fix: End detection for strings was mostly wrong, fixes #51

Dave Halter
2019-01-23 10:11:41 +01:00
parent ef56debb78
commit 60e4591837
4 changed files with 48 additions and 39 deletions
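For context on what was broken: an f-string whose format spec (the ":.2f" in "{x:.2f}") is followed by more string content tripped the tokenizer's end detection. A minimal sketch for inspecting the tokens, assuming the tokenize(code, version_info, start_pos=(1, 0)) signature shown in the diff below and parso's Token namedtuple fields (type, string, start_pos, prefix):

    from parso.python.tokenize import tokenize

    # Tokenize the case from the new tests; before this fix the format spec
    # confused the search for the end of the string part.
    for token in tokenize("f'Some {x:.2f} and some {y}'", version_info=(3, 6)):
        print(token.type, repr(token.string))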

View File

@@ -204,7 +204,6 @@ class Parser(BaseParser):
     def _recovery_tokenize(self, tokens):
         for token in tokens:
             typ = token[0]
-            # print(tok_name[typ], repr(value), start_pos, repr(prefix))
             if typ == DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.

View File

@@ -273,6 +273,9 @@ class FStringNode(object):
     def close_parentheses(self, character):
         self.parentheses_count -= 1
+        if self.parentheses_count == 0:
+            # No parentheses means that the format spec is also finished.
+            self.format_spec_count = 0
 
     def allow_multiline(self):
         return len(self.quote) == 3
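The hunk above resets the format-spec counter as soon as the last open parenthesis closes. A standalone sketch of that bookkeeping rule (simplified class, not parso's real FStringNode), showing why text after the closing "}" must count as plain string content again:

    class Node:
        # Simplified stand-in for the relevant FStringNode state.
        def __init__(self):
            self.parentheses_count = 1   # we are inside "{...}"
            self.format_spec_count = 0

        def open_format_spec(self):      # saw ":" inside the expression
            self.format_spec_count += 1

        def close_parentheses(self):
            self.parentheses_count -= 1
            if self.parentheses_count == 0:
                # No open parentheses means the format spec is finished too.
                self.format_spec_count = 0

    node = Node()
    node.open_format_spec()              # f"{x:.2f}" -> ":" starts the spec
    node.close_parentheses()             # "}" closes the expression
    assert node.format_spec_count == 0   # what follows is string again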
@@ -305,35 +308,32 @@ def _check_fstring_ending(fstring_stack, token, from_start=False):
 def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
     tos = fstring_stack[-1]
-    if tos.is_in_expr():
-        return '', pos
+    allow_multiline = tos.allow_multiline()
+    if allow_multiline:
+        match = fstring_string_multi_line.match(line, pos)
     else:
-        new_pos = pos
-        allow_multiline = tos.allow_multiline()
-        if allow_multiline:
-            match = fstring_string_multi_line.match(line, pos)
-        else:
-            match = fstring_string_single_line.match(line, pos)
-        if match is None:
-            string = tos.previous_lines
-        else:
-            if not tos.previous_lines:
-                tos.last_string_start_pos = (lnum, pos)
+        match = fstring_string_single_line.match(line, pos)
+    if match is None:
+        return tos.previous_lines, pos
 
-            string = match.group(0)
-            for fstring_stack_node in fstring_stack:
-                end_match = endpats[fstring_stack_node.quote].match(string)
-                if end_match is not None:
-                    string = match.group(0)[:-len(fstring_stack_node.quote)]
+    if not tos.previous_lines:
+        tos.last_string_start_pos = (lnum, pos)
 
-            new_pos += len(string)
-            if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
-                tos.previous_lines += string
-                string = ''
-            else:
-                string = tos.previous_lines + string
+    string = match.group(0)
+    for fstring_stack_node in fstring_stack:
+        end_match = endpats[fstring_stack_node.quote].match(string)
+        if end_match is not None:
+            string = end_match.group(0)[:-len(fstring_stack_node.quote)]
 
-        return string, new_pos
+    new_pos = pos
+    new_pos += len(string)
+    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
+        tos.previous_lines += string
+        string = ''
+    else:
+        string = tos.previous_lines + string
+
+    return string, new_pos
 
 
 def tokenize(code, version_info, start_pos=(1, 0)):
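The essential fix in this hunk is the switch from match.group(0) to end_match.group(0) when truncating at a closing quote: the end pattern can match only a prefix of the chunk, so slicing the full chunk by the quote length cuts in the wrong place. An illustration with plain re (hypothetical pattern and input, not parso's real endpats):

    import re

    quote = '"'
    endpat = re.compile(r'[^"]*"')          # roughly: text up to a closing quote
    string = 'Some "tail beyond the end'    # chunk found by the string pattern

    end_match = endpat.match(string)
    # old (buggy): truncate the whole chunk, keeping text past the real end
    old = string[:-len(quote)]              # 'Some "tail beyond the en'
    # new (fixed): truncate only what the end pattern actually matched
    new = end_match.group(0)[:-len(quote)]  # 'Some '
    assert new == 'Some '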
@@ -348,7 +348,6 @@ def _print_tokens(func):
""" """
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
for token in func(*args, **kwargs): for token in func(*args, **kwargs):
print(token)
yield token yield token
return wrapper return wrapper
@@ -423,17 +422,19 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
         while pos < max:
             if fstring_stack:
-                string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
-                if string:
-                    yield PythonToken(
-                        FSTRING_STRING, string,
-                        fstring_stack[-1].last_string_start_pos,
-                        # Never has a prefix because it can start anywhere and
-                        # include whitespace.
-                        prefix=''
-                    )
-                    fstring_stack[-1].previous_lines = ''
-                    continue
+                tos = fstring_stack[-1]
+                if not tos.is_in_expr():
+                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
+                    if string:
+                        yield PythonToken(
+                            FSTRING_STRING, string,
+                            tos.last_string_start_pos,
+                            # Never has a prefix because it can start anywhere and
+                            # include whitespace.
+                            prefix=''
+                        )
+                        tos.previous_lines = ''
+                        continue
 
             if pos == max:
                 break

View File

@@ -262,6 +262,7 @@ def test_too_many_levels_of_indentation():
r'f"\\\""', r'f"\\\""',
r'fr"\""', r'fr"\""',
r'fr"\\\""', r'fr"\\\""',
r"print(f'Some {x:.2f} and some {y}')",
] ]
) )
def test_valid_fstrings(code): def test_valid_fstrings(code):

View File

@@ -329,8 +329,16 @@ def test_backslash():
('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]), ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]), ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
(r'print(f"Some {x:.2f}a{y}")', [
NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
]),
] ]
) )
def test_fstring(code, types, version_ge_py36): def test_fstring(code, types, version_ge_py36):
actual_types = [t.type for t in _get_token_list(code, version_ge_py36)] actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
assert actual_types == types + [ENDMARKER] assert types + [ENDMARKER] == actual_types
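To reproduce the first new expectation by hand (a sketch reusing the _get_token_list helper and version_ge_py36 fixture from this test module):

    def test_format_spec_then_expr(version_ge_py36):
        # Mirrors the new parametrized case: a ".2f" spec followed by "{y}".
        actual = [t.type for t in _get_token_list(r'f"Some {x:.2f}{y}"', version_ge_py36)]
        assert actual == [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
                          FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END, ENDMARKER]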