From 60e4591837b11985bed02d23e1500ee9027d667d Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Wed, 23 Jan 2019 10:11:41 +0100
Subject: [PATCH] Fix: End detection for strings was mostly wrong, fixes #51

---
 parso/python/parser.py     |  1 -
 parso/python/tokenize.py   | 75 +++++++++++++++++++-------------------
 test/test_python_errors.py |  1 +
 test/test_tokenize.py      | 10 ++++-
 4 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/parso/python/parser.py b/parso/python/parser.py
index 69678e6..d98eccd 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -204,7 +204,6 @@ class Parser(BaseParser):
     def _recovery_tokenize(self, tokens):
         for token in tokens:
             typ = token[0]
-            # print(tok_name[typ], repr(value), start_pos, repr(prefix))
             if typ == DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index a6f4979..6042972 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -273,6 +273,9 @@ class FStringNode(object):
 
     def close_parentheses(self, character):
         self.parentheses_count -= 1
+        if self.parentheses_count == 0:
+            # No parentheses means that the format spec is also finished.
+            self.format_spec_count = 0
 
     def allow_multiline(self):
         return len(self.quote) == 3
@@ -305,35 +308,32 @@ def _check_fstring_ending(fstring_stack, token, from_start=False):
 
 def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
     tos = fstring_stack[-1]
-    if tos.is_in_expr():
-        return '', pos
+    allow_multiline = tos.allow_multiline()
+    if allow_multiline:
+        match = fstring_string_multi_line.match(line, pos)
     else:
-        new_pos = pos
-        allow_multiline = tos.allow_multiline()
-        if allow_multiline:
-            match = fstring_string_multi_line.match(line, pos)
-        else:
-            match = fstring_string_single_line.match(line, pos)
-        if match is None:
-            string = tos.previous_lines
-        else:
-            if not tos.previous_lines:
-                tos.last_string_start_pos = (lnum, pos)
+        match = fstring_string_single_line.match(line, pos)
+    if match is None:
+        return tos.previous_lines, pos
 
-            string = match.group(0)
-            for fstring_stack_node in fstring_stack:
-                end_match = endpats[fstring_stack_node.quote].match(string)
-                if end_match is not None:
-                    string = match.group(0)[:-len(fstring_stack_node.quote)]
+    if not tos.previous_lines:
+        tos.last_string_start_pos = (lnum, pos)
 
-            new_pos += len(string)
-            if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
-                tos.previous_lines += string
-                string = ''
-            else:
-                string = tos.previous_lines + string
+    string = match.group(0)
+    for fstring_stack_node in fstring_stack:
+        end_match = endpats[fstring_stack_node.quote].match(string)
+        if end_match is not None:
+            string = end_match.group(0)[:-len(fstring_stack_node.quote)]
 
-    return string, new_pos
+    new_pos = pos
+    new_pos += len(string)
+    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
+        tos.previous_lines += string
+        string = ''
+    else:
+        string = tos.previous_lines + string
+
+    return string, new_pos
 
 
 def tokenize(code, version_info, start_pos=(1, 0)):
@@ -348,7 +348,6 @@ def _print_tokens(func):
     """
    def wrapper(*args, **kwargs):
        for token in func(*args, **kwargs):
-            print(token)
            yield token

    return wrapper
@@ -423,17 +422,19 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
         while pos < max:
             if fstring_stack:
-                string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
-                if string:
-                    yield PythonToken(
-                        FSTRING_STRING, string,
-                        fstring_stack[-1].last_string_start_pos,
-                        # Never has a prefix because it can start anywhere and
-                        # include whitespace.
-                        prefix=''
-                    )
-                fstring_stack[-1].previous_lines = ''
-                continue
+                tos = fstring_stack[-1]
+                if not tos.is_in_expr():
+                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
+                    if string:
+                        yield PythonToken(
+                            FSTRING_STRING, string,
+                            tos.last_string_start_pos,
+                            # Never has a prefix because it can start anywhere and
+                            # include whitespace.
+                            prefix=''
+                        )
+                    tos.previous_lines = ''
+                    continue
 
             if pos == max:
                 break
diff --git a/test/test_python_errors.py b/test/test_python_errors.py
index 480fc2b..4b73cde 100644
--- a/test/test_python_errors.py
+++ b/test/test_python_errors.py
@@ -262,6 +262,7 @@ def test_too_many_levels_of_indentation():
         r'f"\\\""',
         r'fr"\""',
         r'fr"\\\""',
+        r"print(f'Some {x:.2f} and some {y}')",
     ]
 )
 def test_valid_fstrings(code):
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index b457a2a..db96513 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -329,8 +329,16 @@ def test_backslash():
         ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
         ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
         (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+        (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
+                                 FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
+        (r'print(f"Some {x:.2f}a{y}")', [
+            NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
+            FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
+        ]),
+
     ]
 )
 def test_fstring(code, types, version_ge_py36):
     actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
-    assert actual_types == types + [ENDMARKER]
+    assert types + [ENDMARKER] == actual_types
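-- 
A minimal way to eyeball the fix, assuming a parso checkout with this
patch applied (this sketch is not part of the commit itself): tokenize and
parse_version_string are parso's existing helpers, used with the
signatures visible above; the expected output is copied from the new
test_fstring case.

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    # A format spec like ':.2f' inside an f-string expression used to
    # derail end detection for the surrounding FSTRING_STRING parts.
    code = 'print(f"Some {x:.2f}a{y}")'
    for token in tokenize(code, parse_version_string('3.6')):
        print(token.type, repr(token.string), token.start_pos)

    # Expected token types, per the new test case:
    # NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP, FSTRING_STRING,
    # OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP, ENDMARKER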