From ad57a5180026893db5b921674b43a8bde4593b56 Mon Sep 17 00:00:00 2001 From: Benjamin Woodruff Date: Thu, 11 Jul 2019 15:05:20 -0700 Subject: [PATCH] Fix line continuation characters inside f-strings Line continuation characters are valid inside of strings, but weren't handled correctly in certain cases with f-strings, due to some small tokenizer bugs. This pull request addresses those issues and adds tests to validate the new logic. --- parso/python/tokenize.py | 8 +-- test/test_fstring.py | 104 ++++++++++++++++++++++++++------------- test/test_tokenize.py | 36 +++++++++++++- 3 files changed, 110 insertions(+), 38 deletions(-) diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 5b70d94..17d58a3 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -118,9 +118,9 @@ def _get_token_collection(version_info): return result -fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+') +fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+') fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') -fstring_format_spec_single_line = _compile(r'[^{}\r\n]+') +fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+') fstring_format_spec_multi_line = _compile(r'[^{}]+') @@ -340,7 +340,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos): new_pos = pos new_pos += len(string) - if allow_multiline and (string.endswith('\n') or string.endswith('\r')): + # even if allow_multiline is False, we still need to check for trailing + # newlines, because a single-line f-string can contain line continuations + if string.endswith('\n') or string.endswith('\r'): tos.previous_lines += string string = '' else: diff --git a/test/test_fstring.py b/test/test_fstring.py index df3f10f..2a07ce7 100644 --- a/test/test_fstring.py +++ b/test/test_fstring.py @@ -12,33 +12,57 @@ def grammar(): @pytest.mark.parametrize( 'code', [ - '{1}', - '{1:}', - '', - '{1!a}', - '{1!a:1}', - '{1:1}', - 
'{1:1.{32}}', - '{1::>4}', - '{foo} {bar}', - '{x:{y}}', - '{x:{y:}}', - '{x:{y:1}}', + # simple cases + 'f"{1}"', + 'f"""{1}"""', + 'f"{foo} {bar}"', + + # empty string + 'f""', + 'f""""""', + + # empty format specifier is okay + 'f"{1:}"', + + # use of conversion options + 'f"{1!a}"', + 'f"{1!a:1}"', + + # format specifiers + 'f"{1:1}"', + 'f"{1:1.{32}}"', + 'f"{1::>4}"', + 'f"{x:{y}}"', + 'f"{x:{y:}}"', + 'f"{x:{y:1}}"', # Escapes - '{{}}', - '{{{1}}}', - '{{{1}', - '1{{2{{3', - '}}', + 'f"{{}}"', + 'f"{{{1}}}"', + 'f"{{{1}"', + 'f"1{{2{{3"', + 'f"}}"', # New Python 3.8 syntax f'{a=}' - '{a=}', - '{a()=}', + 'f"{a=}"', + 'f"{a()=}"', + + # multiline f-string + 'f"""abc\ndef"""', + 'f"""abc{\n123}def"""', + + # a line continuation inside of an fstring_string + 'f"abc\\\ndef"', + 'f"\\\n{123}\\\n"', + + # a line continuation inside of an fstring_expr + 'f"{\\\n123}"', + + # a line continuation inside of an format spec + 'f"{123:.2\\\nf}"', ] ) def test_valid(code, grammar): - code = 'f"""%s"""' % code module = grammar.parse(code, error_recovery=False) fstring = module.children[0] assert fstring.type == 'fstring' @@ -47,23 +71,34 @@ def test_valid(code, grammar): @pytest.mark.parametrize( 'code', [ - '}', - '{', - '{1!{a}}', - '{!{a}}', - '{}', - '{:}', - '{:}}}', - '{:1}', - '{!:}', - '{!}', - '{!a}', - '{1:{}}', - '{1:{:}}', + # an f-string can't contain unmatched curly braces + 'f"}"', + 'f"{"', + 'f"""}"""', + 'f"""{"""', + + # invalid conversion characters + 'f"{1!{a}}"', + 'f"{!{a}}"', + + # The curly braces must contain an expression + 'f"{}"', + 'f"{:}"', + 'f"{:}}}"', + 'f"{:1}"', + 'f"{!:}"', + 'f"{!}"', + 'f"{!a}"', + + # invalid (empty) format specifiers + 'f"{1:{}}"', + 'f"{1:{:}}"', + + # a newline without a line continuation inside a single-line string + 'f"abc\ndef"', ] ) def test_invalid(code, grammar): - code = 'f"""%s"""' % code with pytest.raises(ParserSyntaxError): grammar.parse(code, error_recovery=False) @@ -95,6 +130,7 @@ def 
test_tokenize_start_pos(code, positions): """), 'f"foo', 'f"""foo', + 'f"abc\ndef"', ] ) def test_roundtrip(grammar, code): diff --git a/test/test_tokenize.py b/test/test_tokenize.py index db96513..a7bf1ff 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -16,6 +16,7 @@ from parso.python.tokenize import PythonToken NAME = PythonTokenTypes.NAME NEWLINE = PythonTokenTypes.NEWLINE STRING = PythonTokenTypes.STRING +NUMBER = PythonTokenTypes.NUMBER INDENT = PythonTokenTypes.INDENT DEDENT = PythonTokenTypes.DEDENT ERRORTOKEN = PythonTokenTypes.ERRORTOKEN @@ -330,13 +331,46 @@ def test_backslash(): ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + + # format spec (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP, FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]), + + # multiline f-string + ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + ('f"""abc{\n123}def"""', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_string + ('f"abc\\\ndef"', [ + FSTRING_START, FSTRING_STRING, FSTRING_END + ]), + ('f"\\\n{123}\\\n"', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_expr + ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]), + + # a line continuation inside of an format spec + ('f"{123:.2\\\nf}"', [ + FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END + ]), + + # a newline without a line continuation inside a single-line string is + # wrong, and will generate an ERRORTOKEN + ('f"abc\ndef"', [ + FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN + ]), + + # a more complex example (r'print(f"Some {x:.2f}a{y}")', [ NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP, FSTRING_STRING, OP, 
FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP ]), - ] ) def test_fstring(code, types, version_ge_py36):