Fix line continuation characters inside f-strings

Line continuation characters are valid inside of strings, but weren't handled correctly in certain cases with f-strings, due to some small tokenizer bugs. This pull request addresses those issues and adds tests to validate the new logic.
2025-12-08 21:54:54 +08:00 · 2019-07-11 15:05:20 -07:00
parent 19de3eb5ca
commit ad57a51800
3 changed files with 110 additions and 38 deletions
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -118,9 +118,9 @@ def _get_token_collection(version_info):
        return result
-fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
+fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
-fstring_format_spec_single_line = _compile(r'[^{}\r\n]+')
+fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_format_spec_multi_line = _compile(r'[^{}]+')
@@ -340,7 +340,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
    new_pos = pos
    new_pos += len(string)
-    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
+    # even if allow_multiline is False, we still need to check for trailing
    # newlines, because a single-line f-string can contain line continuations
    if string.endswith('\n') or string.endswith('\r'):
        tos.previous_lines += string
        string = ''
    else:
--- a/test/test_fstring.py
+++ b/test/test_fstring.py
@@ -12,33 +12,57 @@ def grammar():
@pytest.mark.parametrize(
    'code', [
-        '{1}',
+        # simple cases
-        '{1:}',
+        'f"{1}"',
-        '',
+        'f"""{1}"""',
-        '{1!a}',
+        'f"{foo} {bar}"',
-        '{1!a:1}',
+
-        '{1:1}',
+        # empty string
-        '{1:1.{32}}',
+        'f""',
-        '{1::>4}',
+        'f""""""',
-        '{foo} {bar}',
+
-        '{x:{y}}',
+        # empty format specifier is okay
-        '{x:{y:}}',
+        'f"{1:}"',
-        '{x:{y:1}}',
+
        # use of conversion options
        'f"{1!a}"',
        'f"{1!a:1}"',
        # format specifiers
        'f"{1:1}"',
        'f"{1:1.{32}}"',
        'f"{1::>4}"',
        'f"{x:{y}}"',
        'f"{x:{y:}}"',
        'f"{x:{y:1}}"',
        # Escapes
-        '{{}}',
+        'f"{{}}"',
-        '{{{1}}}',
+        'f"{{{1}}}"',
-        '{{{1}',
+        'f"{{{1}"',
-        '1{{2{{3',
+        'f"1{{2{{3"',
-        '}}',
+        'f"}}"',
        # New Python 3.8 syntax f'{a=}'
-        '{a=}',
+        'f"{a=}"',
-        '{a()=}',
+        'f"{a()=}"',
        # multiline f-string
        'f"""abc\ndef"""',
        'f"""abc{\n123}def"""',
        # a line continuation inside of an fstring_string
        'f"abc\\\ndef"',
        'f"\\\n{123}\\\n"',
        # a line continuation inside of an fstring_expr
        'f"{\\\n123}"',
        # a line continuation inside of an format spec
        'f"{123:.2\\\nf}"',
    ]
 )
 def test_valid(code, grammar):
    code = 'f"""%s"""' % code
    module = grammar.parse(code, error_recovery=False)
    fstring = module.children[0]
    assert fstring.type == 'fstring'
@@ -47,23 +71,34 @@ def test_valid(code, grammar):
@pytest.mark.parametrize(
    'code', [
-        '}',
+        # an f-string can't contain unmatched curly braces
-        '{',
+        'f"}"',
-        '{1!{a}}',
+        'f"{"',
-        '{!{a}}',
+        'f"""}"""',
-        '{}',
+        'f"""{"""',
-        '{:}',
+
-        '{:}}}',
+        # invalid conversion characters
-        '{:1}',
+        'f"{1!{a}}"',
-        '{!:}',
+        'f"{!{a}}"',
-        '{!}',
+
-        '{!a}',
+        # The curly braces must contain an expression
-        '{1:{}}',
+        'f"{}"',
-        '{1:{:}}',
+        'f"{:}"',
        'f"{:}}}"',
        'f"{:1}"',
        'f"{!:}"',
        'f"{!}"',
        'f"{!a}"',
        # invalid (empty) format specifiers
        'f"{1:{}}"',
        'f"{1:{:}}"',
        # a newline without a line continuation inside a single-line string
        'f"abc\ndef"',
    ]
 )
 def test_invalid(code, grammar):
    code = 'f"""%s"""' % code
    with pytest.raises(ParserSyntaxError):
        grammar.parse(code, error_recovery=False)
@@ -95,6 +130,7 @@ def test_tokenize_start_pos(code, positions):
            """),
        'f"foo',
        'f"""foo',
        'f"abc\ndef"',
    ]
 )
 def test_roundtrip(grammar, code):
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -16,6 +16,7 @@ from parso.python.tokenize import PythonToken
 NAME = PythonTokenTypes.NAME
 NEWLINE = PythonTokenTypes.NEWLINE
 STRING = PythonTokenTypes.STRING
 NUMBER = PythonTokenTypes.NUMBER
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
@@ -330,13 +331,46 @@ def test_backslash():
        ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
        # format spec
        (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
                                 FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
        # multiline f-string
        ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
        ('f"""abc{\n123}def"""', [
            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
            FSTRING_END
        ]),
        # a line continuation inside of an fstring_string
        ('f"abc\\\ndef"', [
            FSTRING_START, FSTRING_STRING, FSTRING_END
        ]),
        ('f"\\\n{123}\\\n"', [
            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
            FSTRING_END
        ]),
        # a line continuation inside of an fstring_expr
        ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
        # a line continuation inside of an format spec
        ('f"{123:.2\\\nf}"', [
            FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
        ]),
        # a newline without a line continuation inside a single-line string is
        # wrong, and will generate an ERRORTOKEN
        ('f"abc\ndef"', [
            FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
        ]),
        # a more complex example
        (r'print(f"Some {x:.2f}a{y}")', [
            NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
            FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
        ]),
    ]
 )
 def test_fstring(code, types, version_ge_py36):