Fix f-string escapes, fixes #48

The tokenizer did not properly detect backslash-escaped quotes when searching for the end of an f-string.
2025-12-07 21:34:32 +08:00 · 2019-01-22 22:15:48 +01:00
parent dc2582f488
commit ef56debb78
4 changed files with 33 additions and 11 deletions
--- a/conftest.py
+++ b/conftest.py
@@ -57,6 +57,8 @@ def pytest_generate_tests(metafunc):
        metafunc.parametrize('each_py2_version', VERSIONS_2)
    elif 'each_py3_version' in metafunc.fixturenames:
        metafunc.parametrize('each_py3_version', VERSIONS_3)
+    elif 'version_ge_py36' in metafunc.fixturenames:
+        metafunc.parametrize('version_ge_py36', ['3.6', '3.7'])


 class NormalizerIssueCase(object):
@@ -151,8 +153,5 @@ def works_ge_py3(each_version):

@pytest.fixture
 def works_ge_py35(each_version):
-    """
-    Works only greater equal Python 3.3.
-    """
    version_info = parse_version_string(each_version)
    return Checker(each_version, version_info >= (3, 5))
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -303,7 +303,7 @@ def _check_fstring_ending(fstring_stack, token, from_start=False):
    return fstring_index, fstring_end


-def _find_fstring_string(fstring_stack, line, lnum, pos):
+def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
    tos = fstring_stack[-1]
    if tos.is_in_expr():
        return '', pos
@@ -322,10 +322,9 @@ def _find_fstring_string(fstring_stack, line, lnum, pos):

            string = match.group(0)
            for fstring_stack_node in fstring_stack:
-                try:
-                    string = string[:string.index(fstring_stack_node.quote)]
-                except ValueError:
-                    pass  # The string was not found.
+                end_match = endpats[fstring_stack_node.quote].match(string)
+                if end_match is not None:
+                    string = end_match.group(0)[:-len(fstring_stack_node.quote)]

            new_pos += len(string)
            if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
@@ -424,7 +423,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):

        while pos < max:
            if fstring_stack:
-                string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
+                string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
                if string:
                    yield PythonToken(
                        FSTRING_STRING, string,
@@ -559,6 +558,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                if token[-1] in '\r\n':                       # continued string
+                    # This means that a single-quoted string ends with a
+                    # backslash and is continued on the next line.
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
--- a/test/test_python_errors.py
+++ b/test/test_python_errors.py
@@ -258,6 +258,10 @@ def test_too_many_levels_of_indentation():
@pytest.mark.parametrize(
    'code', [
        "f'{*args,}'",
+        r'f"\""',
+        r'f"\\\""',
+        r'fr"\""',
+        r'fr"\\\""',
    ]
 )
 def test_valid_fstrings(code):
@@ -267,6 +271,8 @@ def test_valid_fstrings(code):
@pytest.mark.parametrize(
    ('code', 'message'), [
        ("f'{1+}'", ('invalid syntax')),
+        (r'f"\"', ('invalid syntax')),
+        (r'fr"\"', ('invalid syntax')),
    ]
 )
 def test_invalid_fstrings(code, message):
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -23,11 +23,13 @@ OP = PythonTokenTypes.OP
 ENDMARKER = PythonTokenTypes.ENDMARKER
 ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
 FSTRING_START = PythonTokenTypes.FSTRING_START
+FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
+FSTRING_END = PythonTokenTypes.FSTRING_END


-def _get_token_list(string):
+def _get_token_list(string, version=None):
    # Load the current version.
-    version_info = parse_version_string()
+    version_info = parse_version_string(version)
    return list(tokenize.tokenize(string, version_info))


@@ -318,3 +320,17 @@ def test_backslash():
    code = '\\\n# 1 \n'
    endmarker, = _get_token_list(code)
    assert endmarker.prefix == code
+
+
+@pytest.mark.parametrize(
+    ('code', 'types'), [
+        ('f"', [FSTRING_START]),
+        ('f""', [FSTRING_START, FSTRING_END]),
+        ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
+        ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
+        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+    ]
+)
+def test_fstring(code, types, version_ge_py36):
+    actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
+    assert actual_types == types + [ENDMARKER]