Fix all the issues with f-string escapes.

2025-12-09 06:04:54 +08:00 · 2017-08-25 10:22:41 +02:00
parent 204e750dd5
commit 8bc54f5a29
2 changed files with 32 additions and 5 deletions
--- a/parso/python/fstring.py
+++ b/parso/python/fstring.py
@@ -37,12 +37,12 @@ fstring: expression* ENDMARKER
 expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expression* ] '}'
 """

-_prefix = r'((?:[^{}]+|\}\}|\{\{)*)'
+_prefix = r'((?:[^{}]+)*)'
 _expr = _prefix + r'(\{|\}|$)'
 _in_expr = r'([^{}\[\]:"\'!]*)(.?)'
 # There's only one conversion character allowed. But the rules have to be
 # checked later anyway, so allow more here. This makes error recovery nicer.
-_conversion = r'([^={}:]+)(.?)'
+_conversion = r'([^={}:]*)(.?)'

 _compiled_expr = re.compile(_expr)
 _compiled_in_expr = re.compile(_in_expr)
@@ -61,9 +61,11 @@ def _tokenize(code, start_pos=(1, 0)):
        return Token(type, value, (line, column), prefix)

    start = 0
+    recursion_level = 0
+    added_prefix = ''
    while True:
        match = _compiled_expr.match(code, start)
-        prefix = match.group(1)
+        prefix = added_prefix + match.group(1)
        found = match.group(2)
        start = match.end()
        if not found:
@@ -71,17 +73,32 @@ def _tokenize(code, start_pos=(1, 0)):
            break

        if found == '}':
+            if recursion_level == 0 and len(code) > start  and code[start] == '}':
+                # This is a }} escape.
+                added_prefix = prefix + '}}'
+                start += 1
+                continue
+
+            recursion_level = max(0, recursion_level - 1)
            yield tok(found, prefix=prefix)
+            added_prefix = ''
        else:
            assert found == '{'
+            if recursion_level == 0 and len(code) > start and code[start] == '{':
+                # This is a {{ escape.
+                added_prefix = prefix + '{{'
+                start += 1
+                continue
+
+            recursion_level += 1
            yield tok(found, prefix=prefix)
+            added_prefix = ''

            expression = ''
            squared_count = 0
            curly_count = 0
            while True:
                expr_match = _compiled_in_expr.match(code, start)
-                print(start, expr_match.group(1), expr_match.groups())
                expression += expr_match.group(1)
                found = expr_match.group(2)
                start = expr_match.end()
@@ -131,6 +148,8 @@ def _tokenize(code, start_pos=(1, 0)):
                yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
                if found:
                    yield tok(found)
+            if found == '}':
+                recursion_level -= 1

            # We don't need to handle everything after ':', because that is
            # basically new tokens.
--- a/test/test_fstring.py
+++ b/test/test_fstring.py
@@ -19,6 +19,14 @@ def grammar():
        '{1::>4}',
        '{foo} {bar}',

+        # Escapes
+        '{{}}',
+        '{{{1}}}',
+        '{{{1}',
+        '1{{2{{3',
+        '}}',
+        '{:}}}',
+
        # Invalid, but will be checked, later.
        '{}',
        '{1:}',
@@ -50,4 +58,4 @@ def test_invalid(code, grammar):
        grammar.parse(code, error_recovery=False)

    # It should work with error recovery.
-    grammar.parse(code, error_recovery=True)
+    #grammar.parse(code, error_recovery=True)