From ad57a5180026893db5b921674b43a8bde4593b56 Mon Sep 17 00:00:00 2001
From: Benjamin Woodruff <bgw@fb.com>
Date: Thu, 11 Jul 2019 15:05:20 -0700
Subject: [PATCH] Fix line continuation characters inside f-strings

Line continuation characters are valid inside of strings, but weren't
handled correctly in certain cases with f-strings, due to some small
tokenizer bugs.

This pull request addresses those issues and adds tests to validate
the new logic.
---
 parso/python/tokenize.py |   8 +--
 test/test_fstring.py     | 104 ++++++++++++++++++++++++++-------------
 test/test_tokenize.py    |  36 +++++++++++++-
 3 files changed, 110 insertions(+), 38 deletions(-)

diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 5b70d94..17d58a3 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -118,9 +118,9 @@ def _get_token_collection(version_info):
         return result
 
 
-fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
+fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
-fstring_format_spec_single_line = _compile(r'[^{}\r\n]+')
+fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_format_spec_multi_line = _compile(r'[^{}]+')
 
 
@@ -340,7 +340,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
 
     new_pos = pos
     new_pos += len(string)
-    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
+    # even if allow_multiline is False, we still need to check for trailing
+    # newlines, because a single-line f-string can contain line continuations
+    if string.endswith('\n') or string.endswith('\r'):
         tos.previous_lines += string
         string = ''
     else:
diff --git a/test/test_fstring.py b/test/test_fstring.py
index df3f10f..2a07ce7 100644
--- a/test/test_fstring.py
+++ b/test/test_fstring.py
@@ -12,33 +12,57 @@ def grammar():
 
 @pytest.mark.parametrize(
     'code', [
-        '{1}',
-        '{1:}',
-        '',
-        '{1!a}',
-        '{1!a:1}',
-        '{1:1}',
-        '{1:1.{32}}',
-        '{1::>4}',
-        '{foo} {bar}',
-        '{x:{y}}',
-        '{x:{y:}}',
-        '{x:{y:1}}',
+        # simple cases
+        'f"{1}"',
+        'f"""{1}"""',
+        'f"{foo} {bar}"',
+
+        # empty string
+        'f""',
+        'f""""""',
+
+        # empty format specifier is okay
+        'f"{1:}"',
+
+        # use of conversion options
+        'f"{1!a}"',
+        'f"{1!a:1}"',
+
+        # format specifiers
+        'f"{1:1}"',
+        'f"{1:1.{32}}"',
+        'f"{1::>4}"',
+        'f"{x:{y}}"',
+        'f"{x:{y:}}"',
+        'f"{x:{y:1}}"',
 
         # Escapes
-        '{{}}',
-        '{{{1}}}',
-        '{{{1}',
-        '1{{2{{3',
-        '}}',
+        'f"{{}}"',
+        'f"{{{1}}}"',
+        'f"{{{1}"',
+        'f"1{{2{{3"',
+        'f"}}"',
 
         # New Python 3.8 syntax f'{a=}'
-        '{a=}',
-        '{a()=}',
+        'f"{a=}"',
+        'f"{a()=}"',
+
+        # multiline f-string
+        'f"""abc\ndef"""',
+        'f"""abc{\n123}def"""',
+
+        # a line continuation inside of an fstring_string
+        'f"abc\\\ndef"',
+        'f"\\\n{123}\\\n"',
+
+        # a line continuation inside of an fstring_expr
+        'f"{\\\n123}"',
+
+        # a line continuation inside of a format spec
+        'f"{123:.2\\\nf}"',
     ]
 )
 def test_valid(code, grammar):
-    code = 'f"""%s"""' % code
     module = grammar.parse(code, error_recovery=False)
     fstring = module.children[0]
     assert fstring.type == 'fstring'
@@ -47,23 +71,34 @@ def test_valid(code, grammar):
 
 @pytest.mark.parametrize(
     'code', [
-        '}',
-        '{',
-        '{1!{a}}',
-        '{!{a}}',
-        '{}',
-        '{:}',
-        '{:}}}',
-        '{:1}',
-        '{!:}',
-        '{!}',
-        '{!a}',
-        '{1:{}}',
-        '{1:{:}}',
+        # an f-string can't contain unmatched curly braces
+        'f"}"',
+        'f"{"',
+        'f"""}"""',
+        'f"""{"""',
+
+        # invalid conversion characters
+        'f"{1!{a}}"',
+        'f"{!{a}}"',
+
+        # The curly braces must contain an expression
+        'f"{}"',
+        'f"{:}"',
+        'f"{:}}}"',
+        'f"{:1}"',
+        'f"{!:}"',
+        'f"{!}"',
+        'f"{!a}"',
+
+        # invalid (empty) format specifiers
+        'f"{1:{}}"',
+        'f"{1:{:}}"',
+
+        # a newline without a line continuation inside a single-line string
+        'f"abc\ndef"',
     ]
 )
 def test_invalid(code, grammar):
-    code = 'f"""%s"""' % code
     with pytest.raises(ParserSyntaxError):
         grammar.parse(code, error_recovery=False)
 
@@ -95,6 +130,7 @@ def test_tokenize_start_pos(code, positions):
             """),
         'f"foo',
         'f"""foo',
+        'f"abc\ndef"',
     ]
 )
 def test_roundtrip(grammar, code):
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index db96513..a7bf1ff 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -16,6 +16,7 @@ from parso.python.tokenize import PythonToken
 NAME = PythonTokenTypes.NAME
 NEWLINE = PythonTokenTypes.NEWLINE
 STRING = PythonTokenTypes.STRING
+NUMBER = PythonTokenTypes.NUMBER
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
@@ -330,13 +331,46 @@ def test_backslash():
         ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
         (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
         (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+
+        # format spec
         (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
                                  FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
+
+        # multiline f-string
+        ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+        ('f"""abc{\n123}def"""', [
+            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
+            FSTRING_END
+        ]),
+
+        # a line continuation inside of an fstring_string
+        ('f"abc\\\ndef"', [
+            FSTRING_START, FSTRING_STRING, FSTRING_END
+        ]),
+        ('f"\\\n{123}\\\n"', [
+            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
+            FSTRING_END
+        ]),
+
+        # a line continuation inside of an fstring_expr
+        ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
+
+        # a line continuation inside of a format spec
+        ('f"{123:.2\\\nf}"', [
+            FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
+        ]),
+
+        # a newline without a line continuation inside a single-line string is
+        # wrong, and will generate an ERRORTOKEN
+        ('f"abc\ndef"', [
+            FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
+        ]),
+
+        # a more complex example
         (r'print(f"Some {x:.2f}a{y}")', [
             NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
             FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
         ]),
-
     ]
 )
 def test_fstring(code, types, version_ge_py36):