Fix line continuation characters inside f-strings

Line continuation characters are valid inside of strings, but weren't
handled correctly in certain cases with f-strings, due to some small
tokenizer bugs.

This pull request addresses those issues and adds tests to validate
the new logic.
This commit is contained in:
Benjamin Woodruff
2019-07-11 15:05:20 -07:00
committed by Dave Halter
parent 19de3eb5ca
commit ad57a51800
3 changed files with 110 additions and 38 deletions

View File

@@ -118,9 +118,9 @@ def _get_token_collection(version_info):
return result return result
fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+') fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
fstring_format_spec_single_line = _compile(r'[^{}\r\n]+') fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_format_spec_multi_line = _compile(r'[^{}]+') fstring_format_spec_multi_line = _compile(r'[^{}]+')
@@ -340,7 +340,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
new_pos = pos new_pos = pos
new_pos += len(string) new_pos += len(string)
if allow_multiline and (string.endswith('\n') or string.endswith('\r')): # even if allow_multiline is False, we still need to check for trailing
# newlines, because a single-line f-string can contain line continuations
if string.endswith('\n') or string.endswith('\r'):
tos.previous_lines += string tos.previous_lines += string
string = '' string = ''
else: else:

View File

@@ -12,33 +12,57 @@ def grammar():
@pytest.mark.parametrize( @pytest.mark.parametrize(
'code', [ 'code', [
'{1}', # simple cases
'{1:}', 'f"{1}"',
'', 'f"""{1}"""',
'{1!a}', 'f"{foo} {bar}"',
'{1!a:1}',
'{1:1}', # empty string
'{1:1.{32}}', 'f""',
'{1::>4}', 'f""""""',
'{foo} {bar}',
'{x:{y}}', # empty format specifier is okay
'{x:{y:}}', 'f"{1:}"',
'{x:{y:1}}',
# use of conversion options
'f"{1!a}"',
'f"{1!a:1}"',
# format specifiers
'f"{1:1}"',
'f"{1:1.{32}}"',
'f"{1::>4}"',
'f"{x:{y}}"',
'f"{x:{y:}}"',
'f"{x:{y:1}}"',
# Escapes # Escapes
'{{}}', 'f"{{}}"',
'{{{1}}}', 'f"{{{1}}}"',
'{{{1}', 'f"{{{1}"',
'1{{2{{3', 'f"1{{2{{3"',
'}}', 'f"}}"',
# New Python 3.8 syntax f'{a=}' # New Python 3.8 syntax f'{a=}'
'{a=}', 'f"{a=}"',
'{a()=}', 'f"{a()=}"',
# multiline f-string
'f"""abc\ndef"""',
'f"""abc{\n123}def"""',
# a line continuation inside of an fstring_string
'f"abc\\\ndef"',
'f"\\\n{123}\\\n"',
# a line continuation inside of an fstring_expr
'f"{\\\n123}"',
# a line continuation inside of an format spec
'f"{123:.2\\\nf}"',
] ]
) )
def test_valid(code, grammar): def test_valid(code, grammar):
code = 'f"""%s"""' % code
module = grammar.parse(code, error_recovery=False) module = grammar.parse(code, error_recovery=False)
fstring = module.children[0] fstring = module.children[0]
assert fstring.type == 'fstring' assert fstring.type == 'fstring'
@@ -47,23 +71,34 @@ def test_valid(code, grammar):
@pytest.mark.parametrize( @pytest.mark.parametrize(
'code', [ 'code', [
'}', # an f-string can't contain unmatched curly braces
'{', 'f"}"',
'{1!{a}}', 'f"{"',
'{!{a}}', 'f"""}"""',
'{}', 'f"""{"""',
'{:}',
'{:}}}', # invalid conversion characters
'{:1}', 'f"{1!{a}}"',
'{!:}', 'f"{!{a}}"',
'{!}',
'{!a}', # The curly braces must contain an expression
'{1:{}}', 'f"{}"',
'{1:{:}}', 'f"{:}"',
'f"{:}}}"',
'f"{:1}"',
'f"{!:}"',
'f"{!}"',
'f"{!a}"',
# invalid (empty) format specifiers
'f"{1:{}}"',
'f"{1:{:}}"',
# a newline without a line continuation inside a single-line string
'f"abc\ndef"',
] ]
) )
def test_invalid(code, grammar): def test_invalid(code, grammar):
code = 'f"""%s"""' % code
with pytest.raises(ParserSyntaxError): with pytest.raises(ParserSyntaxError):
grammar.parse(code, error_recovery=False) grammar.parse(code, error_recovery=False)
@@ -95,6 +130,7 @@ def test_tokenize_start_pos(code, positions):
"""), """),
'f"foo', 'f"foo',
'f"""foo', 'f"""foo',
'f"abc\ndef"',
] ]
) )
def test_roundtrip(grammar, code): def test_roundtrip(grammar, code):

View File

@@ -16,6 +16,7 @@ from parso.python.tokenize import PythonToken
NAME = PythonTokenTypes.NAME NAME = PythonTokenTypes.NAME
NEWLINE = PythonTokenTypes.NEWLINE NEWLINE = PythonTokenTypes.NEWLINE
STRING = PythonTokenTypes.STRING STRING = PythonTokenTypes.STRING
NUMBER = PythonTokenTypes.NUMBER
INDENT = PythonTokenTypes.INDENT INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT DEDENT = PythonTokenTypes.DEDENT
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
@@ -330,13 +331,46 @@ def test_backslash():
('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]), ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
# format spec
(r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP, (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]), FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
# multiline f-string
('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
('f"""abc{\n123}def"""', [
FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
FSTRING_END
]),
# a line continuation inside of an fstring_string
('f"abc\\\ndef"', [
FSTRING_START, FSTRING_STRING, FSTRING_END
]),
('f"\\\n{123}\\\n"', [
FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
FSTRING_END
]),
# a line continuation inside of an fstring_expr
('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
# a line continuation inside of an format spec
('f"{123:.2\\\nf}"', [
FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
]),
# a newline without a line continuation inside a single-line string is
# wrong, and will generate an ERRORTOKEN
('f"abc\ndef"', [
FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
]),
# a more complex example
(r'print(f"Some {x:.2f}a{y}")', [ (r'print(f"Some {x:.2f}a{y}")', [
NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP, NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
]), ]),
] ]
) )
def test_fstring(code, types, version_ge_py36): def test_fstring(code, types, version_ge_py36):