Fix line continuation characters inside f-strings

Line continuation characters are valid inside of strings, but weren't
handled correctly in certain cases with f-strings, due to some small
tokenizer bugs.

This pull request to address those issues, and adds tests to validate
the new logic.
This commit is contained in:
Benjamin Woodruff
2019-07-11 15:05:20 -07:00
committed by Dave Halter
parent 19de3eb5ca
commit ad57a51800
3 changed files with 110 additions and 38 deletions

View File

@@ -118,9 +118,9 @@ def _get_token_collection(version_info):
return result
fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
fstring_format_spec_single_line = _compile(r'[^{}\r\n]+')
fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_format_spec_multi_line = _compile(r'[^{}]+')
@@ -340,7 +340,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
new_pos = pos
new_pos += len(string)
if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
# even if allow_multiline is False, we still need to check for trailing
# newlines, because a single-line f-string can contain line continuations
if string.endswith('\n') or string.endswith('\r'):
tos.previous_lines += string
string = ''
else:

View File

@@ -12,33 +12,57 @@ def grammar():
@pytest.mark.parametrize(
'code', [
'{1}',
'{1:}',
'',
'{1!a}',
'{1!a:1}',
'{1:1}',
'{1:1.{32}}',
'{1::>4}',
'{foo} {bar}',
'{x:{y}}',
'{x:{y:}}',
'{x:{y:1}}',
# simple cases
'f"{1}"',
'f"""{1}"""',
'f"{foo} {bar}"',
# empty string
'f""',
'f""""""',
# empty format specifier is okay
'f"{1:}"',
# use of conversion options
'f"{1!a}"',
'f"{1!a:1}"',
# format specifiers
'f"{1:1}"',
'f"{1:1.{32}}"',
'f"{1::>4}"',
'f"{x:{y}}"',
'f"{x:{y:}}"',
'f"{x:{y:1}}"',
# Escapes
'{{}}',
'{{{1}}}',
'{{{1}',
'1{{2{{3',
'}}',
'f"{{}}"',
'f"{{{1}}}"',
'f"{{{1}"',
'f"1{{2{{3"',
'f"}}"',
# New Python 3.8 syntax f'{a=}'
'{a=}',
'{a()=}',
'f"{a=}"',
'f"{a()=}"',
# multiline f-string
'f"""abc\ndef"""',
'f"""abc{\n123}def"""',
# a line continuation inside of an fstring_string
'f"abc\\\ndef"',
'f"\\\n{123}\\\n"',
# a line continuation inside of an fstring_expr
'f"{\\\n123}"',
# a line continuation inside of an format spec
'f"{123:.2\\\nf}"',
]
)
def test_valid(code, grammar):
code = 'f"""%s"""' % code
module = grammar.parse(code, error_recovery=False)
fstring = module.children[0]
assert fstring.type == 'fstring'
@@ -47,23 +71,34 @@ def test_valid(code, grammar):
@pytest.mark.parametrize(
'code', [
'}',
'{',
'{1!{a}}',
'{!{a}}',
'{}',
'{:}',
'{:}}}',
'{:1}',
'{!:}',
'{!}',
'{!a}',
'{1:{}}',
'{1:{:}}',
# an f-string can't contain unmatched curly braces
'f"}"',
'f"{"',
'f"""}"""',
'f"""{"""',
# invalid conversion characters
'f"{1!{a}}"',
'f"{!{a}}"',
# The curly braces must contain an expression
'f"{}"',
'f"{:}"',
'f"{:}}}"',
'f"{:1}"',
'f"{!:}"',
'f"{!}"',
'f"{!a}"',
# invalid (empty) format specifiers
'f"{1:{}}"',
'f"{1:{:}}"',
# a newline without a line continuation inside a single-line string
'f"abc\ndef"',
]
)
def test_invalid(code, grammar):
code = 'f"""%s"""' % code
with pytest.raises(ParserSyntaxError):
grammar.parse(code, error_recovery=False)
@@ -95,6 +130,7 @@ def test_tokenize_start_pos(code, positions):
"""),
'f"foo',
'f"""foo',
'f"abc\ndef"',
]
)
def test_roundtrip(grammar, code):

View File

@@ -16,6 +16,7 @@ from parso.python.tokenize import PythonToken
NAME = PythonTokenTypes.NAME
NEWLINE = PythonTokenTypes.NEWLINE
STRING = PythonTokenTypes.STRING
NUMBER = PythonTokenTypes.NUMBER
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
@@ -330,13 +331,46 @@ def test_backslash():
('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
# format spec
(r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
# multiline f-string
('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
('f"""abc{\n123}def"""', [
FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
FSTRING_END
]),
# a line continuation inside of an fstring_string
('f"abc\\\ndef"', [
FSTRING_START, FSTRING_STRING, FSTRING_END
]),
('f"\\\n{123}\\\n"', [
FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
FSTRING_END
]),
# a line continuation inside of an fstring_expr
('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
# a line continuation inside of an format spec
('f"{123:.2\\\nf}"', [
FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
]),
# a newline without a line continuation inside a single-line string is
# wrong, and will generate an ERRORTOKEN
('f"abc\ndef"', [
FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
]),
# a more complex example
(r'print(f"Some {x:.2f}a{y}")', [
NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
]),
]
)
def test_fstring(code, types, version_ge_py36):