diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 6282275..0eff15d 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -110,9 +110,14 @@ def _get_token_collection(version_info):
             _create_token_collection(version_info)
         return result
 
-
-fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
-fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
+unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*'
+fstring_string_single_line = _compile(
+    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name
+    + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+'
+)
+fstring_string_multi_line = _compile(
+    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + r'\}|\\[^N]|[^{}\\])+'
+)
 fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_format_spec_multi_line = _compile(r'[^{}]+')
diff --git a/test/test_fstring.py b/test/test_fstring.py
index a17599d..c81d027 100644
--- a/test/test_fstring.py
+++ b/test/test_fstring.py
@@ -67,6 +67,17 @@ def grammar():
         'f"{x, *y}"',
         'f"{*x, y}"',
         'f"{x for x in [1]}"',
+
+        # named unicode characters
+        'f"\\N{BULLET}"',
+        'f"\\N{FLEUR-DE-LIS}"',
+        'f"\\N{NO ENTRY}"',
+        'f"Combo {expr} and \\N{NO ENTRY}"',
+        'f"\\N{NO ENTRY} and {expr}"',
+        'f"\\N{no entry}"',
+        'f"\\N{SOYOMBO LETTER -A}"',
+        'f"\\N{DOMINO TILE HORIZONTAL-00-00}"',
+        'f"""\\N{NO ENTRY}"""',
     ]
 )
 def test_valid(code, grammar):
@@ -104,6 +115,11 @@
 
         # a newline without a line continuation inside a single-line string
         'f"abc\ndef"',
+
+        # various named unicode escapes that aren't name-shaped
+        'f"\\N{ BULLET }"',
+        'f"\\N{NO   ENTRY}"',
+        'f"""\\N{NO\nENTRY}"""',
     ]
 )
 def test_invalid(code, grammar):
@@ -122,6 +138,8 @@
                            (1, 10), (1, 11), (1, 12), (1, 13)]),
         ('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
                                   (4, 2), (4, 5)]),
+        ('f"\\N{NO ENTRY} and {expr}"', [(1, 0), (1, 2), (1, 19), (1, 20),
+                                         (1, 24), (1, 25), (1, 26)]),
     ]
 )
 def test_tokenize_start_pos(code, positions):
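
For reference, a standalone sketch of how the new single-line pattern behaves.
This is not part of the patch: parso's _compile is replaced here by a plain
re.compile, and the regex text simply mirrors the tokenize.py hunk above.

    import re

    # A Unicode character name: words of letters, digits, or hyphens,
    # separated by single spaces (mirrors the pattern added above).
    unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*'

    # Stand-in for parso's _compile(), assumed to wrap re.compile().
    fstring_string_single_line = re.compile(
        r'(?:\{\{|\}\}|\\N\{' + unicode_character_name
        + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+'
    )

    # The whole \N{...} escape is consumed as plain string content, so its
    # braces are not mistaken for the braces of an f-string expression:
    m = fstring_string_single_line.match('\\N{NO ENTRY} and ')
    assert m.group() == '\\N{NO ENTRY} and '

    # An escape that is not name-shaped fails to match at the backslash,
    # which is what makes f"\N{ BULLET }" tokenize as invalid:
    assert fstring_string_single_line.match('\\N{ BULLET }') is None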