Fix tokenizer: backslashes sometimes led to newline token generation

This commit is contained in:
Dave Halter
2019-01-14 09:59:16 +01:00
parent 1e25445176
commit 9cc8178998
3 changed files with 9 additions and 2 deletions

View File

@@ -483,7 +483,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 break
             initial = token[0]
-            if new_line and initial not in '\r\n#':
+            if new_line and initial not in '\r\n\\#':
                 new_line = False
                 if paren_level == 0 and not fstring_stack:
                     i = 0

View File

@@ -40,7 +40,7 @@ _python_reserved_strings = tuple(
 )
 _random_python_fragments = _python_reserved_strings + (
     ' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
-    "'''", ';', ' some_random_word ', '\\'
+    "'''", ';', ' some_random_word ', '\\', '#',
 )
@@ -172,6 +172,7 @@ class FileTests:
             code = f.read()
         self._code_lines = split_lines(code, keepends=True)
         self._test_count = test_count
+        self._code_lines = self._code_lines[:30]
         self._change_count = change_count
         with open(file_path) as f:

View File

@@ -312,3 +312,9 @@ def test_form_feed():
 def test_carriage_return():
     lst = _get_token_list(' =\\\rclass')
     assert [t.type for t in lst] == [INDENT, OP, DEDENT, NAME, ENDMARKER]
def test_backslash():
    """A backslash continuation followed only by a comment yields no tokens.

    Regression test for the fix in ``tokenize_lines``: a line-continuation
    backslash at the start of a logical line must not generate a stray
    NEWLINE token — the whole input ends up as the ENDMARKER's prefix.
    """
    code = '\\\n# 1 \n'
    # Exactly one token (ENDMARKER) is produced; unpacking enforces that.
    endmarker, = _get_token_list(code)
    assert endmarker.prefix == code