Fix split lines for Python code

Some characters, such as Vertical Tab or File Separator, were treated as line separators. This is incorrect: in Python source code the only line separators are Carriage Return (\r) and Line Feed (\n).
2025-12-08 13:45:01 +08:00 · 2019-01-08 08:42:30 +01:00
parent f4696a6245
commit b1f613fe16
3 changed files with 34 additions and 2 deletions
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -5,6 +5,20 @@ from ast import literal_eval
 from parso._compatibility import unicode, total_ordering
 # The following is a list of characters that str.splitlines treats as line
 # breaks, but that are not line breaks in Python source code. In Python only
 # \r (Carriage Return, 0xD) and \n (Line Feed, 0xA) are allowed to split lines.
 _NON_LINE_BREAKS = (
    u'\v',  # Vertical Tabulation 0xB
    u'\f',  # Form Feed 0xC
    u'\x1C',  # File Separator
    u'\x1D',  # Group Separator
    u'\x1E',  # Record Separator
    u'\x85',  # Next Line (NEL - Equivalent to CR+LF.
              # Used to mark end-of-line on some IBM mainframes.)
    u'\u2028',  # Line Separator
    u'\u2029',  # Paragraph Separator
 )
 Version = namedtuple('Version', 'major, minor, micro')
@@ -26,8 +40,13 @@ def split_lines(string, keepends=False):
        # We have to merge lines that were broken by form feed characters.
        merge = []
        for i, line in enumerate(lst):
-            if line.endswith('\f'):
+            try:
-                merge.append(i)
+                last_chr = line[-1]
            except IndexError:
                pass
            else:
                if last_chr in _NON_LINE_BREAKS:
                    merge.append(i)
        for index in reversed(merge):
            try:
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -1,3 +1,4 @@
 # -*- coding: utf-8 -*-
 from textwrap import dedent
 import logging
@@ -952,3 +953,9 @@ def test_wrong_backslash(differ):
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)
 def test_random_unicode_characters(differ):
    differ.initialize('')
    differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
    differ.parse('')
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -29,6 +29,12 @@ import pytest
        ('\r', ['', ''], False),
        ('\r', ['\r', ''], True),
        # Invalid line breaks
        ('a\vb', ['a\vb'], False),
        ('a\vb', ['a\vb'], True),
        ('\x1C', ['\x1C'], False),
        ('\x1C', ['\x1C'], True),
    ]
 )
 def test_split_lines(string, expected_result, keepends):