mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-08 05:34:51 +08:00
Fix split lines for Python code
Some characters, such as Vertical Tab or File Separator, were being treated as line separators because str.splitlines splits on them. This is incorrect for Python code: the only line separators in Python are Carriage Return (\r) and Line Feed (\n).
This commit is contained in:
@@ -5,6 +5,20 @@ from ast import literal_eval
|
||||
|
||||
from parso._compatibility import unicode, total_ordering
|
||||
|
||||
# The following is a list of characters that are line breaks in str.splitlines, but
|
||||
# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
|
||||
# 0xA) are allowed to split lines.
|
||||
_NON_LINE_BREAKS = (
|
||||
u'\v', # Vertical Tabulation 0xB
|
||||
u'\f', # Form Feed 0xC
|
||||
u'\x1C', # File Separator
|
||||
u'\x1D', # Group Separator
|
||||
u'\x1E', # Record Separator
|
||||
u'\x85', # Next Line (NEL - Equivalent to CR+LF.
|
||||
# Used to mark end-of-line on some IBM mainframes.)
|
||||
u'\u2028', # Line Separator
|
||||
u'\u2029', # Paragraph Separator
|
||||
)
|
||||
|
||||
Version = namedtuple('Version', 'major, minor, micro')
|
||||
|
||||
@@ -26,7 +40,12 @@ def split_lines(string, keepends=False):
|
||||
# We have to merge lines that were broken by characters that str.splitlines treats as line breaks but Python does not (e.g. form feed).
|
||||
merge = []
|
||||
for i, line in enumerate(lst):
|
||||
if line.endswith('\f'):
|
||||
try:
|
||||
last_chr = line[-1]
|
||||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
if last_chr in _NON_LINE_BREAKS:
|
||||
merge.append(i)
|
||||
|
||||
for index in reversed(merge):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from textwrap import dedent
|
||||
import logging
|
||||
|
||||
@@ -952,3 +953,9 @@ def test_wrong_backslash(differ):
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
|
||||
differ.parse(code1, parsers=1, copies=1)
|
||||
|
||||
|
||||
def test_random_unicode_characters(differ):
|
||||
differ.initialize('')
|
||||
differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
|
||||
differ.parse('')
|
||||
|
||||
@@ -29,6 +29,12 @@ import pytest
|
||||
|
||||
('\r', ['', ''], False),
|
||||
('\r', ['\r', ''], True),
|
||||
|
||||
# Invalid line breaks
|
||||
('a\vb', ['a\vb'], False),
|
||||
('a\vb', ['a\vb'], True),
|
||||
('\x1C', ['\x1C'], False),
|
||||
('\x1C', ['\x1C'], True),
|
||||
]
|
||||
)
|
||||
def test_split_lines(string, expected_result, keepends):
|
||||
|
||||
Reference in New Issue
Block a user