Fix split lines for Python code

Some characters, such as Vertical Tab or File Separator, were being treated as line separators.
This is incorrect: the only line separators in Python source code are Carriage Return (\r) and Line Feed (\n).
This commit is contained in:
Dave Halter
2019-01-08 08:42:30 +01:00
parent f4696a6245
commit b1f613fe16
3 changed files with 34 additions and 2 deletions

View File

@@ -5,6 +5,20 @@ from ast import literal_eval
from parso._compatibility import unicode, total_ordering from parso._compatibility import unicode, total_ordering
# Characters that str.splitlines treats as line breaks, but that do not end a
# line in Python source code. In Python only \r (Carriage Return, 0xD) and
# \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
u'\v', # Vertical Tabulation 0xB
u'\f', # Form Feed 0xC
u'\x1C', # File Separator 0x1C
u'\x1D', # Group Separator 0x1D
u'\x1E', # Record Separator 0x1E
u'\x85', # Next Line 0x85 (NEL - equivalent to CR+LF;
# used to mark end-of-line on some IBM mainframes)
u'\u2028', # Line Separator U+2028
u'\u2029', # Paragraph Separator U+2029
)
Version = namedtuple('Version', 'major, minor, micro') Version = namedtuple('Version', 'major, minor, micro')
@@ -26,8 +40,13 @@ def split_lines(string, keepends=False):
# We have to merge lines that were broken by form feed characters. # We have to merge lines that were broken by form feed characters.
merge = [] merge = []
for i, line in enumerate(lst): for i, line in enumerate(lst):
if line.endswith('\f'): try:
merge.append(i) last_chr = line[-1]
except IndexError:
pass
else:
if last_chr in _NON_LINE_BREAKS:
merge.append(i)
for index in reversed(merge): for index in reversed(merge):
try: try:

View File

@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from textwrap import dedent from textwrap import dedent
import logging import logging
@@ -952,3 +953,9 @@ def test_wrong_backslash(differ):
differ.initialize(code1) differ.initialize(code1)
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True) differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1) differ.parse(code1, parsers=1, copies=1)
def test_random_unicode_characters(differ):
    """
    Parse text containing 0x1D (Group Separator) characters mixed with other
    non-ASCII characters, starting from an empty module and ending with an
    empty module again.
    """
    # 0x1D is one of the characters str.splitlines would break a line on.
    garbage = '\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ'

    differ.initialize('')
    differ.parse(garbage, parsers=1, expect_error_leaves=True)
    differ.parse('')

View File

@@ -29,6 +29,12 @@ import pytest
('\r', ['', ''], False), ('\r', ['', ''], False),
('\r', ['\r', ''], True), ('\r', ['\r', ''], True),
# Invalid line breaks
('a\vb', ['a\vb'], False),
('a\vb', ['a\vb'], True),
('\x1C', ['\x1C'], False),
('\x1C', ['\x1C'], True),
] ]
) )
def test_split_lines(string, expected_result, keepends): def test_split_lines(string, expected_result, keepends):