Diff parser: Fix other BOM issues

This commit is contained in:
Dave Halter
2020-04-07 01:06:03 +02:00
parent e04552b14a
commit f45941226f
2 changed files with 13 additions and 1 deletions

View File

@@ -13,7 +13,7 @@ import logging
from parso.utils import split_lines
from parso.python.parser import Parser
from parso.python.tree import EndMarker
from parso.python.tokenize import PythonToken
from parso.python.tokenize import PythonToken, BOM_UTF8_STRING
from parso.python.token import PythonTokenTypes
LOG = logging.getLogger(__name__)
@@ -85,6 +85,10 @@ def _assert_valid_graph(node):
actual = line, len(splitted[-1])
else:
actual = previous_start_pos[0], previous_start_pos[1] + len(content)
if content.startswith(BOM_UTF8_STRING) \
and node.get_start_pos_of_prefix() == (1, 0):
# Remove the byte order mark
actual = actual[0], actual[1] - 1
assert node.start_pos == actual, (node.start_pos, actual)
else:
@@ -815,6 +819,8 @@ class _NodesTree(object):
lines = split_lines(self.prefix)
assert len(lines) > 0
if len(lines) == 1:
if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
end_pos[1] -= 1
end_pos[1] += len(lines[0])
else:
end_pos[0] += len(lines) - 1

View File

@@ -1587,6 +1587,12 @@ def test_byte_order_mark(differ):
differ.parse(code3, parsers=2, expect_error_leaves=True)
def test_byte_order_mark2(differ):
code = '\ufeff# foo'
differ.initialize(code)
differ.parse(code + 'x', parsers=1)
def test_backslash_insertion(differ):
code1 = dedent('''
def f():