From f45941226fded7c88f65aa5f61f83de43fcf359f Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 7 Apr 2020 01:06:03 +0200 Subject: [PATCH] Diff parser: Fix other BOM issues --- parso/python/diff.py | 8 +++++++- test/test_diff_parser.py | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/parso/python/diff.py b/parso/python/diff.py index ba1e494..8b4ac3c 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -13,7 +13,7 @@ import logging from parso.utils import split_lines from parso.python.parser import Parser from parso.python.tree import EndMarker -from parso.python.tokenize import PythonToken +from parso.python.tokenize import PythonToken, BOM_UTF8_STRING from parso.python.token import PythonTokenTypes LOG = logging.getLogger(__name__) @@ -85,6 +85,10 @@ def _assert_valid_graph(node): actual = line, len(splitted[-1]) else: actual = previous_start_pos[0], previous_start_pos[1] + len(content) + if content.startswith(BOM_UTF8_STRING) \ + and node.get_start_pos_of_prefix() == (1, 0): + # Remove the byte order mark + actual = actual[0], actual[1] - 1 assert node.start_pos == actual, (node.start_pos, actual) else: @@ -815,6 +819,8 @@ class _NodesTree(object): lines = split_lines(self.prefix) assert len(lines) > 0 if len(lines) == 1: + if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]: + end_pos[1] -= 1 end_pos[1] += len(lines[0]) else: end_pos[0] += len(lines) - 1 diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py index c95170f..7552df1 100644 --- a/test/test_diff_parser.py +++ b/test/test_diff_parser.py @@ -1587,6 +1587,12 @@ def test_byte_order_mark(differ): differ.parse(code3, parsers=2, expect_error_leaves=True) +def test_byte_order_mark2(differ): + code = '\ufeff# foo' + differ.initialize(code) + differ.parse(code + 'x', parsers=1) + + def test_backslash_insertion(differ): code1 = dedent(''' def f():