Diff parser: Fix BOM with indentation issues

This commit is contained in:
Dave Halter
2020-04-05 20:47:49 +02:00
parent db10b4fa72
commit b12dd498bb
3 changed files with 25 additions and 5 deletions

View File

@@ -427,7 +427,8 @@ class DiffParser(object):
tokens = self._tokenizer(
lines,
start_pos=(line_offset + 1, 0),
indents=indents
indents=indents,
is_first_token=line_offset == 0,
)
stack = self._active_parser.stack
self._replace_tos_indent = None

View File

@@ -389,7 +389,7 @@ def _print_tokens(func):
# @_print_tokens
def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True):
"""
A heavily modified Python standard library tokenizer.
@@ -423,14 +423,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
new_line = True
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
first = True
lnum = start_pos[0] - 1
fstring_stack = []
for line in lines: # loop over lines in stream
lnum += 1
pos = 0
max_ = len(line)
if first:
if is_first_token:
if line.startswith(BOM_UTF8_STRING):
additional_prefix = BOM_UTF8_STRING
line = line[1:]
@@ -441,7 +440,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
pos = start_pos[1]
max_ += start_pos[1]
first = False
is_first_token = False
if contstr: # continued string
endmatch = endprog.match(line)

View File

@@ -1565,3 +1565,23 @@ class Grammar:
''')
differ.initialize(code1)
differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True)
def test_byte_order_mark(differ):
    """Run the differ over two snippets that contain a U+FEFF escape.

    ``differ`` is supplied by the caller (presumably a pytest fixture
    defined elsewhere -- confirm).  For each snippet the differ is first
    initialized with a single newline and then asked to parse the snippet
    with ``parsers=2`` and ``expect_error_leaves=True``.
    """
    # NOTE(review): as written here the literal lines carry no leading
    # whitespace, so ``dedent`` leaves them unchanged.  The indentation
    # inside these literals may have been lost -- confirm against the
    # repository before relying on the exact string contents.

    # Scenario 1: the U+FEFF escape sits on the second line, after ``x``.
    code2 = dedent('''\
x
\ufeff
else :
''')
    differ.initialize('\n')
    differ.parse(code2, parsers=2, expect_error_leaves=True)
    # Scenario 2: the U+FEFF escape is the very first line of the snippet.
    code3 = dedent('''\
\ufeff
if:
x
''')
    differ.initialize('\n')
    differ.parse(code3, parsers=2, expect_error_leaves=True)