Mirror of https://github.com/davidhalter/parso.git (synced 2025-12-06 12:54:29 +08:00)
Diff parser: Fix BOM with indentation issues
This commit is contained in:
@@ -427,7 +427,8 @@ class DiffParser(object):
|
||||
tokens = self._tokenizer(
|
||||
lines,
|
||||
start_pos=(line_offset + 1, 0),
|
||||
- indents=indents
|
||||
+ indents=indents,
|
||||
+ is_first_token=line_offset == 0,
|
||||
)
|
||||
stack = self._active_parser.stack
|
||||
self._replace_tos_indent = None
|
||||
|
||||
@@ -389,7 +389,7 @@ def _print_tokens(func):
|
||||
|
||||
|
||||
# @_print_tokens
|
||||
- def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
|
||||
+ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True):
|
||||
"""
|
||||
A heavily modified Python standard library tokenizer.
|
||||
|
||||
@@ -423,14 +423,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
|
||||
new_line = True
|
||||
prefix = '' # Should never be required, but here for safety
|
||||
additional_prefix = ''
|
||||
- first = True
|
||||
lnum = start_pos[0] - 1
|
||||
fstring_stack = []
|
||||
for line in lines: # loop over lines in stream
|
||||
lnum += 1
|
||||
pos = 0
|
||||
max_ = len(line)
|
||||
- if first:
|
||||
+ if is_first_token:
|
||||
if line.startswith(BOM_UTF8_STRING):
|
||||
additional_prefix = BOM_UTF8_STRING
|
||||
line = line[1:]
|
||||
@@ -441,7 +440,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
|
||||
pos = start_pos[1]
|
||||
max_ += start_pos[1]
|
||||
|
||||
- first = False
|
||||
+ is_first_token = False
|
||||
|
||||
if contstr: # continued string
|
||||
endmatch = endprog.match(line)
|
||||
|
||||
@@ -1565,3 +1565,23 @@ class Grammar:
|
||||
''')
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True)
|
||||
|
||||
|
||||
def test_byte_order_mark(differ):
|
||||
code2 = dedent('''\
|
||||
|
||||
x
|
||||
\ufeff
|
||||
else :
|
||||
''')
|
||||
differ.initialize('\n')
|
||||
differ.parse(code2, parsers=2, expect_error_leaves=True)
|
||||
|
||||
code3 = dedent('''\
|
||||
\ufeff
|
||||
if:
|
||||
|
||||
x
|
||||
''')
|
||||
differ.initialize('\n')
|
||||
differ.parse(code3, parsers=2, expect_error_leaves=True)
|
||||
|
||||
Reference in New Issue
Block a user