mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-07 21:34:32 +08:00
Fix UTF-8 BOM positions.
This commit is contained in:
@@ -3,6 +3,8 @@ from codecs import BOM_UTF8
|
||||
|
||||
from parso.python.tokenize import group
|
||||
|
||||
unicode_bom = BOM_UTF8.decode('utf-8')
|
||||
|
||||
|
||||
class PrefixPart(object):
|
||||
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
|
||||
@@ -17,6 +19,9 @@ class PrefixPart(object):
|
||||
def end_pos(self):
|
||||
if self.value.endswith('\n'):
|
||||
return self.start_pos[0] + 1, 0
|
||||
if self.value == unicode_bom:
|
||||
# The BOM occupies no columns at the start of a Python file, so its end position equals its start position.
|
||||
return self.start_pos
|
||||
return self.start_pos[0], self.start_pos[1] + len(self.value)
|
||||
|
||||
def create_spacing_part(self):
|
||||
@@ -35,8 +40,6 @@ class PrefixPart(object):
|
||||
)
|
||||
|
||||
|
||||
unicode_bom = BOM_UTF8.decode('utf-8')
|
||||
|
||||
_comment = r'#[^\n\r\f]*'
|
||||
_backslash = r'\\\r?\n'
|
||||
_newline = r'\r?\n'
|
||||
@@ -66,6 +69,7 @@ def split_prefix(leaf, start_pos):
|
||||
line, column = start_pos
|
||||
start = 0
|
||||
value = spacing = ''
|
||||
bom = False
|
||||
while start != len(leaf.prefix):
|
||||
match =_regex.match(leaf.prefix, start)
|
||||
spacing = match.group(1)
|
||||
@@ -75,8 +79,10 @@ def split_prefix(leaf, start_pos):
|
||||
type_ = _types[value[0]]
|
||||
yield PrefixPart(
|
||||
leaf, type_, value, spacing,
|
||||
start_pos=(line, column + start + len(spacing))
|
||||
start_pos=(line, column + start - int(bom) + len(spacing))
|
||||
)
|
||||
if type_ == 'bom':
|
||||
bom = True
|
||||
|
||||
start = match.end(0)
|
||||
if value.endswith('\n'):
|
||||
|
||||
@@ -59,3 +59,16 @@ def test_prefix_splitting_types(string, types):
|
||||
assert leaf.type == 'endmarker'
|
||||
parsed_tokens = list(leaf._split_prefix())
|
||||
assert [t.type for t in parsed_tokens] == types
|
||||
|
||||
|
||||
def test_utf8_bom():
|
||||
tree = parso.parse(unicode_bom + 'a = 1')
|
||||
expr_stmt = tree.children[0]
|
||||
assert expr_stmt.start_pos == (1, 0)
|
||||
|
||||
tree = parso.parse(unicode_bom + '\n')
|
||||
endmarker = tree.children[0]
|
||||
parts = list(endmarker._split_prefix())
|
||||
assert [p.type for p in parts] == ['bom', 'newline', 'spacing']
|
||||
assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)]
|
||||
assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)]
|
||||
|
||||
Reference in New Issue
Block a user