mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-06 21:04:29 +08:00
Fix utf-8 bom positions.
This commit is contained in:
@@ -3,6 +3,8 @@ from codecs import BOM_UTF8
|
|||||||
|
|
||||||
from parso.python.tokenize import group
|
from parso.python.tokenize import group
|
||||||
|
|
||||||
|
unicode_bom = BOM_UTF8.decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
class PrefixPart(object):
|
class PrefixPart(object):
|
||||||
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
|
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
|
||||||
@@ -17,6 +19,9 @@ class PrefixPart(object):
|
|||||||
def end_pos(self):
|
def end_pos(self):
|
||||||
if self.value.endswith('\n'):
|
if self.value.endswith('\n'):
|
||||||
return self.start_pos[0] + 1, 0
|
return self.start_pos[0] + 1, 0
|
||||||
|
if self.value == unicode_bom:
|
||||||
|
# The bom doesn't have a length at the start of a Python file.
|
||||||
|
return self.start_pos
|
||||||
return self.start_pos[0], self.start_pos[1] + len(self.value)
|
return self.start_pos[0], self.start_pos[1] + len(self.value)
|
||||||
|
|
||||||
def create_spacing_part(self):
|
def create_spacing_part(self):
|
||||||
@@ -35,8 +40,6 @@ class PrefixPart(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
unicode_bom = BOM_UTF8.decode('utf-8')
|
|
||||||
|
|
||||||
_comment = r'#[^\n\r\f]*'
|
_comment = r'#[^\n\r\f]*'
|
||||||
_backslash = r'\\\r?\n'
|
_backslash = r'\\\r?\n'
|
||||||
_newline = r'\r?\n'
|
_newline = r'\r?\n'
|
||||||
@@ -66,6 +69,7 @@ def split_prefix(leaf, start_pos):
|
|||||||
line, column = start_pos
|
line, column = start_pos
|
||||||
start = 0
|
start = 0
|
||||||
value = spacing = ''
|
value = spacing = ''
|
||||||
|
bom = False
|
||||||
while start != len(leaf.prefix):
|
while start != len(leaf.prefix):
|
||||||
match =_regex.match(leaf.prefix, start)
|
match =_regex.match(leaf.prefix, start)
|
||||||
spacing = match.group(1)
|
spacing = match.group(1)
|
||||||
@@ -75,8 +79,10 @@ def split_prefix(leaf, start_pos):
|
|||||||
type_ = _types[value[0]]
|
type_ = _types[value[0]]
|
||||||
yield PrefixPart(
|
yield PrefixPart(
|
||||||
leaf, type_, value, spacing,
|
leaf, type_, value, spacing,
|
||||||
start_pos=(line, column + start + len(spacing))
|
start_pos=(line, column + start - int(bom) + len(spacing))
|
||||||
)
|
)
|
||||||
|
if type_ == 'bom':
|
||||||
|
bom = True
|
||||||
|
|
||||||
start = match.end(0)
|
start = match.end(0)
|
||||||
if value.endswith('\n'):
|
if value.endswith('\n'):
|
||||||
|
|||||||
@@ -59,3 +59,16 @@ def test_prefix_splitting_types(string, types):
|
|||||||
assert leaf.type == 'endmarker'
|
assert leaf.type == 'endmarker'
|
||||||
parsed_tokens = list(leaf._split_prefix())
|
parsed_tokens = list(leaf._split_prefix())
|
||||||
assert [t.type for t in parsed_tokens] == types
|
assert [t.type for t in parsed_tokens] == types
|
||||||
|
|
||||||
|
|
||||||
|
def test_utf8_bom():
|
||||||
|
tree = parso.parse(unicode_bom + 'a = 1')
|
||||||
|
expr_stmt = tree.children[0]
|
||||||
|
assert expr_stmt.start_pos == (1, 0)
|
||||||
|
|
||||||
|
tree = parso.parse(unicode_bom + '\n')
|
||||||
|
endmarker = tree.children[0]
|
||||||
|
parts = list(endmarker._split_prefix())
|
||||||
|
assert [p.type for p in parts] == ['bom', 'newline', 'spacing']
|
||||||
|
assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)]
|
||||||
|
assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)]
|
||||||
|
|||||||
Reference in New Issue
Block a user