Fix UTF-8 BOM positions.

This commit is contained in:
Dave Halter
2017-07-10 23:38:44 +02:00
parent 859c48170e
commit ff949d1061
2 changed files with 22 additions and 3 deletions

View File

@@ -3,6 +3,8 @@ from codecs import BOM_UTF8
from parso.python.tokenize import group from parso.python.tokenize import group
unicode_bom = BOM_UTF8.decode('utf-8')
class PrefixPart(object): class PrefixPart(object):
def __init__(self, leaf, typ, value, spacing='', start_pos=None): def __init__(self, leaf, typ, value, spacing='', start_pos=None):
@@ -17,6 +19,9 @@ class PrefixPart(object):
def end_pos(self): def end_pos(self):
if self.value.endswith('\n'): if self.value.endswith('\n'):
return self.start_pos[0] + 1, 0 return self.start_pos[0] + 1, 0
if self.value == unicode_bom:
# The bom doesn't have a length at the start of a Python file.
return self.start_pos
return self.start_pos[0], self.start_pos[1] + len(self.value) return self.start_pos[0], self.start_pos[1] + len(self.value)
def create_spacing_part(self): def create_spacing_part(self):
@@ -35,8 +40,6 @@ class PrefixPart(object):
) )
unicode_bom = BOM_UTF8.decode('utf-8')
_comment = r'#[^\n\r\f]*' _comment = r'#[^\n\r\f]*'
_backslash = r'\\\r?\n' _backslash = r'\\\r?\n'
_newline = r'\r?\n' _newline = r'\r?\n'
@@ -66,6 +69,7 @@ def split_prefix(leaf, start_pos):
line, column = start_pos line, column = start_pos
start = 0 start = 0
value = spacing = '' value = spacing = ''
bom = False
while start != len(leaf.prefix): while start != len(leaf.prefix):
match =_regex.match(leaf.prefix, start) match =_regex.match(leaf.prefix, start)
spacing = match.group(1) spacing = match.group(1)
@@ -75,8 +79,10 @@ def split_prefix(leaf, start_pos):
type_ = _types[value[0]] type_ = _types[value[0]]
yield PrefixPart( yield PrefixPart(
leaf, type_, value, spacing, leaf, type_, value, spacing,
start_pos=(line, column + start + len(spacing)) start_pos=(line, column + start - int(bom) + len(spacing))
) )
if type_ == 'bom':
bom = True
start = match.end(0) start = match.end(0)
if value.endswith('\n'): if value.endswith('\n'):

View File

@@ -59,3 +59,16 @@ def test_prefix_splitting_types(string, types):
assert leaf.type == 'endmarker' assert leaf.type == 'endmarker'
parsed_tokens = list(leaf._split_prefix()) parsed_tokens = list(leaf._split_prefix())
assert [t.type for t in parsed_tokens] == types assert [t.type for t in parsed_tokens] == types
def test_utf8_bom():
    """Check that a leading UTF-8 BOM does not shift reported positions."""
    # A statement that follows the BOM still starts at line 1, column 0.
    module = parso.parse(unicode_bom + 'a = 1')
    first_stmt = module.children[0]
    assert first_stmt.start_pos == (1, 0)

    # With only a BOM and a newline, inspect the prefix of the first child.
    module = parso.parse(unicode_bom + '\n')
    first_leaf = module.children[0]
    prefix_parts = list(first_leaf._split_prefix())

    # The BOM part starts and ends at (1, 0): it takes up no columns.
    assert [part.type for part in prefix_parts] == ['bom', 'newline', 'spacing']
    assert [part.start_pos for part in prefix_parts] == [(1, 0), (1, 0), (2, 0)]
    assert [part.end_pos for part in prefix_parts] == [(1, 0), (2, 0), (2, 0)]