Fix UTF-8 BOM positions.

This commit is contained in:
Dave Halter
2017-07-10 23:38:44 +02:00
parent 859c48170e
commit ff949d1061
2 changed files with 22 additions and 3 deletions

View File

@@ -3,6 +3,8 @@ from codecs import BOM_UTF8
from parso.python.tokenize import group
# The UTF-8 byte order mark as text: the BOM_UTF8 byte sequence decoded from
# UTF-8, so it can be compared directly against prefix string values.
unicode_bom = BOM_UTF8.decode('utf-8')
class PrefixPart(object):
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
@@ -17,6 +19,9 @@ class PrefixPart(object):
def end_pos(self):
    """Return the ``(line, column)`` position where this prefix part ends.

    * A part whose value ends with a newline finishes at column 0 of the
      following line.
    * A byte order mark is treated as zero-width, so its end position is
      its start position (the very same object is returned).
    * Anything else ends ``len(value)`` columns after its start column.
    """
    begin = self.start_pos
    text = self.value

    if text.endswith('\n'):
        # The newline moves the position to the start of the next line.
        return begin[0] + 1, 0

    if text != unicode_bom:
        # Ordinary part: stay on the same line, advance by its length.
        return begin[0], begin[1] + len(text)

    # The BOM takes up no columns at the start of a Python file.
    return begin
def create_spacing_part(self):
@@ -35,8 +40,6 @@ class PrefixPart(object):
)
unicode_bom = BOM_UTF8.decode('utf-8')
# Regex fragments for individual prefix pieces.
# NOTE(review): presumably these are combined into the `_regex` pattern used
# by split_prefix; that definition is not visible here, so confirm.

# A '#' followed by any run of characters that are not LF, CR or form feed.
_comment = r'#[^\n\r\f]*'
# A backslash immediately followed by a line break (LF or CRLF).
_backslash = r'\\\r?\n'
# A line break: LF, optionally preceded by CR.
_newline = r'\r?\n'
@@ -66,6 +69,7 @@ def split_prefix(leaf, start_pos):
line, column = start_pos
start = 0
value = spacing = ''
bom = False
while start != len(leaf.prefix):
match =_regex.match(leaf.prefix, start)
spacing = match.group(1)
@@ -75,8 +79,10 @@ def split_prefix(leaf, start_pos):
type_ = _types[value[0]]
yield PrefixPart(
leaf, type_, value, spacing,
start_pos=(line, column + start + len(spacing))
start_pos=(line, column + start - int(bom) + len(spacing))
)
if type_ == 'bom':
bom = True
start = match.end(0)
if value.endswith('\n'):