Fix utf-8 bom positions.

2025-12-23 12:41:43 +08:00 · 2017-07-10 23:38:44 +02:00
parent 859c48170e
commit ff949d1061
2 changed files with 22 additions and 3 deletions
--- a/parso/python/prefix.py
+++ b/parso/python/prefix.py
@@ -3,6 +3,8 @@ from codecs import BOM_UTF8

 from parso.python.tokenize import group

+unicode_bom = BOM_UTF8.decode('utf-8')
+

 class PrefixPart(object):
    def __init__(self, leaf, typ, value, spacing='', start_pos=None):
@@ -17,6 +19,9 @@ class PrefixPart(object):
    def end_pos(self):
        if self.value.endswith('\n'):
            return self.start_pos[0] + 1, 0
+        if self.value == unicode_bom:
+            # The bom doesn't have a length at the start of a Python file.
+            return self.start_pos
        return self.start_pos[0], self.start_pos[1] + len(self.value)

    def create_spacing_part(self):
@@ -35,8 +40,6 @@ class PrefixPart(object):
        )


-unicode_bom = BOM_UTF8.decode('utf-8')
-
 _comment = r'#[^\n\r\f]*'
 _backslash = r'\\\r?\n'
 _newline = r'\r?\n'
@@ -66,6 +69,7 @@ def split_prefix(leaf, start_pos):
    line, column = start_pos
    start = 0
    value = spacing = ''
+    bom = False
    while start != len(leaf.prefix):
        match =_regex.match(leaf.prefix, start)
        spacing = match.group(1)
@@ -75,8 +79,10 @@ def split_prefix(leaf, start_pos):
        type_ = _types[value[0]]
        yield PrefixPart(
            leaf, type_, value, spacing,
-            start_pos=(line, column + start + len(spacing))
+            start_pos=(line, column + start - int(bom) + len(spacing))
        )
+        if type_ == 'bom':
+            bom = True

        start = match.end(0)
        if value.endswith('\n'):