Add the UTF-8 BOM to the prefix in the tokenizer.

2025-12-23 12:41:43 +08:00 · 2017-07-10 23:26:15 +02:00
parent b3923e65e8
commit 859c48170e
4 changed files with 37 additions and 2 deletions
--- a/parso/python/prefix.py
+++ b/parso/python/prefix.py
@@ -1,4 +1,5 @@
 import re
+from codecs import BOM_UTF8

 from parso.python.tokenize import group

@@ -34,15 +35,18 @@ class PrefixPart(object):
        )


+unicode_bom = BOM_UTF8.decode('utf-8')
+
 _comment = r'#[^\n\r\f]*'
 _backslash = r'\\\r?\n'
 _newline = r'\r?\n'
 _form_feed = r'\f'
 _only_spacing = '$'
 _spacing = r'[ \t]*'
+_bom = unicode_bom

 _regex = group(
-    _comment, _backslash, _newline, _form_feed, _only_spacing,
+    _comment, _backslash, _newline, _form_feed, _only_spacing, _bom,
    capture=True
 )
 _regex = re.compile(group(_spacing, capture=True) + _regex)
@@ -54,6 +58,7 @@ _types = {
    '\f': 'formfeed',
    '\n': 'newline',
    '\r': 'newline',
+    unicode_bom: 'bom'
 }