Add the UTF-8 BOM to the prefix in the tokenizer.

2026-02-21 17:18:52 +08:00 · 2017-07-10 23:26:15 +02:00
parent b3923e65e8
commit 859c48170e
4 changed files with 37 additions and 2 deletions
--- a/test/test_prefix.py
+++ b/test/test_prefix.py
@@ -1,9 +1,12 @@
 from itertools import zip_longest
+from codecs import BOM_UTF8

 import pytest

 import parso

+unicode_bom = BOM_UTF8.decode('utf-8')
+

@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
@@ -48,6 +51,7 @@ def test_simple_prefix_splitting(string, tokens):
    ('\\\n', ['backslash', 'spacing']),
    (' \t', ['spacing']),
    (' \t ', ['spacing']),
+    (unicode_bom + ' # ', ['bom', 'comment', 'spacing']),
 ])
 def test_prefix_splitting_types(string, types):
    tree = parso.parse(string)