Fix a python_bytes_to_unicode issue, fixes #107

2025-12-07 13:24:39 +08:00 · 2020-05-14 23:34:14 +02:00
parent 15403fd998
commit 1b4c75608a
2 changed files with 25 additions and 2 deletions
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -105,8 +105,17 @@ def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
    if not isinstance(encoding, unicode):
        encoding = unicode(encoding, 'utf-8', 'replace')

+    try:
        # Cast to unicode
        return unicode(source, encoding, errors)
+    except LookupError:
+        if errors == 'replace':
+            # Python does not know the requested codec, so decoding raised a
+            # LookupError. Explicitly passed encodings are normally valid, but
+            # a bogus declaration in the source (e.g. `# coding: foo-8`) ends
+            # up here; with errors='replace' we fall back to UTF-8.
+            return unicode(source, 'utf-8', errors)
+        raise


 def version_info():
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -63,3 +63,17 @@ def test_utf8_bom():
    expr_stmt = module.children[0]
    assert expr_stmt.type == 'expr_stmt'
    assert unicode_bom == expr_stmt.get_first_leaf().prefix
+
+
+@pytest.mark.parametrize(
+    ('code', 'errors'), [
+        (b'# coding: wtf-12\nfoo', 'strict'),
+        (b'# coding: wtf-12\nfoo', 'replace'),
+    ]
+)
+def test_bytes_to_unicode_failing_encoding(code, errors):
+    if errors == 'strict':
+        with pytest.raises(LookupError):
+            python_bytes_to_unicode(code, errors=errors)
+    else:
+        python_bytes_to_unicode(code, errors=errors)