From 1b4c75608ab844f5763e294fec4cba3fbd2dbe10 Mon Sep 17 00:00:00 2001
From: Dave Halter <davidhalter88@gmail.com>
Date: Thu, 14 May 2020 23:34:14 +0200
Subject: [PATCH] Fix a python_bytes_to_unicode issue, fixes #107

---
 parso/utils.py     | 13 +++++++++++--
 test/test_utils.py | 14 ++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/parso/utils.py b/parso/utils.py
index 5a49c32..1485928 100644
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -105,8 +105,17 @@ def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
     if not isinstance(encoding, unicode):
         encoding = unicode(encoding, 'utf-8', 'replace')
 
-    # Cast to unicode
-    return unicode(source, encoding, errors)
+    try:
+        # Cast to unicode
+        return unicode(source, encoding, errors)
+    except LookupError:
+        if errors == 'replace':
+            # The given encoding is not one Python knows about. This usually
+            # shouldn't happen with provided encodings, but can happen if
+            # somebody uses an encoding declaration like `# coding: foo-8`.
+            # Since errors are to be replaced anyway, fall back to UTF-8.
+            return unicode(source, 'utf-8', errors)
+        raise
 
 
 def version_info():
diff --git a/test/test_utils.py b/test/test_utils.py
index 3078151..44a4ce4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -63,3 +63,17 @@ def test_utf8_bom():
     expr_stmt = module.children[0]
     assert expr_stmt.type == 'expr_stmt'
     assert unicode_bom == expr_stmt.get_first_leaf().prefix
+
+
+@pytest.mark.parametrize(
+    ('code', 'errors'), [
+        (b'# coding: wtf-12\nfoo', 'strict'),  # bogus coding line: must raise
+        (b'# coding: wtf-12\nfoo', 'replace'),  # same input: must not raise
+    ]
+)
+def test_bytes_to_unicode_failing_encoding(code, errors):
+    if errors == 'strict':
+        with pytest.raises(LookupError):  # unknown codec name is propagated
+            python_bytes_to_unicode(code, errors=errors)
+    else:
+        python_bytes_to_unicode(code, errors=errors)  # result is not checked