Fix a python_bytes_to_unicode issue, fixes #107

This commit is contained in:
Dave Halter
2020-05-14 23:34:14 +02:00
parent 15403fd998
commit 1b4c75608a
2 changed files with 25 additions and 2 deletions

View File

@@ -105,8 +105,17 @@ def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
if not isinstance(encoding, unicode):
encoding = unicode(encoding, 'utf-8', 'replace')
try:
# Cast to unicode
return unicode(source, encoding, errors)
except LookupError:
if errors == 'replace':
# This is a weird case that can happen if the given encoding is not
# a valid encoding. This usually shouldn't happen with provided
# encodings, but can happen if somebody uses encoding declarations
# like `# coding: foo-8`.
return unicode(source, 'utf-8', errors)
raise
def version_info():

View File

@@ -63,3 +63,17 @@ def test_utf8_bom():
expr_stmt = module.children[0]
assert expr_stmt.type == 'expr_stmt'
assert unicode_bom == expr_stmt.get_first_leaf().prefix
@pytest.mark.parametrize(
    ('code', 'errors'), [
        (b'# coding: wtf-12\nfoo', 'strict'),
        (b'# coding: wtf-12\nfoo', 'replace'),
    ]
)
def test_bytes_to_unicode_failing_encoding(code, errors):
    """Check how an unknown ``# coding:`` declaration is handled.

    The source bytes declare the encoding ``wtf-12``. With
    ``errors='strict'`` the conversion is expected to raise
    ``LookupError``; with any other error mode the call is only required
    to complete without raising.
    """
    if errors != 'strict':
        # Lenient mode: the call must simply succeed; the returned text is
        # intentionally not inspected here.
        python_bytes_to_unicode(code, errors=errors)
        return

    # Strict mode: the bogus encoding name has to surface to the caller.
    with pytest.raises(LookupError):
        python_bytes_to_unicode(code, errors=errors)