mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-07 13:24:39 +08:00
Fix a python_bytes_to_unicode issue, fixes #107
This commit is contained in:
@@ -105,8 +105,17 @@ def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
|
||||
if not isinstance(encoding, unicode):
|
||||
encoding = unicode(encoding, 'utf-8', 'replace')
|
||||
|
||||
try:
|
||||
# Cast to unicode
|
||||
return unicode(source, encoding, errors)
|
||||
except LookupError:
|
||||
if errors == 'replace':
|
||||
# This is a weird case that can happen if the given encoding is not
|
||||
# a valid encoding. This usually shouldn't happen with provided
|
||||
# encodings, but can happen if somebody uses encoding declarations
|
||||
# like `# coding: foo-8`.
|
||||
return unicode(source, 'utf-8', errors)
|
||||
raise
|
||||
|
||||
|
||||
def version_info():
|
||||
|
||||
@@ -63,3 +63,17 @@ def test_utf8_bom():
|
||||
expr_stmt = module.children[0]
|
||||
assert expr_stmt.type == 'expr_stmt'
|
||||
assert unicode_bom == expr_stmt.get_first_leaf().prefix
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('code', 'errors'), [
        (b'# coding: wtf-12\nfoo', 'strict'),
        (b'# coding: wtf-12\nfoo', 'replace'),
    ]
)
def test_bytes_to_unicode_failing_encoding(code, errors):
    """Check decoding of source whose coding declaration names a bogus codec.

    The byte string declares the encoding ``wtf-12``. With
    ``errors='strict'`` the call must raise ``LookupError``; with any
    other error mode the call is simply expected to complete without
    raising.
    """
    if errors != 'strict':
        # Lenient mode: only requirement is that no exception escapes.
        python_bytes_to_unicode(code, errors=errors)
        return

    # Strict mode: the unknown codec has to surface as a LookupError.
    with pytest.raises(LookupError):
        python_bytes_to_unicode(code, errors=errors)
|
||||
|
||||
Reference in New Issue
Block a user