mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-06 04:44:29 +08:00
Always decode input through source_to_unicode, which now defaults to errors='strict'.
This commit is contained in:
@@ -84,7 +84,9 @@ class Grammar(object):
|
||||
|
||||
if code is None:
|
||||
with open(path, 'rb') as f:
|
||||
code = source_to_unicode(f.read())
|
||||
code = f.read()
|
||||
|
||||
code = source_to_unicode(code)
|
||||
|
||||
lines = splitlines(code, keepends=True)
|
||||
if diff_cache:
|
||||
|
||||
@@ -48,7 +48,10 @@ def splitlines(string, keepends=False):
|
||||
return re.split('\n|\r\n', string)
|
||||
|
||||
|
||||
def source_to_unicode(source, encoding=None):
|
||||
def source_to_unicode(source, default_encoding='utf-8', errors='strict'):
|
||||
"""
|
||||
`errors` can be 'strict', 'replace' or 'ignore'.
|
||||
"""
|
||||
def detect_encoding():
|
||||
"""
|
||||
For the implementation of encoding definitions in Python, look at:
|
||||
@@ -67,7 +70,7 @@ def source_to_unicode(source, encoding=None):
|
||||
return possible_encoding.group(1)
|
||||
else:
|
||||
# the default if nothing else has been set -> PEP 263
|
||||
return encoding if encoding is not None else 'utf-8'
|
||||
return default_encoding
|
||||
|
||||
if isinstance(source, unicode):
|
||||
# only cast str/bytes
|
||||
@@ -76,8 +79,9 @@ def source_to_unicode(source, encoding=None):
|
||||
encoding = detect_encoding()
|
||||
if not isinstance(encoding, unicode):
|
||||
encoding = unicode(encoding, 'utf-8', 'replace')
|
||||
# cast to unicode by default
|
||||
return unicode(source, encoding, 'replace')
|
||||
|
||||
# Cast to unicode
|
||||
return unicode(source, encoding, errors)
|
||||
|
||||
|
||||
def version_info():
|
||||
|
||||
8
test/test_grammar.py
Normal file
8
test/test_grammar.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import parso
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_non_unicode():
|
||||
with pytest.raises(UnicodeDecodeError):
|
||||
parso.parse(b'\xe4')
|
||||
Reference in New Issue
Block a user