mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-08 13:45:01 +08:00
Use source_to_unicode by default with errors='strict'.
This commit is contained in:
@@ -84,7 +84,9 @@ class Grammar(object):
|
|||||||
|
|
||||||
if code is None:
|
if code is None:
|
||||||
with open(path, 'rb') as f:
|
with open(path, 'rb') as f:
|
||||||
code = source_to_unicode(f.read())
|
code = f.read()
|
||||||
|
|
||||||
|
code = source_to_unicode(code)
|
||||||
|
|
||||||
lines = splitlines(code, keepends=True)
|
lines = splitlines(code, keepends=True)
|
||||||
if diff_cache:
|
if diff_cache:
|
||||||
|
|||||||
@@ -48,7 +48,10 @@ def splitlines(string, keepends=False):
|
|||||||
return re.split('\n|\r\n', string)
|
return re.split('\n|\r\n', string)
|
||||||
|
|
||||||
|
|
||||||
def source_to_unicode(source, encoding=None):
|
def source_to_unicode(source, default_encoding='utf-8', errors='strict'):
|
||||||
|
"""
|
||||||
|
`errors` can be 'strict', 'replace' or 'ignore'.
|
||||||
|
"""
|
||||||
def detect_encoding():
|
def detect_encoding():
|
||||||
"""
|
"""
|
||||||
For the implementation of encoding definitions in Python, look at:
|
For the implementation of encoding definitions in Python, look at:
|
||||||
@@ -67,7 +70,7 @@ def source_to_unicode(source, encoding=None):
|
|||||||
return possible_encoding.group(1)
|
return possible_encoding.group(1)
|
||||||
else:
|
else:
|
||||||
# the default if nothing else has been set -> PEP 263
|
# the default if nothing else has been set -> PEP 263
|
||||||
return encoding if encoding is not None else 'utf-8'
|
return default_encoding
|
||||||
|
|
||||||
if isinstance(source, unicode):
|
if isinstance(source, unicode):
|
||||||
# only cast str/bytes
|
# only cast str/bytes
|
||||||
@@ -76,8 +79,9 @@ def source_to_unicode(source, encoding=None):
|
|||||||
encoding = detect_encoding()
|
encoding = detect_encoding()
|
||||||
if not isinstance(encoding, unicode):
|
if not isinstance(encoding, unicode):
|
||||||
encoding = unicode(encoding, 'utf-8', 'replace')
|
encoding = unicode(encoding, 'utf-8', 'replace')
|
||||||
# cast to unicode by default
|
|
||||||
return unicode(source, encoding, 'replace')
|
# Cast to unicode
|
||||||
|
return unicode(source, encoding, errors)
|
||||||
|
|
||||||
|
|
||||||
def version_info():
|
def version_info():
|
||||||
|
|||||||
8
test/test_grammar.py
Normal file
8
test/test_grammar.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
import parso
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_unicode():
    """Parsing bytes that are not valid UTF-8 must raise, not be replaced.

    The commit under test makes decoding use ``errors='strict'``, so a lone
    0xE4 byte (not a valid UTF-8 sequence) is expected to surface as a
    ``UnicodeDecodeError``.
    """
    invalid_utf8 = b'\xe4'
    with pytest.raises(UnicodeDecodeError):
        parso.parse(invalid_utf8)
|
||||||
Reference in New Issue
Block a user