mirror of
https://github.com/davidhalter/jedi.git
synced 2025-12-06 14:04:26 +08:00
more robust source encoding detection
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
+*~
 *.swp
 *.swo
 .ropeproject
@@ -328,20 +328,17 @@ def source_to_unicode(source, encoding=None):
 http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
 declarations
 """
-if encoding is not None:
-return encoding
-
 if source.startswith('\xef\xbb\xbf'):
 # UTF-8 byte-order mark
 return 'utf-8'

 first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source).group(0)
-possible_encoding = re.match("coding[=:]\s*([-\w.]+)", first_two_lines)
+possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", first_two_lines)
 if possible_encoding:
 return possible_encoding.group(1)
 else:
 # the default if nothing else has been set -> PEP 263
-return 'iso-8859-1'
+return encoding if encoding is not None else 'iso-8859-1'

 if isinstance(source, unicode):
 # only cast str/bytes
Reference in New Issue
Block a user