mirror of
https://github.com/davidhalter/jedi.git
synced 2026-02-04 19:22:55 +08:00
Merge pull request #115 from andviro/master
more robust source encoding detection
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
*~
|
||||
*.swp
|
||||
*.swo
|
||||
.ropeproject
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import with_statement
|
||||
|
||||
from _compatibility import exec_function, unicode
|
||||
from _compatibility import exec_function, unicode, is_py25, literal_eval
|
||||
|
||||
import re
|
||||
import tokenize
|
||||
@@ -327,20 +327,18 @@ def source_to_unicode(source, encoding=None):
|
||||
http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
|
||||
declarations
|
||||
"""
|
||||
if encoding is not None:
|
||||
return encoding
|
||||
|
||||
if source.startswith('\xef\xbb\xbf'):
|
||||
byte_mark = '\xef\xbb\xbf' if is_py25 else literal_eval(r"b'\xef\xbb\xbf'")
|
||||
if source.startswith(byte_mark):
|
||||
# UTF-8 byte-order mark
|
||||
return 'utf-8'
|
||||
|
||||
first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source).group(0)
|
||||
possible_encoding = re.match("coding[=:]\s*([-\w.]+)", first_two_lines)
|
||||
first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
|
||||
possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", first_two_lines)
|
||||
if possible_encoding:
|
||||
return possible_encoding.group(1)
|
||||
else:
|
||||
# the default if nothing else has been set -> PEP 263
|
||||
return 'iso-8859-1'
|
||||
return encoding if encoding is not None else 'iso-8859-1'
|
||||
|
||||
if isinstance(source, unicode):
|
||||
# only cast str/bytes
|
||||
|
||||
Reference in New Issue
Block a user