diff --git a/.gitignore b/.gitignore index 1f303fe4..5c08b8e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ *.swp *.swo .ropeproject diff --git a/jedi/modules.py b/jedi/modules.py index 53a95f8a..9093cf71 100644 --- a/jedi/modules.py +++ b/jedi/modules.py @@ -1,6 +1,6 @@ from __future__ import with_statement -from _compatibility import exec_function, unicode +from _compatibility import exec_function, unicode, is_py25, literal_eval import re import tokenize @@ -327,20 +327,18 @@ def source_to_unicode(source, encoding=None): http://docs.python.org/2/reference/lexical_analysis.html#encoding-\ declarations """ - if encoding is not None: - return encoding - - if source.startswith('\xef\xbb\xbf'): + byte_mark = '\xef\xbb\xbf' if is_py25 else literal_eval(r"b'\xef\xbb\xbf'") + if source.startswith(byte_mark): # UTF-8 byte-order mark return 'utf-8' - first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source).group(0) - possible_encoding = re.match("coding[=:]\s*([-\w.]+)", first_two_lines) + first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0) + possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", first_two_lines) if possible_encoding: return possible_encoding.group(1) else: # the default if nothing else has been set -> PEP 263 - return 'iso-8859-1' + return encoding if encoding is not None else 'iso-8859-1' if isinstance(source, unicode): # only cast str/bytes