forked from VimPlug/jedi
added source_to_unicode method to deal with different encodings, davidhalter/jedi-vim#48
This commit is contained in:
10
jedi/api.py
10
jedi/api.py
@@ -61,18 +61,14 @@ class Script(object):
|
|||||||
:type col: int
|
:type col: int
|
||||||
:param source_path: The path in the os, the current module is in.
|
:param source_path: The path in the os, the current module is in.
|
||||||
:type source_path: string or None
|
:type source_path: string or None
|
||||||
:param source_encoding: encoding for decoding `source`, when it
|
:param source_encoding: encoding for decoding `source`, if it
|
||||||
is not a `unicode` object.
|
is not a `unicode` object.
|
||||||
:type source_encoding: string
|
:type source_encoding: string
|
||||||
"""
|
"""
|
||||||
def __init__(self, source, line, column, source_path,
|
def __init__(self, source, line, column, source_path,
|
||||||
source_encoding='utf-8'):
|
source_encoding='utf-8'):
|
||||||
debug.reset_time()
|
debug.reset_time()
|
||||||
try:
|
source = modules.source_to_unicode(source, source_encoding)
|
||||||
source = unicode(source, source_encoding, 'replace')
|
|
||||||
# Use 'replace' over 'ignore' to hold code structure.
|
|
||||||
except TypeError: # `source` is already a unicode object
|
|
||||||
pass
|
|
||||||
self.pos = line, column
|
self.pos = line, column
|
||||||
self.module = modules.ModuleWithCursor(source_path, source=source,
|
self.module = modules.ModuleWithCursor(source_path, source=source,
|
||||||
position=self.pos)
|
position=self.pos)
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ def get_directory_modules_for_name(mods, name):
|
|||||||
|
|
||||||
def check_fs(path):
|
def check_fs(path):
|
||||||
with open(path) as f:
|
with open(path) as f:
|
||||||
source = f.read()
|
source = modules.source_to_unicode(f.read())
|
||||||
if name in source:
|
if name in source:
|
||||||
return modules.Module(path, source).parser.module
|
return modules.Module(path, source).parser.module
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ class Module(builtin.CachedModule):
|
|||||||
"""
|
"""
|
||||||
def __init__(self, path, source):
|
def __init__(self, path, source):
|
||||||
super(Module, self).__init__(path=path)
|
super(Module, self).__init__(path=path)
|
||||||
self.source = source
|
self.source = source_to_unicode(source)
|
||||||
self._line_cache = None
|
self._line_cache = None
|
||||||
|
|
||||||
def _get_source(self):
|
def _get_source(self):
|
||||||
@@ -311,3 +311,33 @@ def detect_django_path(module_path):
|
|||||||
except IOError:
|
except IOError:
|
||||||
pass
|
pass
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def source_to_unicode(source, encoding=None):
    """Return `source` as a `unicode` object, decoding it if necessary.

    :param source: The source code, either already `unicode` or a byte
        string.
    :param encoding: Encoding used to decode a byte string `source`. If it
        is `None`, the encoding is detected from the source itself (see
        `detect_encoding`).
    :type encoding: string or None
    :return: `source` unchanged if it already is `unicode`, otherwise the
        decoded text. Undecodable bytes are replaced (not dropped) to hold
        the code structure.
    """
    def detect_encoding():
        """ For the implementation of encoding definitions in Python, look at:
        http://www.python.org/dev/peps/pep-0263/
        http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations

        Only called for byte strings, therefore all literals used here are
        byte literals (comparing bytes against native strings raises a
        `TypeError` on Python 3).
        """
        if encoding is not None:
            return encoding

        if source.startswith(b'\xef\xbb\xbf'):
            # UTF-8 byte-order mark
            return 'utf-8'

        # PEP 263: the declaration has to be on the first or second line.
        # Splitting (instead of matching lines that end with a newline)
        # also covers a last line without a trailing newline.
        first_two_lines = b'\n'.join(source.split(b'\n', 2)[:2])
        # `re.search`, because the declaration is never located at the very
        # beginning of the text (`re.match` would only look there). The
        # pattern follows PEP 263: a comment line containing `coding[:=]`.
        possible_encoding = re.search(
            br"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)",
            first_two_lines, re.MULTILINE)
        if possible_encoding:
            # the group contains only ASCII characters (`[-\w.]` on bytes)
            return possible_encoding.group(1).decode('ascii')

        # the default if nothing else has been set -> PEP 263
        return 'iso-8859-1'

    if isinstance(source, unicode):
        # only cast str/bytes
        return source

    # cast to unicode by default
    return unicode(source, detect_encoding(), 'replace')
|
||||||
|
|||||||
@@ -251,10 +251,22 @@ class TestRegression(Base):
|
|||||||
assert len(api.Script(s, 1, 15, '/').get_definition()) == 1
|
assert len(api.Script(s, 1, 15, '/').get_definition()) == 1
|
||||||
assert len(api.Script(s, 1, 10, '/').get_definition()) == 1
|
assert len(api.Script(s, 1, 10, '/').get_definition()) == 1
|
||||||
|
|
||||||
|
|
||||||
def test_unicode_script(self):
|
def test_unicode_script(self):
|
||||||
""" normally no unicode objects are being used. (<=2.7) """
|
""" normally no unicode objects are being used. (<=2.7) """
|
||||||
s = unicode("import datetime; datetime.d")
|
s = unicode("import datetime; datetime.timedelta")
|
||||||
assert len(self.complete(s))
|
completions = self.complete(s)
|
||||||
|
assert len(completions)
|
||||||
|
assert type(completions[0].description) is unicode
|
||||||
|
|
||||||
|
s = utf8("author='öä'; author")
|
||||||
|
completions = self.complete(s)
|
||||||
|
assert type(completions[0].description) is unicode
|
||||||
|
|
||||||
|
s = utf8("#-*- coding: iso-8859-1 -*-\nauthor='öä'; author")
|
||||||
|
s = s.encode('latin-1')
|
||||||
|
completions = self.complete(s)
|
||||||
|
assert type(completions[0].description) is unicode
|
||||||
|
|
||||||
def test_multibyte_script(self):
|
def test_multibyte_script(self):
|
||||||
""" `jedi.Script` must accept multi-byte string source. """
|
""" `jedi.Script` must accept multi-byte string source. """
|
||||||
|
|||||||
Reference in New Issue
Block a user