diff --git a/jedi/_compatibility.py b/jedi/_compatibility.py index 11bd1f89..d7015268 100644 --- a/jedi/_compatibility.py +++ b/jedi/_compatibility.py @@ -60,6 +60,15 @@ try: except NameError: unicode = str +if is_py3k: + utf8 = lambda s: s +else: + utf8 = lambda s: s.decode('utf-8') + +utf8.__doc__ = """ +Decode a raw string into unicode object. Do nothing in Python 3. +""" + # exec function if is_py3k: def exec_function(source, global_map): diff --git a/jedi/api.py b/jedi/api.py index 751026bd..a765fbc6 100644 --- a/jedi/api.py +++ b/jedi/api.py @@ -60,9 +60,17 @@ class Script(object): :type col: int :param source_path: The path in the os, the current module is in. :type source_path: string or None + :param source_encoding: encoding for decoding `source`, when it + is not a `unicode` object. + :type source_encoding: string """ - def __init__(self, source, line, column, source_path): - source = unicode(source) + def __init__(self, source, line, column, source_path, + source_encoding='utf-8'): + try: + source = unicode(source, source_encoding, 'replace') + # Use 'replace' over 'ignore' to hold code structure. + except TypeError: # `source` is already a unicode object + pass self.pos = line, column self.module = modules.ModuleWithCursor(source_path, source=source, position=self.pos) diff --git a/test/regression.py b/test/regression.py index 498762a7..792f87f9 100755 --- a/test/regression.py +++ b/test/regression.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- import os import sys import unittest @@ -10,7 +11,7 @@ sys.path.append(abspath(dirname(abspath(__file__)) + '/../jedi')) os.chdir(os.path.dirname(os.path.abspath(__file__)) + '/../jedi') sys.path.append('.') -from _compatibility import is_py25 +from _compatibility import is_py25, utf8 import api #api.set_debug_function(api.debug.print_to_stdout) @@ -211,6 +212,17 @@ class TestRegression(Base): else: assert len(self.complete(s)) + def test_multibyte_script(self): + """ `jedi.Script` must accept multi-byte string source. """ + try: + code = unicode("import datetime; datetime.d") + comment = utf8("# multi-byte comment あいうえお") + s = (unicode('%s\n%s') % (code, comment)).encode('utf-8') + except NameError: + pass # python 3 has no unicode method + else: + assert len(self.complete(s, (1, len(code)))) + def test_os_nowait(self): """ github issue #45 """ s = self.complete("import os; os.P_")