From 8b31117faf71bb1cf0da026134759376f5f825d5 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 10 Nov 2012 19:55:56 +0100 Subject: [PATCH 1/2] Add a test for multi-byte string source --- jedi/_compatibility.py | 9 +++++++++ test/regression.py | 14 +++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/jedi/_compatibility.py b/jedi/_compatibility.py index 11bd1f89..d7015268 100644 --- a/jedi/_compatibility.py +++ b/jedi/_compatibility.py @@ -60,6 +60,15 @@ try: except NameError: unicode = str +if is_py3k: + utf8 = lambda s: s +else: + utf8 = lambda s: s.decode('utf-8') + +utf8.__doc__ = """ +Decode a raw string into unicode object. Do nothing in Python 3. +""" + # exec function if is_py3k: def exec_function(source, global_map): diff --git a/test/regression.py b/test/regression.py index 3e7d0f61..c8b98d66 100755 --- a/test/regression.py +++ b/test/regression.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- import os import sys import unittest @@ -10,7 +11,7 @@ sys.path.append(abspath(dirname(abspath(__file__)) + '/../jedi')) os.chdir(os.path.dirname(os.path.abspath(__file__)) + '/../jedi') sys.path.append('.') -from _compatibility import is_py25 +from _compatibility import is_py25, utf8 import api #api.set_debug_function(api.debug.print_to_stdout) @@ -206,6 +207,17 @@ class TestRegression(Base): else: assert len(self.complete(s)) + def test_multibyte_script(self): + """ `jedi.Script` must accept multi-byte string source. """ + try: + code = unicode("import datetime; datetime.d") + comment = utf8("# multi-byte comment あいうえお") + s = (unicode('%s\n%s') % (code, comment)).encode('utf-8') + except NameError: + pass # python 3 has no unicode method + else: + assert len(self.complete(s, (1, len(code)))) + def test_os_nowait(self): """ github issue #45 """ s = self.complete("import os; os.P_") From 817ab133d0c8d77186f6457d859903b0f107927a Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 10 Nov 2012 20:45:11 +0100 Subject: [PATCH 2/2] Fix the failing multibyte source test The optional source_encoding optional argument is added to jedi.api.Script.__init__. --- jedi/api.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/jedi/api.py b/jedi/api.py index aba7fbee..2e0e3263 100644 --- a/jedi/api.py +++ b/jedi/api.py @@ -60,9 +60,17 @@ class Script(object): :type col: int :param source_path: The path in the os, the current module is in. :type source_path: string or None + :param source_encoding: encoding for decoding `source`, when it + is not a `unicode` object. + :type source_encoding: string """ - def __init__(self, source, line, column, source_path): - source = unicode(source) + def __init__(self, source, line, column, source_path, + source_encoding='utf-8'): + try: + source = unicode(source, source_encoding, 'replace') + # Use 'replace' over 'ignore' to hold code structure. + except TypeError: # `source` is already a unicode object + pass self.pos = line, column self.module = modules.ModuleWithCursor(source_path, source=source, position=self.pos)