From 9ed0dc48615612bca2dec307fcb54edaab1f7147 Mon Sep 17 00:00:00 2001 From: Jorgen Schaefer Date: Wed, 23 Apr 2014 16:25:12 +0200 Subject: [PATCH] Always load source files in binary mode. Source files can be in any coding system, provided Python can read a coding: line at the beginning of the file. So source files should be loaded in binary format and decoded according to that line, not assumed to be in the default coding system. Fixes #398. --- AUTHORS.txt | 1 + jedi/_compatibility.py | 4 +--- jedi/evaluate/imports.py | 10 +++++----- test/test_regression.py | 17 +++++++++++++++++ 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/AUTHORS.txt b/AUTHORS.txt index 0bbe5b5b..d37dedcf 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -22,5 +22,6 @@ Akinori Hattori (@hattya) srusskih (@srusskih) Steven Silvester (@blink1073) Colin Duquesnoy (@ColinDuquesnoy) +Jorgen Schaefer (@jorgenschaefer) Note: (@user) means a github user name. diff --git a/jedi/_compatibility.py b/jedi/_compatibility.py index fdc11b6d..c699a66c 100644 --- a/jedi/_compatibility.py +++ b/jedi/_compatibility.py @@ -31,9 +31,7 @@ def find_module_py33(string, path=None): module_file = None else: module_path = loader.get_filename(string) - module_ext = os.path.splitext(module_path)[1] - mode = 'rb' if module_ext in ['.pyc', '.so', '.pyd'] else 'r' - module_file = open(module_path, mode) + module_file = open(module_path, 'rb') except AttributeError: # ExtensionLoader has not attribute get_filename, instead it has a # path attribute that we can use to retrieve the module path diff --git a/jedi/evaluate/imports.py b/jedi/evaluate/imports.py index b923b880..8ff05ca8 100644 --- a/jedi/evaluate/imports.py +++ b/jedi/evaluate/imports.py @@ -331,8 +331,8 @@ class _Importer(object): deeper_paths.append(new) return follow_path(directories, deeper_paths) - with open(os.path.join(found_path, '__init__.py')) as f: - content = f.read() + with open(os.path.join(found_path, '__init__.py'), 'rb') as f: + content = common.source_to_unicode(f.read()) # these are strings that need to be used for namespace packages, # the first one is ``pkgutil``, the second ``pkg_resources``. options = 'declare_namespace(__name__)', 'extend_path(__path__' @@ -406,7 +406,7 @@ class _Importer(object): # is a directory module if is_package_directory: path += '/__init__.py' - with open(path) as f: + with open(path, 'rb') as f: source = f.read() else: source = current_namespace[0].read() @@ -454,7 +454,7 @@ def load_module(path=None, source=None, name=None): def load(source): if path is not None and path.endswith('.py'): if source is None: - with open(path) as f: + with open(path, 'rb') as f: source = f.read() else: return compiled.load_module(path, name) @@ -481,7 +481,7 @@ def get_modules_containing_name(mods, name): return None def check_fs(path): - with open(path) as f: + with open(path, 'rb') as f: source = source_to_unicode(f.read()) if name in source: return load_module(path, source) diff --git a/test/test_regression.py b/test/test_regression.py index 846307c1..bc490d17 100644 --- a/test/test_regression.py +++ b/test/test_regression.py @@ -4,6 +4,7 @@ found a good place in any other testing module. """ import os +import sys import textwrap from .helpers import TestCase, cwd_at @@ -154,3 +155,19 @@ class TestRegression(TestCase): " yield 1\n" \ "abc()." assert Script(s).completions() + + +def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir): + dirname = str(tmpdir.mkdir('jedi-test')) + filename1 = os.path.join(dirname, 'test1.py') + filename2 = os.path.join(dirname, 'test2.py') + if sys.version_info < (3, 0): + data = "# coding: latin-1\nfoo = 'm\xf6p'\n" + else: + data = "# coding: latin-1\nfoo = 'm\xf6p'\n".encode("latin-1") + + with open(filename1, "wb") as f: + f.write(data) + s = Script("from test1 import foo\nfoo.", + line=2, column=4, path=filename2) + s.complete()