From 9ed0dc48615612bca2dec307fcb54edaab1f7147 Mon Sep 17 00:00:00 2001
From: Jorgen Schaefer <contact@jorgenschaefer.de>
Date: Wed, 23 Apr 2014 16:25:12 +0200
Subject: [PATCH] Always load source files in binary mode.

Source files can be in any encoding, as long as Python can find a
"coding:" declaration (PEP 263) at the beginning of the file. Source
files should therefore be opened in binary mode and decoded according
to that declaration, rather than being read in text mode, which
assumes the default encoding.

Fixes #398.
---
 AUTHORS.txt              |  1 +
 jedi/_compatibility.py   |  4 +---
 jedi/evaluate/imports.py | 10 +++++-----
 test/test_regression.py  | 17 +++++++++++++++++
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/AUTHORS.txt b/AUTHORS.txt
index 0bbe5b5b..d37dedcf 100644
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -22,5 +22,6 @@ Akinori Hattori (@hattya)
 srusskih (@srusskih)
 Steven Silvester (@blink1073)
 Colin Duquesnoy (@ColinDuquesnoy) <colin.duquesnoy@gmail.com>
+Jorgen Schaefer (@jorgenschaefer) <contact@jorgenschaefer.de>
 
 Note: (@user) means a github user name.
diff --git a/jedi/_compatibility.py b/jedi/_compatibility.py
index fdc11b6d..c699a66c 100644
--- a/jedi/_compatibility.py
+++ b/jedi/_compatibility.py
@@ -31,9 +31,7 @@ def find_module_py33(string, path=None):
             module_file = None
         else:
             module_path = loader.get_filename(string)
-            module_ext = os.path.splitext(module_path)[1]
-            mode = 'rb' if module_ext in ['.pyc', '.so', '.pyd'] else 'r'
-            module_file = open(module_path, mode)
+            module_file = open(module_path, 'rb')
     except AttributeError:
         # ExtensionLoader has not attribute get_filename, instead it has a
         # path attribute that we can use to retrieve the module path
diff --git a/jedi/evaluate/imports.py b/jedi/evaluate/imports.py
index b923b880..8ff05ca8 100644
--- a/jedi/evaluate/imports.py
+++ b/jedi/evaluate/imports.py
@@ -331,8 +331,8 @@ class _Importer(object):
                         deeper_paths.append(new)
                 return follow_path(directories, deeper_paths)
 
-        with open(os.path.join(found_path, '__init__.py')) as f:
-            content = f.read()
+        with open(os.path.join(found_path, '__init__.py'), 'rb') as f:
+            content = common.source_to_unicode(f.read())
             # these are strings that need to be used for namespace packages,
             # the first one is ``pkgutil``, the second ``pkg_resources``.
             options = 'declare_namespace(__name__)', 'extend_path(__path__'
@@ -406,7 +406,7 @@ class _Importer(object):
             # is a directory module
             if is_package_directory:
                 path += '/__init__.py'
-                with open(path) as f:
+                with open(path, 'rb') as f:
                     source = f.read()
             else:
                 source = current_namespace[0].read()
@@ -454,7 +454,7 @@ def load_module(path=None, source=None, name=None):
     def load(source):
         if path is not None and path.endswith('.py'):
             if source is None:
-                with open(path) as f:
+                with open(path, 'rb') as f:
                     source = f.read()
         else:
             return compiled.load_module(path, name)
@@ -481,7 +481,7 @@ def get_modules_containing_name(mods, name):
                 return None
 
     def check_fs(path):
-        with open(path) as f:
+        with open(path, 'rb') as f:
             source = source_to_unicode(f.read())
             if name in source:
                 return load_module(path, source)
diff --git a/test/test_regression.py b/test/test_regression.py
index 846307c1..bc490d17 100644
--- a/test/test_regression.py
+++ b/test/test_regression.py
@@ -4,6 +4,7 @@ found a good place in any other testing module.
 """
 
 import os
+import sys
 import textwrap
 
 from .helpers import TestCase, cwd_at
@@ -154,3 +155,19 @@ class TestRegression(TestCase):
             "    yield 1\n" \
             "abc()."
         assert Script(s).completions()
+
+
+def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir):
+    dirname = str(tmpdir.mkdir('jedi-test'))
+    filename1 = os.path.join(dirname, 'test1.py')
+    filename2 = os.path.join(dirname, 'test2.py')
+    if sys.version_info < (3, 0):
+        data = "# coding: latin-1\nfoo = 'm\xf6p'\n"
+    else:
+        data = "# coding: latin-1\nfoo = 'm\xf6p'\n".encode("latin-1")
+
+    with open(filename1, "wb") as f:
+        f.write(data)
+    s = Script("from test1 import foo\nfoo.",
+               line=2, column=4, path=filename2)
+    s.complete()