From 9c152119ada6f508877951afb37ca5d2f81d0c89 Mon Sep 17 00:00:00 2001
From: Andrew Rodionoff <andviro@gmail.com>
Date: Fri, 25 Jan 2013 22:57:23 +0400
Subject: [PATCH 1/3] more robust source encoding detection

---
 .gitignore      | 1 +
 jedi/modules.py | 7 ++-----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1f303fe4..5c08b8e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+*~
 *.swp
 *.swo
 .ropeproject
diff --git a/jedi/modules.py b/jedi/modules.py
index 75c89ff2..a55290d7 100644
--- a/jedi/modules.py
+++ b/jedi/modules.py
@@ -328,20 +328,17 @@ def source_to_unicode(source, encoding=None):
         http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
                                                                 declarations
         """
-        if encoding is not None:
-            return encoding
-
         if source.startswith('\xef\xbb\xbf'):
             # UTF-8 byte-order mark
             return 'utf-8'
 
         first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source).group(0)
-        possible_encoding = re.match("coding[=:]\s*([-\w.]+)", first_two_lines)
+        possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", first_two_lines)
         if possible_encoding:
             return possible_encoding.group(1)
         else:
             # the default if nothing else has been set -> PEP 263
-            return 'iso-8859-1'
+            return encoding if encoding is not None else 'iso-8859-1'
 
     if isinstance(source, unicode):
         # only cast str/bytes

From 0315da9699a8fde88828887f0f138b9892edb94d Mon Sep 17 00:00:00 2001
From: Andrew Rodionoff <andviro@gmail.com>
Date: Sat, 26 Jan 2013 02:10:26 +0400
Subject: [PATCH 2/3] fix compatibility with Python 3

---
 jedi/modules.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jedi/modules.py b/jedi/modules.py
index a55290d7..1f069e2e 100644
--- a/jedi/modules.py
+++ b/jedi/modules.py
@@ -328,11 +328,11 @@ def source_to_unicode(source, encoding=None):
         http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
                                                                 declarations
         """
-        if source.startswith('\xef\xbb\xbf'):
+        if source.startswith(b'\xef\xbb\xbf'):
             # UTF-8 byte-order mark
             return 'utf-8'
 
-        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source).group(0)
+        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
         possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", first_two_lines)
         if possible_encoding:
             return possible_encoding.group(1)

From 048608f4b7c8f37bcfdbde76457fa68568447a05 Mon Sep 17 00:00:00 2001
From: Andrew Rodionoff <andviro@gmail.com>
Date: Sat, 26 Jan 2013 09:26:04 +0400
Subject: [PATCH 3/3] restore compatibility with Python 2.5

---
 jedi/modules.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/jedi/modules.py b/jedi/modules.py
index 1f069e2e..f6a84243 100644
--- a/jedi/modules.py
+++ b/jedi/modules.py
@@ -1,6 +1,6 @@
 from __future__ import with_statement
 
-from _compatibility import exec_function, unicode
+from _compatibility import exec_function, unicode, is_py25, literal_eval
 
 import re
 import tokenize
@@ -328,7 +328,8 @@ def source_to_unicode(source, encoding=None):
         http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
                                                                 declarations
         """
-        if source.startswith(b'\xef\xbb\xbf'):
+        byte_mark = '\xef\xbb\xbf' if is_py25 else literal_eval(r"b'\xef\xbb\xbf'")
+        if source.startswith(byte_mark):
             # UTF-8 byte-order mark
             return 'utf-8'