From 15221bc8f59f4b16570340d50fe3150b59584418 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Sat, 30 Jul 2016 03:34:24 +0200
Subject: [PATCH] Make sure that the encoding parameters are always right.

---
 jedi/common.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/jedi/common.py b/jedi/common.py
index 7fe33bf4..ae10f423 100644
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -128,8 +128,8 @@ def source_to_unicode(source, encoding=None):
             # UTF-8 byte-order mark
             return 'utf-8'
 
-        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source.decode('iso-8859-1')).group(0)
-        possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
+        first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
+        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
                                       first_two_lines)
         if possible_encoding:
             return possible_encoding.group(1)
@@ -141,8 +141,11 @@ def source_to_unicode(source, encoding=None):
         # only cast str/bytes
         return source
 
+    encoding = detect_encoding()
+    if not isinstance(encoding, unicode):
+        encoding = unicode(encoding, 'utf-8', 'replace')
     # cast to unicode by default
-    return unicode(source, detect_encoding(), 'replace')
+    return unicode(source, encoding, 'replace')
 
 
 def splitlines(string):