From c5fcebde8233d891041b6d798bbde8b75853f227 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Sun, 23 Feb 2014 11:29:00 +0100
Subject: [PATCH] changed _compatibility.utf8 -> 'u' and removed a lot of the
 issues with the now enforced unicode source input of the parser

---
 jedi/_compatibility.py                     |  6 +++---
 jedi/api/interpreter.py                    |  4 +++-
 jedi/evaluate/compiled/fake.py             |  4 ++--
 jedi/parser/fast.py                        |  4 ++--
 jedi/parser/token.py                       |  6 +++---
 jedi/parser/tokenize.py                    | 15 +++++++--------
 jedi/parser/user_context.py                |  9 +++++----
 scripts/wx_check.py                        |  2 ++
 test/test_api/test_unicode.py              | 20 ++++++++++----------
 test/test_evaluate/test_absolute_import.py |  7 ++++---
 10 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/jedi/_compatibility.py b/jedi/_compatibility.py
index edfec111..a862b74b 100644
--- a/jedi/_compatibility.py
+++ b/jedi/_compatibility.py
@@ -90,11 +90,11 @@ except NameError:
     unicode = str
 
 if is_py3:
-    utf8 = lambda s: s
+    u = lambda s: s
 else:
-    utf8 = lambda s: s.decode('utf-8')
+    u = lambda s: s.decode('utf-8')
 
-utf8.__doc__ = """
+u.__doc__ = """
 Decode a raw string into unicode object.  Do nothing in Python 3.
 """
 
diff --git a/jedi/api/interpreter.py b/jedi/api/interpreter.py
index cf38dff5..48093046 100644
--- a/jedi/api/interpreter.py
+++ b/jedi/api/interpreter.py
@@ -3,6 +3,7 @@ import re
 
 from jedi._compatibility import builtins
 from jedi import debug
+from jedi.common import source_to_unicode
 from jedi.cache import underscore_memoization
 from jedi.evaluate import compiled
 from jedi.evaluate.compiled.fake import get_module
@@ -68,7 +69,8 @@ class LazyName(helpers.FakeName):
         if path.endswith('.py'):
             # cut the `c` from `.pyc`
             with open(path) as f:
-                mod = FastParser(f.read(), path[:-1]).module
+                source = source_to_unicode(f.read())
+                mod = FastParser(source, path[:-1]).module
             if not parser_path:
                 return mod
             found = self._evaluator.eval_call_path(iter(parser_path), mod, None)
diff --git a/jedi/evaluate/compiled/fake.py b/jedi/evaluate/compiled/fake.py
index f4e44c9a..091c3129 100644
--- a/jedi/evaluate/compiled/fake.py
+++ b/jedi/evaluate/compiled/fake.py
@@ -7,7 +7,7 @@ mixing in Python code, the autocompletion should work much better for builtins.
 import os
 import inspect
 
-from jedi._compatibility import is_py3, builtins
+from jedi._compatibility import is_py3, builtins, unicode
 from jedi.parser import Parser
 from jedi.parser import token as token_pr
 from jedi.parser.representation import Class
@@ -31,7 +31,7 @@ def _load_faked_module(module):
     except IOError:
         modules[module_name] = None
         return
-    module = Parser(source, module_name).module
+    module = Parser(unicode(source), module_name).module
     modules[module_name] = module
 
     if module_name == 'builtins' and not is_py3:
diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index c3eee157..ced65ce8 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -5,7 +5,7 @@ finished (and still not working as I want), I won't document it any further.
""" import re -from jedi._compatibility import use_metaclass +from jedi._compatibility import use_metaclass, unicode from jedi import settings from jedi import common from jedi.parser import Parser @@ -275,7 +275,7 @@ class FastParser(use_metaclass(CachedFastParser)): def _parse(self, code): """ :type code: str """ def empty_parser(): - new, temp = self._get_parser('', '', 0, [], False) + new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False) return new parts = self._split_parts(code) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 292d4a52..a42b2358 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -8,7 +8,7 @@ found that a flat object with slots is the best. from inspect import cleandoc from ast import literal_eval -from jedi._compatibility import utf8, unicode +from jedi._compatibility import u, unicode class Token(object): @@ -37,7 +37,7 @@ class Token(object): 4 >>> Token.from_tuple((6, 5, (4, 3))) - >>> unicode(Token(1, utf8("😷"), 1 ,1)) + "p" == utf8("😷p") + >>> unicode(Token(1, u("😷"), 1 ,1)) + "p" == u("😷p") True """ __slots__ = ("_token_type", "_token", "_start_pos_line", "_start_pos_col") @@ -166,4 +166,4 @@ class TokenDocstring(TokenNoCompat): def as_string(self): """Returns a literal cleaned version of the token""" - return cleandoc(literal_eval(self.token)) + return unicode(cleandoc(literal_eval(self.token))) diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index 7c155498..a8f3879e 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -12,7 +12,8 @@ from __future__ import absolute_import import string import re from io import StringIO -from token import * +from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, + ERRORTOKEN, NEWLINE) import collections cookie_re = re.compile("coding[:=]\s*([-\w.]+)") @@ -23,9 +24,8 @@ namechars = string.ascii_letters + '_' COMMENT = N_TOKENS tok_name[COMMENT] = 'COMMENT' -ENCODING = N_TOKENS + 2 +ENCODING = N_TOKENS + 1 tok_name[ENCODING] = 'ENCODING' -N_TOKENS += 3 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end')): @@ -153,7 +153,6 @@ def generate_tokens(readline, line_offset=0): Modified to not care about dedents. """ lnum = line_offset - continued = False numchars = '0123456789' contstr = '' contline = None @@ -161,7 +160,7 @@ def generate_tokens(readline, line_offset=0): line = readline() # readline returns empty if it's finished. 
         if not line:
             if contstr:
-                yield TokenInfo(ERRORTOKEN, contstr, strstart, (lnum, pos))
+                yield TokenInfo(ERRORTOKEN, contstr, contstr_start, (lnum, pos))
             break
 
         lnum += 1
@@ -171,7 +170,7 @@
             endmatch = endprog.match(line)
             if endmatch:
                 pos = end = endmatch.end(0)
-                yield TokenInfo(STRING, contstr + line[:end], strstart, (lnum, end))
+                yield TokenInfo(STRING, contstr + line[:end], contstr_start, (lnum, end))
                 contstr = ''
                 contline = None
             else:
@@ -207,7 +206,7 @@
                     token = line[start:pos]
                     yield TokenInfo(STRING, token, spos, (lnum, pos))
                 else:
-                    strstart = (lnum, start)  # multiple lines
+                    contstr_start = (lnum, start)  # multiple lines
                     contstr = line[start:]
                     contline = line
                     break
@@ -215,7 +214,7 @@
                 token[:2] in single_quoted or \
                 token[:3] in single_quoted:
                     if token[-1] == '\n':  # continued string
-                        strstart = (lnum, start)
+                        contstr_start = lnum, start
                         endprog = (endprogs[initial] or endprogs[token[1]] or
                                    endprogs[token[2]])
                         contstr = line[start:]
diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py
index e69a3b7f..cdbb7e70 100644
--- a/jedi/parser/user_context.py
+++ b/jedi/parser/user_context.py
@@ -3,6 +3,7 @@ import os
 
 from jedi import cache
 from jedi.parser import tokenize
+from jedi._compatibility import u
 from jedi.parser.fast import FastParser
 from jedi.parser import representation
 from jedi import debug
@@ -70,7 +71,7 @@ class UserContext(object):
         for token_type, tok, start, end in gen:
             if is_first:
                 if start != (1, 0):  # whitespace is not a path
-                    return '', start_cursor
+                    return u(''), start_cursor
                 is_first = False
 
             # print 'tok', token_type, tok, force_point
@@ -167,14 +168,14 @@
             self._line_cache = self.source.splitlines()
             if self.source:
                 if self.source[-1] == '\n':
-                    self._line_cache.append('')
+                    self._line_cache.append(u(''))
             else:  # ''.splitlines() == []
-                self._line_cache = ['']
+                self._line_cache = [u('')]
 
         if line_nr == 0:
             # This is a fix for the zeroth line. We need a newline there, for
             # the backwards parser.
-            return ''
+            return u('')
         if line_nr < 0:
             raise StopIteration()
         try:
diff --git a/scripts/wx_check.py b/scripts/wx_check.py
index 1e48c39e..5c6e997e 100755
--- a/scripts/wx_check.py
+++ b/scripts/wx_check.py
@@ -37,6 +37,7 @@ def process_memory():
 
 uri = 'http://svn.wxwidgets.org/viewvc/wx/wxPython/trunk/src/gtk/_core.py?revision=74740&content-type=text%2Fplain&view=co'
 wx_core = urllib2.urlopen(uri).read()
+wx_core = wx_core[:1]
 
 
 def run():
@@ -44,6 +45,7 @@ def run():
     print('Process Memory before: %skB' % process_memory())
     # After this the module should be cached.
     # Need to invent a path so that it's really cached.
+    print type(wx_core), wx_core
     jedi.Script(wx_core, path='foobar.py').completions()
 
     gc.collect()  # make sure that it's all fair and the gc did its job.
diff --git a/test/test_api/test_unicode.py b/test/test_api/test_unicode.py
index 895be6bd..d6b6cbe1 100644
--- a/test/test_api/test_unicode.py
+++ b/test/test_api/test_unicode.py
@@ -3,7 +3,7 @@ All character set and unicode related tests.
""" from jedi import Script -from jedi._compatibility import utf8, unicode +from jedi._compatibility import u, unicode def test_unicode_script(): @@ -13,12 +13,12 @@ def test_unicode_script(): assert len(completions) assert type(completions[0].description) is unicode - s = utf8("author='öä'; author") + s = u("author='öä'; author") completions = Script(s).completions() x = completions[0].description assert type(x) is unicode - s = utf8("#-*- coding: iso-8859-1 -*-\nauthor='öä'; author") + s = u("#-*- coding: iso-8859-1 -*-\nauthor='öä'; author") s = s.encode('latin-1') completions = Script(s).completions() assert type(completions[0].description) is unicode @@ -26,12 +26,12 @@ def test_unicode_script(): def test_unicode_attribute(): """ github jedi-vim issue #94 """ - s1 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n' - ' name = "e"\n\nPerson().name.') + s1 = u('#-*- coding: utf-8 -*-\nclass Person():\n' + ' name = "e"\n\nPerson().name.') completions1 = Script(s1).completions() assert 'strip' in [c.name for c in completions1] - s2 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n' - ' name = "é"\n\nPerson().name.') + s2 = u('#-*- coding: utf-8 -*-\nclass Person():\n' + ' name = "é"\n\nPerson().name.') completions2 = Script(s2).completions() assert 'strip' in [c.name for c in completions2] @@ -39,9 +39,9 @@ def test_unicode_attribute(): def test_multibyte_script(): """ `jedi.Script` must accept multi-byte string source. """ try: - code = unicode("import datetime; datetime.d") - comment = utf8("# multi-byte comment あいうえおä") - s = (unicode('%s\n%s') % (code, comment)).encode('utf-8') + code = u("import datetime; datetime.d") + comment = u("# multi-byte comment あいうえおä") + s = (u('%s\n%s') % (code, comment)).encode('utf-8') except NameError: pass # python 3 has no unicode method else: diff --git a/test/test_evaluate/test_absolute_import.py b/test/test_evaluate/test_absolute_import.py index f274ac33..932356b2 100644 --- a/test/test_evaluate/test_absolute_import.py +++ b/test/test_evaluate/test_absolute_import.py @@ -3,6 +3,7 @@ Tests ``from __future__ import absolute_import`` (only important for Python 2.X) """ import jedi +from jedi._compatibility import u from jedi.parser import Parser from .. import helpers @@ -11,7 +12,7 @@ def test_explicit_absolute_imports(): """ Detect modules with ``from __future__ import absolute_import``. """ - parser = Parser("from __future__ import absolute_import", "test.py") + parser = Parser(u("from __future__ import absolute_import"), "test.py") assert parser.module.has_explicit_absolute_import @@ -19,7 +20,7 @@ def test_no_explicit_absolute_imports(): """ Detect modules without ``from __future__ import absolute_import``. """ - parser = Parser("1", "test.py") + parser = Parser(u("1"), "test.py") assert not parser.module.has_explicit_absolute_import @@ -28,7 +29,7 @@ def test_dont_break_imports_without_namespaces(): The code checking for ``from __future__ import absolute_import`` shouldn't assume that all imports have non-``None`` namespaces. """ - src = "from __future__ import absolute_import\nimport xyzzy" + src = u("from __future__ import absolute_import\nimport xyzzy") parser = Parser(src, "test.py") assert parser.module.has_explicit_absolute_import