From eaace104ddbfaaec37940a228797cf57363455db Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 16 Dec 2014 00:09:31 +0100 Subject: [PATCH] Replace the tokenizer's output with a tuple (switching back from a Token class). --- jedi/parser/__init__.py | 7 +- jedi/parser/fast.py | 5 +- jedi/parser/tokenize.py | 107 ++++-------------------------- jedi/parser/user_context.py | 20 +++--- test/test_parser/test_tokenize.py | 47 +++++++------ 5 files changed, 50 insertions(+), 136 deletions(-) diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 6123b4f8..0079cef3 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -291,13 +291,10 @@ class Parser(object): self._scope = self.module """ - new_scope = False - for token in tokenizer: - typ = token.type - value = token.value + for typ, value, start_pos, prefix in tokenizer: if typ == tokenize.OP: typ = grammar.opmap[value] - yield typ, value, token.prefix, token.start_pos + yield typ, value, prefix, start_pos def __repr__(self): return "<%s: %s>" % (type(self).__name__, self.module) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index 7c5877fb..69b02e97 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -12,8 +12,7 @@ from jedi.parser import Parser from jedi.parser import tree as pr from jedi.parser import tokenize from jedi import cache -from jedi.parser.tokenize import (source_tokens, Token, FLOWS, NEWLINE, - COMMENT, ENDMARKER) +from jedi.parser.tokenize import source_tokens, FLOWS, NEWLINE, COMMENT, ENDMARKER class Module(pr.Module, pr.Simple): @@ -387,7 +386,7 @@ class FastTokenizer(object): self.closed = False # fast parser options - self.current = self.previous = Token(None, '', (0, 0)) + self.current = self.previous = None, '', (0, 0) self.in_flow = False self.new_indent = False self.parser_indent = self.old_parser_indent = 0 diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index 010eb890..d5e48df7 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -17,8 +17,6 @@ from io import StringIO from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) -from jedi._compatibility import u - cookie_re = re.compile("coding[:=]\s*([-\w.]+)") @@ -34,82 +32,6 @@ COMMENT = N_TOKENS tok_name[COMMENT] = 'COMMENT' -class Token(object): - """ - The token object is an efficient representation of the structure - (type, token, (start_pos_line, start_pos_col, prefix)). It has indexer - methods that maintain compatibility to existing code that expects the above - structure. 
- - >>> repr(Token(1, "test", (1, 1, ''))) - "" - >>> Token(1, 'bar', (3, 4, '')).__getstate__() - (1, 'bar', 3, 4, '') - >>> a = Token(0, 'baz', (0, 0, '')) - >>> a.__setstate__((1, 'foo', 3, 4, '')) - >>> a - - >>> a.start_pos - (3, 4) - >>> a.value - 'foo' - >>> a._start_pos_col - 4 - >>> Token(1, u("😷"), (1 ,1, '')).value + "p" == u("😷p") - True - """ - __slots__ = ("type", "value", "_start_pos_line", "_start_pos_col", - "prefix") - - def __init__(self, type, value, start_pos, prefix=''): - self.type = type - self.value = value - self._start_pos_line = start_pos[0] - self._start_pos_col = start_pos[1] - self.prefix = prefix - - def __repr__(self): - typ = tok_name[self.type] - content = typ, self.value,\ - (self._start_pos_line, self._start_pos_col, self.prefix) - return "<%s: %s>" % (type(self).__name__, content) - - @property - def start_pos(self): - return self._start_pos_line, self._start_pos_col - - @property - def end_pos(self): - """Returns end position respecting multiline tokens.""" - end_pos_line = self._start_pos_line - lines = self.value.split('\n') - if self.value.endswith('\n'): - lines = lines[:-1] - lines[-1] += '\n' - end_pos_line += len(lines) - 1 - end_pos_col = self._start_pos_col - # Check for multiline token - if self._start_pos_line == end_pos_line: - end_pos_col += len(lines[-1]) - else: - end_pos_col = len(lines[-1]) - return (end_pos_line, end_pos_col) - - # Make cache footprint smaller for faster unpickling - def __getstate__(self): - return (self.type, self.value, - self._start_pos_line, self._start_pos_col, - self.prefix) - - # TODO DELETE this is not needed anymore, I guess. It should not get pickled. - def __setstate__(self, state): - self.type = state[0] - self.value = state[1] - self._start_pos_line = state[2] - self._start_pos_col = state[3] - self.prefix = state[4] - - def group(*choices): return '(' + '|'.join(choices) + ')' @@ -239,7 +161,7 @@ def generate_tokens(readline, line_offset=0): line = readline() # readline returns empty when finished. See StringIO if not line: if contstr: - yield Token(ERRORTOKEN, contstr, contstr_start, prefix) + yield ERRORTOKEN, contstr, contstr_start, prefix break lnum += 1 @@ -249,8 +171,7 @@ def generate_tokens(readline, line_offset=0): endmatch = endprog.match(line) if endmatch: pos = endmatch.end(0) - yield Token(STRING, contstr + line[:pos], - contstr_start, prefix) + yield STRING, contstr + line[:pos], contstr_start, prefix contstr = '' contline = None else: @@ -266,7 +187,7 @@ def generate_tokens(readline, line_offset=0): # If a literal starts but doesn't end the whole rest of the # line is an error token. txt = line[pos:] - yield Token(ERRORTOKEN, txt, (lnum, pos)) + yield ERRORTOKEN, txt, (lnum, pos), prefix pos += 1 continue @@ -279,18 +200,18 @@ def generate_tokens(readline, line_offset=0): new_line = False if paren_level == 0: if start > indents[-1]: - yield Token(INDENT, '', spos, '') + yield INDENT, '', spos, '' indents.append(start) while start < indents[-1]: - yield Token(DEDENT, '', spos, '') + yield DEDENT, '', spos, '' indents.pop() if (initial in numchars or # ordinary number (initial == '.' and token != '.' 
and token != '...')): - yield Token(NUMBER, token, spos, prefix) + yield NUMBER, token, spos, prefix elif initial in '\r\n': if not new_line and paren_level == 0: - yield Token(NEWLINE, token, spos, prefix) + yield NEWLINE, token, spos, prefix new_line = True elif initial == '#': assert not token.endswith("\n") @@ -301,7 +222,7 @@ def generate_tokens(readline, line_offset=0): if endmatch: # all on one line pos = endmatch.end(0) token = line[start:pos] - yield Token(STRING, token, spos, prefix) + yield STRING, token, spos, prefix else: contstr_start = (lnum, start) # multiple lines contstr = line[start:] @@ -318,18 +239,18 @@ def generate_tokens(readline, line_offset=0): contline = line break else: # ordinary string - yield Token(STRING, token, spos, prefix) + yield STRING, token, spos, prefix elif initial in namechars: # ordinary name if token in ALWAYS_BREAK_TOKEN: paren_level = 0 while True: indent = indents.pop() if indent > start: - yield Token(DEDENT, '', (lnum, 0), '') + yield DEDENT, '', (lnum, 0), '' else: indents.append(indent) break - yield Token(NAME, token, spos, prefix) + yield NAME, token, spos, prefix elif initial == '\\' and line[start:] == '\\\n': # continued stmt continue else: @@ -337,8 +258,8 @@ def generate_tokens(readline, line_offset=0): paren_level += 1 elif token in ')]}': paren_level -= 1 - yield Token(OP, token, spos, prefix) + yield OP, token, spos, prefix for indent in indents[1:]: - yield Token(DEDENT, '', (lnum, 0), '') - yield Token(ENDMARKER, '', (lnum, 0), prefix) + yield DEDENT, '', (lnum, 0), '' + yield ENDMARKER, '', (lnum, 0), prefix diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py index 5900606a..3a2ab85b 100644 --- a/jedi/parser/user_context.py +++ b/jedi/parser/user_context.py @@ -74,13 +74,12 @@ class UserContext(object): force_point = False last_type = None is_first = True - for tok in gen: - tok_type = tok.type - tok_str = tok.value - end = tok.end_pos + for tok_type, tok_str, tok_start_pos, prefix in gen: + # TODO end is not correct, doesn't take new lines in consideration. 
+ end = tok_start_pos[0], tok_start_pos[-1] + len(tok_str) self._column_temp = self._line_length - end[1] if is_first: - if tok.start_pos != (1, 0): # whitespace is not a path + if tok_start_pos != (1, 0): # whitespace is not a path return u(''), start_cursor is_first = False @@ -118,7 +117,7 @@ class UserContext(object): else: if tok_str == '-': next_tok = next(gen) - if next_tok.value == 'e': + if next_tok[1] == 'e': gen.push_back(next_tok) else: break @@ -166,16 +165,15 @@ class UserContext(object): next_must_be_name = False next_is_key = False key_name = None - for token in self._get_backwards_tokenizer(self.position): - tok_str = token.value + for tok_type, tok_str, start_pos, prefix in self._get_backwards_tokenizer(self.position): if next_must_be_name: - if token.type == tokenize.NAME: + if tok_type == tokenize.NAME: call, _ = self._calc_path_until_cursor(start_pos=pos) return call, index, key_name index = 0 next_must_be_name = False elif next_is_key: - if token.type == tokenize.NAME: + if tok_type == tokenize.NAME: key_name = tok_str[::-1] next_is_key = False @@ -184,7 +182,7 @@ class UserContext(object): if level == 1: next_must_be_name = True level = 0 - end = token.end_pos + end = start_pos[0], start_pos[1] + 1 self._column_temp = self._line_length - end[1] pos = self._line_temp + 1, self._column_temp elif tok_str == ')': diff --git a/test/test_parser/test_tokenize.py b/test/test_parser/test_tokenize.py index 3acb203a..bff7c21c 100644 --- a/test/test_parser/test_tokenize.py +++ b/test/test_parser/test_tokenize.py @@ -31,9 +31,9 @@ asdfasdf""" + "h" simple_docstring_io = StringIO(simple_docstring) tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline) token_list = list(tokens) - string_token = token_list[0] - self.assertEqual(string_token.prefix, '') - self.assertEqual(string_token.value, '"""simple one line docstring"""') + _, value, _, prefix = token_list[0] + self.assertEqual(prefix, '') + self.assertEqual(value, '"""simple one line docstring"""') def test_simple_with_whitespace(self): # Test a simple one line string with preceding whitespace and newline @@ -41,13 +41,13 @@ asdfasdf""" + "h" simple_docstring_io = StringIO(simple_docstring) tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline) token_list = list(tokens) - string_token = token_list[0] - self.assertEqual(string_token.prefix, ' ') - self.assertEqual(string_token.value, '"""simple one line docstring"""') - self.assertEqual(string_token.type, STRING) - newline_token = token_list[1] - self.assertEqual(newline_token.prefix, ' ') - self.assertEqual(newline_token.type, NEWLINE) + typ, value, start_pos, prefix = token_list[0] + self.assertEqual(prefix, ' ') + self.assertEqual(value, '"""simple one line docstring"""') + self.assertEqual(typ, STRING) + typ, value, start_pos, prefix = token_list[1] + self.assertEqual(prefix, ' ') + self.assertEqual(typ, NEWLINE) def test_function_whitespace(self): # Test function definition whitespace identification @@ -59,20 +59,19 @@ asdfasdf""" + "h" fundef_io = StringIO(fundef) tokens = parser.tokenize.generate_tokens(fundef_io.readline) token_list = list(tokens) - print(token_list) - for t in token_list: - if t.value == 'test_whitespace': - self.assertEqual(t.prefix, ' ') - if t.value == '(': - self.assertEqual(t.prefix, '') - if t.value == '*': - self.assertEqual(t.prefix, '') - if t.value == '**': - self.assertEqual(t.prefix, ' ') - if t.value == 'print': - self.assertEqual(t.prefix, ' ') - if t.value == 'if': - self.assertEqual(t.prefix, ' ') + for _, 
value, _, prefix in token_list: + if value == 'test_whitespace': + self.assertEqual(prefix, ' ') + if value == '(': + self.assertEqual(prefix, '') + if value == '*': + self.assertEqual(prefix, '') + if value == '**': + self.assertEqual(prefix, ' ') + if value == 'print': + self.assertEqual(prefix, ' ') + if value == 'if': + self.assertEqual(prefix, ' ') def test_tokenizer_with_string_literal_backslash():
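
After this change every token the tokenizer yields is a plain 4-tuple (type, value, start_pos, prefix) instead of a Token instance, so call sites move from attribute access (token.type, token.value, token.start_pos, token.prefix) to tuple unpacking or indexing, as in the `next_tok[1]` check above. A minimal consumption sketch, not part of the patch, assuming this patched jedi is importable; the source string and the printing are illustrative only:

    from io import StringIO
    from jedi.parser import tokenize

    source = "def foo():\n    return 1\n"
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    for typ, value, start_pos, prefix in tokens:
        # typ       -> numeric token type            (was token.type)
        # value     -> the token string              (was token.value)
        # start_pos -> (line, column) tuple          (was token.start_pos)
        # prefix    -> whitespace/comments in front  (was token.prefix)
        print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))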
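
The first hunk (jedi/parser/__init__.py) also shows how those tuples are adapted before reaching the parser: OP tokens are narrowed to the grammar's own operator types and the fields are reordered to (type, value, prefix, start_pos). A simplified standalone sketch of that step, assuming opmap behaves like the pgen2-style mapping from operator strings to grammar token numbers:

    from token import OP

    def adapt_tokens(tokenizer, opmap):
        """Re-yield tokenizer tuples in the order the parser expects."""
        for typ, value, start_pos, prefix in tokenizer:
            if typ == OP:
                # assumption: opmap maps operator strings to grammar token
                # numbers, e.g. opmap['('] -> LPAR
                typ = opmap[value]
            yield typ, value, prefix, start_pos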
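
The removed Token.end_pos property was the only non-trivial logic on the class: it handled tokens that span several lines. Its replacement in user_context.py, `end = tok_start_pos[0], tok_start_pos[-1] + len(tok_str)`, is only correct for single-line tokens, which is what the new TODO notes. A hypothetical helper, not part of the patch, reproducing the removed behaviour for a plain (value, start_pos) pair:

    def token_end_pos(value, start_pos):
        """End position of a token, respecting embedded newlines."""
        line, column = start_pos
        lines = value.split('\n')
        if value.endswith('\n'):
            lines = lines[:-1]
            lines[-1] += '\n'
        end_line = line + len(lines) - 1
        if line == end_line:
            # single-line token: the column just advances
            return end_line, column + len(lines[-1])
        # multiline token: the column restarts on the last line
        return end_line, len(lines[-1])

    # token_end_pos('name', (1, 0)) == (1, 4)
    # token_end_pos('"""x\ny"""', (3, 4)) == (4, 4)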