forked from VimPlug/jedi

temporary changes for the tokenizer issues

Dave Halter
2014-02-20 00:43:42 +01:00
parent 1b6df4602d
commit c8d6fbb0a1
6 changed files with 68 additions and 62 deletions
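Read together, the hunks below move tokenizer construction out of the Parser: NoErrorTokenizer is built from a raw source string plus a line offset instead of a readline callable, and callers can inject a ready-made tokenizer into the Parser. A minimal sketch of the new flow, using only names that appear in this diff (an illustration of the direction of the change, not a definitive API):

    from jedi.parser import Parser
    from jedi.parser.tokenize import NoErrorTokenizer

    source = "def foo():\n    return 1\n"

    # The tokenizer is now constructed from the raw source (plus a line offset)
    # rather than from a StringIO readline callable.
    tokenizer = NoErrorTokenizer(source, line_offset=0)

    # The Parser accepts the pre-built tokenizer instead of tokenizing internally.
    parser = Parser(source, no_docstr=True, tokenizer=tokenizer)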

View File

@@ -15,6 +15,7 @@ from itertools import chain
from jedi._compatibility import next, unicode, builtins
from jedi.parser import Parser
from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
from jedi.parser import representation as pr
from jedi.parser.user_context import UserContext, UserContextParser
from jedi import debug
@@ -227,8 +228,9 @@ class Script(object):
return scopes
def _get_under_cursor_stmt(self, cursor_txt):
offset = self._pos[0] - 1, self._pos[1]
r = Parser(cursor_txt, no_docstr=True, offset=offset)
tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
try:
stmt = r.module.statements[0]
except IndexError:

View File

@@ -17,7 +17,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
"""
import keyword
from jedi._compatibility import next, StringIO
from jedi._compatibility import next
from jedi import debug
from jedi import common
from jedi.parser import representation as pr
@@ -35,23 +35,21 @@ class Parser(object):
:param module_path: The path of the module in the file system, may be None.
:type module_path: str
:param no_docstr: If True, a string at the beginning is not a docstr.
:param is_fast_parser: -> for fast_parser
:param top_module: Use this module as a parent instead of `self.module`.
"""
def __init__(self, source, module_path=None, no_docstr=False,
offset=(0, 0), is_fast_parser=None, top_module=None):
tokenizer=None, top_module=None, offset=0, is_fast=False):
self.no_docstr = no_docstr
self.start_pos = self.end_pos = 1 + offset[0], offset[1]
self.start_pos = self.end_pos = 1 + offset, 0
# initialize global Scope
self.module = pr.SubModule(module_path, self.start_pos, top_module)
self._scope = self.module
self._current = (None, None)
source = source + '\n' # end with \n, because the parser needs it
buf = StringIO(source)
self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
self.top_module = top_module or self.module
self._gen = tokenizer or tokenize.NoErrorTokenizer(source)
self._gen = tokenize.NoErrorTokenizer(source, offset, is_fast)
self._top_module = top_module or self.module
try:
self._parse()
except (common.MultiLevelStopIteration, StopIteration):
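A minimal sketch of the reworked constructor as it reads in this hunk, assuming these internal keyword names rather than a stable public API: offset is now a single starting line number, is_fast_parser becomes is_fast, and the internal StringIO buffer is gone.

    from jedi.parser import Parser

    # Without an explicit tokenizer the Parser now builds its own from the
    # source string; offset is a plain line number, so offset=3 yields
    # start_pos == (4, 0) instead of the old (line, column) offset tuple.
    p = Parser("x = 1\n", offset=3)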
@@ -386,7 +384,7 @@ class Parser(object):
as_names=as_names,
names_are_set_vars=names_are_set_vars)
stmt.parent = self.top_module
stmt.parent = self._top_module
self._check_user_stmt(stmt)
if tok in always_break + not_first_break:
@@ -455,9 +453,10 @@ class Parser(object):
and not isinstance(self._scope, pr.SubModule):
self._scope = self.module
use_as_parent_scope = self.top_module if isinstance(
self._scope, pr.SubModule
) else self._scope
if isinstance(self._scope, pr.SubModule):
use_as_parent_scope = self._top_module
else:
use_as_parent_scope = self._scope
first_pos = self.start_pos
if tok == 'def':
func = self._parse_function()
@@ -630,7 +629,7 @@ class Parser(object):
else:
if token_type not in [tokenize.COMMENT, tokenize.INDENT,
tokenize.NEWLINE, tokenize.NL]:
debug.warning('token not classified %s %s %s', tok,
token_type, self.start_pos[0])
debug.warning('Token not used: %s %s %s', tok,
tokenize.tok_name[token_type], self.start_pos)
continue
self.no_docstr = False

View File

@@ -362,9 +362,10 @@ class FastParser(use_metaclass(CachedFastParser)):
if nodes[index].code != code:
raise ValueError()
except ValueError:
p = Parser(parser_code, self.module_path, offset=(line_offset, 0),
is_fast_parser=True, top_module=self.module,
no_docstr=no_docstr)
tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
top_module=self.module, no_docstr=no_docstr,
is_fast=True, offset=line_offset)
p.module.parent = self.module
else:
if nodes[index] != self.current_node:

View File

@@ -11,6 +11,7 @@ from __future__ import absolute_import
import string
import re
from jedi._compatibility import StringIO
from token import *
import collections
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -142,7 +143,14 @@ del _compile
tabsize = 8
def source_tokens(source, line_offset=0):
source = source + '\n' # end with \n, because the parser needs it
readline = StringIO(source).readline
return generate_tokens(readline, line_offset)
def generate_tokens(readline, line_offset=0):
"""The original stdlib Python version with minor modifications"""
lnum = line_offset
parenlev = 0
continued = False
@@ -291,9 +299,9 @@ FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
class NoErrorTokenizer(object):
def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
self.readline = readline
self.gen = generate_tokens(readline, offset[0])
def __init__(self, source, line_offset=0, is_fast_parser=False):
self.source = source
self.gen = source_tokens(source, line_offset)
self.closed = False
self.is_first = True
self.push_backs = []
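A small usage sketch of the source_tokens helper added in this hunk; the call is an assumption drawn from the diff above rather than documented behaviour:

    from jedi.parser.tokenize import source_tokens

    # source_tokens() hides the readline plumbing callers previously set up
    # themselves: it appends the trailing newline the parser needs, wraps the
    # string in a StringIO and returns the generator from generate_tokens().
    for token in source_tokens("a = 1\n"):
        print(token)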

View File

@@ -1,6 +1,5 @@
import re
import os
import sys
from jedi import cache
from jedi.parser import tokenize
@@ -64,7 +63,6 @@ class UserContext(object):
level = 0
force_point = False
last_type = None
try:
for token_type, tok, start, end in gen:
# print 'tok', token_type, tok, force_point
if last_type == token_type == tokenize.NAME:
@@ -101,8 +99,6 @@ class UserContext(object):
self._column_temp = self._line_length - end[1]
string += tok
last_type = token_type
except tokenize.TokenError:
debug.warning("Tokenize couldn't finish: %s", sys.exc_info)
# string can still contain spaces at the end
return string[::-1].strip(), start_cursor
@@ -215,9 +211,9 @@ class UserContextParser(object):
# or `abs( ` where the cursor is out in the whitespace.
if self._user_context.get_path_under_cursor():
# We really should have a user_stmt, but the parser couldn't
# process it - probably a Syntax Error.
debug.warning('Something is probably wrong with the syntax under the cursor.')
return None
# process it - probably a Syntax Error (or in a comment).
debug.warning('No statement under the cursor.')
return
pos = next(self._user_context.get_context(yield_positions=True))
user_stmt = self.module().get_statement_for_position(pos, include_imports=True)
return user_stmt

View File

@@ -12,7 +12,7 @@ class TokenTest(unittest.TestCase):
def testit():
a = "huhu"
''')
tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
tok = parsed.module.subscopes[0].statements[0].token_list[2]
self.assertEqual(tok.end_pos, (3, 14))
def test_end_pos_multi_line(self):
@@ -21,5 +21,5 @@ def testit():
a = """huhu
asdfasdf""" + "h"
''')
tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
tok = parsed.module.subscopes[0].statements[0].token_list[2]
self.assertEqual(tok.end_pos, (4, 11))