forked from VimPlug/jedi

temporary changes for the tokenizer issues

Dave Halter
2014-02-20 00:43:42 +01:00
parent 1b6df4602d
commit c8d6fbb0a1
6 changed files with 68 additions and 62 deletions
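Read together, the hunks below move tokenizer construction out of the Parser: NoErrorTokenizer is built from a raw source string plus a line offset instead of a readline callable, and callers can inject a ready-made tokenizer into the Parser. A minimal sketch of the new flow, using only names that appear in this diff (an illustration of the direction of the change, not a definitive API):

    from jedi.parser import Parser
    from jedi.parser.tokenize import NoErrorTokenizer

    source = "def foo():\n    return 1\n"

    # The tokenizer is now constructed from the raw source (plus a line offset)
    # rather than from a StringIO readline callable.
    tokenizer = NoErrorTokenizer(source, line_offset=0)

    # The Parser accepts the pre-built tokenizer instead of tokenizing internally.
    parser = Parser(source, no_docstr=True, tokenizer=tokenizer)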

View File

@@ -15,6 +15,7 @@ from itertools import chain
from jedi._compatibility import next, unicode, builtins
from jedi.parser import Parser
from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
from jedi.parser import representation as pr
from jedi.parser.user_context import UserContext, UserContextParser
from jedi import debug
@@ -227,8 +228,9 @@ class Script(object):
return scopes
def _get_under_cursor_stmt(self, cursor_txt):
offset = self._pos[0] - 1, self._pos[1]
r = Parser(cursor_txt, no_docstr=True, offset=offset)
tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
try:
stmt = r.module.statements[0]
except IndexError:

View File

@@ -17,7 +17,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
"""
import keyword
from jedi._compatibility import next, StringIO
from jedi._compatibility import next
from jedi import debug
from jedi import common
from jedi.parser import representation as pr
@@ -35,23 +35,21 @@ class Parser(object):
:param module_path: The path of the module in the file system, may be None.
:type module_path: str
:param no_docstr: If True, a string at the beginning is not a docstr.
:param is_fast_parser: -> for fast_parser
:param top_module: Use this module as a parent instead of `self.module`.
"""
def __init__(self, source, module_path=None, no_docstr=False,
offset=(0, 0), is_fast_parser=None, top_module=None):
tokenizer=None, top_module=None, offset=0, is_fast=False):
self.no_docstr = no_docstr
self.start_pos = self.end_pos = 1 + offset[0], offset[1]
self.start_pos = self.end_pos = 1 + offset, 0
# initialize global Scope
self.module = pr.SubModule(module_path, self.start_pos, top_module)
self._scope = self.module
self._current = (None, None)
source = source + '\n' # end with \n, because the parser needs it
buf = StringIO(source)
self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
self.top_module = top_module or self.module
self._gen = tokenizer or tokenize.NoErrorTokenizer(source)
self._gen = tokenize.NoErrorTokenizer(source, offset, is_fast)
self._top_module = top_module or self.module
try:
self._parse()
except (common.MultiLevelStopIteration, StopIteration):
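A minimal sketch of the reworked constructor as it reads in this hunk, assuming these internal keyword names rather than a stable public API: offset is now a single starting line number, is_fast_parser becomes is_fast, and the internal StringIO buffer is gone.

    from jedi.parser import Parser

    # Without an explicit tokenizer the Parser now builds its own from the
    # source string; offset is a plain line number, so offset=3 yields
    # start_pos == (4, 0) instead of the old (line, column) offset tuple.
    p = Parser("x = 1\n", offset=3)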
@@ -386,7 +384,7 @@ class Parser(object):
as_names=as_names,
names_are_set_vars=names_are_set_vars)
stmt.parent = self.top_module
stmt.parent = self._top_module
self._check_user_stmt(stmt)
if tok in always_break + not_first_break:
@@ -455,9 +453,10 @@ class Parser(object):
and not isinstance(self._scope, pr.SubModule):
self._scope = self.module
use_as_parent_scope = self.top_module if isinstance(
self._scope, pr.SubModule
) else self._scope
if isinstance(self._scope, pr.SubModule):
use_as_parent_scope = self._top_module
else:
use_as_parent_scope = self._scope
first_pos = self.start_pos
if tok == 'def':
func = self._parse_function()
@@ -630,7 +629,7 @@ class Parser(object):
else:
if token_type not in [tokenize.COMMENT, tokenize.INDENT,
tokenize.NEWLINE, tokenize.NL]:
debug.warning('token not classified %s %s %s', tok,
token_type, self.start_pos[0])
debug.warning('Token not used: %s %s %s', tok,
tokenize.tok_name[token_type], self.start_pos)
continue
self.no_docstr = False

View File

@@ -362,9 +362,10 @@ class FastParser(use_metaclass(CachedFastParser)):
if nodes[index].code != code:
raise ValueError()
except ValueError:
p = Parser(parser_code, self.module_path, offset=(line_offset, 0),
is_fast_parser=True, top_module=self.module,
no_docstr=no_docstr)
tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
top_module=self.module, no_docstr=no_docstr,
is_fast=True, offset=line_offset)
p.module.parent = self.module
else:
if nodes[index] != self.current_node:

View File

@@ -11,6 +11,7 @@ from __future__ import absolute_import
import string
import re
from jedi._compatibility import StringIO
from token import *
import collections
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -142,7 +143,14 @@ del _compile
tabsize = 8
def source_tokens(source, line_offset=0):
source = source + '\n' # end with \n, because the parser needs it
readline = StringIO(source).readline
return generate_tokens(readline, line_offset)
def generate_tokens(readline, line_offset=0):
"""The original stdlib Python version with minor modifications"""
lnum = line_offset
parenlev = 0
continued = False
@@ -291,9 +299,9 @@ FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
class NoErrorTokenizer(object):
def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
self.readline = readline
self.gen = generate_tokens(readline, offset[0])
def __init__(self, source, line_offset=0, is_fast_parser=False):
self.source = source
self.gen = source_tokens(source, line_offset)
self.closed = False
self.is_first = True
self.push_backs = []
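A small usage sketch of the source_tokens helper added in this hunk; the call is an assumption drawn from the diff above rather than documented behaviour:

    from jedi.parser.tokenize import source_tokens

    # source_tokens() hides the readline plumbing callers previously set up
    # themselves: it appends the trailing newline the parser needs, wraps the
    # string in a StringIO and returns the generator from generate_tokens().
    for token in source_tokens("a = 1\n"):
        print(token)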

View File

@@ -1,6 +1,5 @@
import re
import os
import sys
from jedi import cache
from jedi.parser import tokenize
@@ -64,7 +63,6 @@ class UserContext(object):
level = 0
force_point = False
last_type = None
try:
for token_type, tok, start, end in gen:
# print 'tok', token_type, tok, force_point
if last_type == token_type == tokenize.NAME:
@@ -101,8 +99,6 @@ class UserContext(object):
self._column_temp = self._line_length - end[1]
string += tok
last_type = token_type
except tokenize.TokenError:
debug.warning("Tokenize couldn't finish: %s", sys.exc_info)
# string can still contain spaces at the end
return string[::-1].strip(), start_cursor
@@ -215,9 +211,9 @@ class UserContextParser(object):
# or `abs( ` where the cursor is out in the whitespace.
if self._user_context.get_path_under_cursor():
# We really should have a user_stmt, but the parser couldn't
# process it - probably a Syntax Error.
debug.warning('Something is probably wrong with the syntax under the cursor.')
return None
# process it - probably a Syntax Error (or in a comment).
debug.warning('No statement under the cursor.')
return
pos = next(self._user_context.get_context(yield_positions=True))
user_stmt = self.module().get_statement_for_position(pos, include_imports=True)
return user_stmt

View File

@@ -12,7 +12,7 @@ class TokenTest(unittest.TestCase):
def testit():
a = "huhu"
''')
tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
tok = parsed.module.subscopes[0].statements[0].token_list[2]
self.assertEqual(tok.end_pos, (3, 14))
def test_end_pos_multi_line(self):
@@ -21,5 +21,5 @@ def testit():
a = """huhu
asdfasdf""" + "h"
''')
tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
tok = parsed.module.subscopes[0].statements[0].token_list[2]
self.assertEqual(tok.end_pos, (4, 11))