forked from VimPlug/jedi

temporary changes for the tokenizer issues

Dave Halter
2014-02-20 00:43:42 +01:00
parent 1b6df4602d
commit c8d6fbb0a1
6 changed files with 68 additions and 62 deletions

View File

@@ -15,6 +15,7 @@ from itertools import chain
 from jedi._compatibility import next, unicode, builtins
 from jedi.parser import Parser
+from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
 from jedi.parser import representation as pr
 from jedi.parser.user_context import UserContext, UserContextParser
 from jedi import debug
@@ -227,8 +228,9 @@ class Script(object):
         return scopes

     def _get_under_cursor_stmt(self, cursor_txt):
-        offset = self._pos[0] - 1, self._pos[1]
-        r = Parser(cursor_txt, no_docstr=True, offset=offset)
+        tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
+        tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
+        r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
         try:
             stmt = r.module.statements[0]
         except IndexError:

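Read together, the two hunks above change how the statement under the cursor is parsed: the cursor text is now tokenized with the cursor's line as a zero-based offset and handed to a throwaway Parser, whose first module statement is taken. A minimal sketch of that call chain under those assumptions; statement_under_cursor is a hypothetical stand-in for Script._get_under_cursor_stmt, not code from this repository:

# Sketch only: mirrors the new code path in the hunk above.
from jedi.parser import Parser
from jedi.parser.tokenize import NoErrorTokenizer

def statement_under_cursor(cursor_txt, line):
    # line is 1-based; the tokenizer offset is 0-based (self._pos[0] - 1 above)
    tokenizer = NoErrorTokenizer(cursor_txt, line - 1)
    r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
    try:
        return r.module.statements[0]
    except IndexError:
        return None
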
View File

@@ -17,7 +17,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
""" """
import keyword import keyword
from jedi._compatibility import next, StringIO from jedi._compatibility import next
from jedi import debug from jedi import debug
from jedi import common from jedi import common
from jedi.parser import representation as pr from jedi.parser import representation as pr
@@ -35,23 +35,21 @@ class Parser(object):
     :param module_path: The path of the module in the file system, may be None.
     :type module_path: str
     :param no_docstr: If True, a string at the beginning is not a docstr.
-    :param is_fast_parser: -> for fast_parser
     :param top_module: Use this module as a parent instead of `self.module`.
     """
     def __init__(self, source, module_path=None, no_docstr=False,
-                 offset=(0, 0), is_fast_parser=None, top_module=None):
+                 tokenizer=None, top_module=None, offset=0, is_fast=False):
         self.no_docstr = no_docstr
-        self.start_pos = self.end_pos = 1 + offset[0], offset[1]
+        self.start_pos = self.end_pos = 1 + offset, 0
         # initialize global Scope
         self.module = pr.SubModule(module_path, self.start_pos, top_module)
         self._scope = self.module
         self._current = (None, None)
-        source = source + '\n'  # end with \n, because the parser needs it
-        buf = StringIO(source)
-        self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
-        self.top_module = top_module or self.module
+        self._gen = tokenizer or tokenize.NoErrorTokenizer(source)
+        self._gen = tokenize.NoErrorTokenizer(source, offset, is_fast)
+        self._top_module = top_module or self.module
         try:
             self._parse()
         except (common.MultiLevelStopIteration, StopIteration):
@@ -386,7 +384,7 @@ class Parser(object):
                             as_names=as_names,
                             names_are_set_vars=names_are_set_vars)
-        stmt.parent = self.top_module
+        stmt.parent = self._top_module
         self._check_user_stmt(stmt)
         if tok in always_break + not_first_break:
@@ -455,9 +453,10 @@ class Parser(object):
                 and not isinstance(self._scope, pr.SubModule):
             self._scope = self.module
-        use_as_parent_scope = self.top_module if isinstance(
-            self._scope, pr.SubModule
-        ) else self._scope
+        if isinstance(self._scope, pr.SubModule):
+            use_as_parent_scope = self._top_module
+        else:
+            use_as_parent_scope = self._scope
         first_pos = self.start_pos
         if tok == 'def':
             func = self._parse_function()
@@ -630,7 +629,7 @@ class Parser(object):
             else:
                 if token_type not in [tokenize.COMMENT, tokenize.INDENT,
                                       tokenize.NEWLINE, tokenize.NL]:
-                    debug.warning('token not classified %s %s %s', tok,
-                                  token_type, self.start_pos[0])
+                    debug.warning('Token not used: %s %s %s', tok,
+                                  tokenize.tok_name[token_type], self.start_pos)
                 continue
             self.no_docstr = False

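For orientation, a rough sketch of what the new Parser constructor accepts after this change, going only by the signature shown in the hunks above: a pre-built tokenizer may be injected, and otherwise the Parser builds a NoErrorTokenizer from the source itself (in this temporary state the second self._gen assignment still re-creates the tokenizer either way). The source string below is an arbitrary example, not taken from the repository:

# Sketch only: the two construction styles implied by the new signature.
from jedi.parser import Parser
from jedi.parser import tokenize

source = "def f(a):\n    return a + 1\n"

# 1) Let the Parser create its own NoErrorTokenizer from the source.
p = Parser(source, module_path=None, no_docstr=False)

# 2) Inject a tokenizer explicitly, as the fast parser does; offset and
#    is_fast are passed alongside because the temporary code still rebuilds
#    the tokenizer from them.
tok = tokenize.NoErrorTokenizer(source, 0, False)
p = Parser(source, None, tokenizer=tok, offset=0, is_fast=False)
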
View File

@@ -362,9 +362,10 @@ class FastParser(use_metaclass(CachedFastParser)):
                 if nodes[index].code != code:
                     raise ValueError()
             except ValueError:
-                p = Parser(parser_code, self.module_path, offset=(line_offset, 0),
-                           is_fast_parser=True, top_module=self.module,
-                           no_docstr=no_docstr)
+                tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
+                p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
+                           top_module=self.module, no_docstr=no_docstr,
+                           is_fast=True, offset=line_offset)
                 p.module.parent = self.module
             else:
                 if nodes[index] != self.current_node:

View File

@@ -11,6 +11,7 @@ from __future__ import absolute_import
 import string
 import re
+from jedi._compatibility import StringIO
 from token import *
 import collections
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -142,7 +143,14 @@ del _compile
 tabsize = 8

+def source_tokens(source, line_offset=0):
+    source = source + '\n'  # end with \n, because the parser needs it
+    readline = StringIO(source).readline
+    return generate_tokens(readline, line_offset)
+
+
 def generate_tokens(readline, line_offset=0):
+    """The original stdlib Python version with minor modifications"""
     lnum = line_offset
     parenlev = 0
     continued = False
@@ -291,9 +299,9 @@ FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
 class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
-        self.readline = readline
-        self.gen = generate_tokens(readline, offset[0])
+    def __init__(self, source, line_offset=0, is_fast_parser=False):
+        self.source = source
+        self.gen = source_tokens(source, line_offset)
         self.closed = False
         self.is_first = True
         self.push_backs = []

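Going by this file's hunks alone, the tokenizer module now exposes a small string-based entry point: source_tokens appends the trailing newline, wraps the string in a StringIO readline and delegates to generate_tokens, while NoErrorTokenizer takes the raw source string instead of a readline callable. A minimal usage sketch under those assumptions; the example source is made up:

# Sketch only: calling the new string-based entry points.
from jedi.parser import tokenize

source = "x = 1\ny = x + 2\n"

# Iterate the plain token stream produced from a source string.
for tok in tokenize.source_tokens(source, line_offset=0):
    print(tok)

# The error-tolerant wrapper used by the Parser now also starts from a string.
robust = tokenize.NoErrorTokenizer(source, line_offset=0)
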
View File

@@ -1,6 +1,5 @@
 import re
 import os
-import sys

 from jedi import cache
 from jedi.parser import tokenize
@@ -64,45 +63,42 @@ class UserContext(object):
         level = 0
         force_point = False
         last_type = None
-        try:
-            for token_type, tok, start, end in gen:
-                # print 'tok', token_type, tok, force_point
-                if last_type == token_type == tokenize.NAME:
-                    string += ' '
-
-                if level > 0:
-                    if tok in close_brackets:
-                        level += 1
-                    if tok in open_brackets:
-                        level -= 1
-                elif tok == '.':
-                    force_point = False
-                elif force_point:
-                    # it is reversed, therefore a number is getting recognized
-                    # as a floating point number
-                    if token_type == tokenize.NUMBER and tok[0] == '.':
-                        force_point = False
-                    else:
-                        break
-                elif tok in close_brackets:
-                    level += 1
-                elif token_type in [tokenize.NAME, tokenize.STRING]:
-                    force_point = True
-                elif token_type == tokenize.NUMBER:
-                    pass
-                else:
-                    self._column_temp = self._line_length - end[1]
-                    break
-
-                x = start_pos[0] - end[0] + 1
-                l = self.get_line(x)
-                l = self._first_line if x == start_pos[0] else l
-                start_cursor = x, len(l) - end[1]
-                self._column_temp = self._line_length - end[1]
-                string += tok
-                last_type = token_type
-        except tokenize.TokenError:
-            debug.warning("Tokenize couldn't finish: %s", sys.exc_info)
+        for token_type, tok, start, end in gen:
+            # print 'tok', token_type, tok, force_point
+            if last_type == token_type == tokenize.NAME:
+                string += ' '
+
+            if level > 0:
+                if tok in close_brackets:
+                    level += 1
+                if tok in open_brackets:
+                    level -= 1
+            elif tok == '.':
+                force_point = False
+            elif force_point:
+                # it is reversed, therefore a number is getting recognized
+                # as a floating point number
+                if token_type == tokenize.NUMBER and tok[0] == '.':
+                    force_point = False
+                else:
+                    break
+            elif tok in close_brackets:
+                level += 1
+            elif token_type in [tokenize.NAME, tokenize.STRING]:
+                force_point = True
+            elif token_type == tokenize.NUMBER:
+                pass
+            else:
+                self._column_temp = self._line_length - end[1]
+                break
+
+            x = start_pos[0] - end[0] + 1
+            l = self.get_line(x)
+            l = self._first_line if x == start_pos[0] else l
+            start_cursor = x, len(l) - end[1]
+            self._column_temp = self._line_length - end[1]
+            string += tok
+            last_type = token_type

         # string can still contain spaces at the end
         return string[::-1].strip(), start_cursor
@@ -215,9 +211,9 @@ class UserContextParser(object):
             # or `abs( ` where the cursor is out in the whitespace.
             if self._user_context.get_path_under_cursor():
                 # We really should have a user_stmt, but the parser couldn't
-                # process it - probably a Syntax Error.
-                debug.warning('Something is probably wrong with the syntax under the cursor.')
-                return None
+                # process it - probably a Syntax Error (or in a comment).
+                debug.warning('No statement under the cursor.')
+                return
             pos = next(self._user_context.get_context(yield_positions=True))
             user_stmt = self.module().get_statement_for_position(pos, include_imports=True)
             return user_stmt

View File

@@ -12,7 +12,7 @@ class TokenTest(unittest.TestCase):
 def testit():
     a = "huhu"
 ''')
-        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        tok = parsed.module.subscopes[0].statements[0].token_list[2]
         self.assertEqual(tok.end_pos, (3, 14))

     def test_end_pos_multi_line(self):
@@ -21,5 +21,5 @@ def testit():
a = """huhu a = """huhu
asdfasdf""" + "h" asdfasdf""" + "h"
''') ''')
tok = parsed.top_module.subscopes[0].statements[0].token_list[2] tok = parsed.module.subscopes[0].statements[0].token_list[2]
self.assertEqual(tok.end_pos, (4, 11)) self.assertEqual(tok.end_pos, (4, 11))