forked from VimPlug/jedi
Temporary changes for the tokenizer issues.
@@ -15,6 +15,7 @@ from itertools import chain

 from jedi._compatibility import next, unicode, builtins
 from jedi.parser import Parser
+from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
 from jedi.parser import representation as pr
 from jedi.parser.user_context import UserContext, UserContextParser
 from jedi import debug
@@ -227,8 +228,9 @@ class Script(object):
         return scopes

     def _get_under_cursor_stmt(self, cursor_txt):
-        offset = self._pos[0] - 1, self._pos[1]
-        r = Parser(cursor_txt, no_docstr=True, offset=offset)
+        tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
+        tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
+        r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
         try:
             stmt = r.module.statements[0]
         except IndexError:
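The hunk above switches _get_under_cursor_stmt from the offset-based Parser call to the new tokenizer-passing style. A minimal sketch of that calling pattern, using only names that appear in this diff (illustration, not part of the commit; the snippet and line number are made up):

    cursor_txt = 'foo.bar'                           # hypothetical text under the cursor
    line = 10                                        # hypothetical cursor line
    tokenizer = NoErrorTokenizer(cursor_txt, line)   # token stream built straight from source text
    r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
    stmt = r.module.statements[0]                    # wrapped in try/except IndexError in the hunk above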
@@ -17,7 +17,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
 """
 import keyword

-from jedi._compatibility import next, StringIO
+from jedi._compatibility import next
 from jedi import debug
 from jedi import common
 from jedi.parser import representation as pr
@@ -35,23 +35,21 @@ class Parser(object):
     :param module_path: The path of the module in the file system, may be None.
     :type module_path: str
     :param no_docstr: If True, a string at the beginning is not a docstr.
-    :param is_fast_parser: -> for fast_parser
     :param top_module: Use this module as a parent instead of `self.module`.
     """
     def __init__(self, source, module_path=None, no_docstr=False,
-                 offset=(0, 0), is_fast_parser=None, top_module=None):
+                 tokenizer=None, top_module=None, offset=0, is_fast=False):
         self.no_docstr = no_docstr

-        self.start_pos = self.end_pos = 1 + offset[0], offset[1]
+        self.start_pos = self.end_pos = 1 + offset, 0
         # initialize global Scope
         self.module = pr.SubModule(module_path, self.start_pos, top_module)
         self._scope = self.module
         self._current = (None, None)

-        source = source + '\n'  # end with \n, because the parser needs it
-        buf = StringIO(source)
-        self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
-        self.top_module = top_module or self.module
+        self._gen = tokenizer or tokenize.NoErrorTokenizer(source)
+        self._gen = tokenize.NoErrorTokenizer(source, offset, is_fast)
+        self._top_module = top_module or self.module
         try:
             self._parse()
         except (common.MultiLevelStopIteration, StopIteration):
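With the signature change above, callers hand the Parser a ready-made token stream instead of a (line, column) offset tuple; offset shrinks to a plain line number and is_fast replaces is_fast_parser. A rough sketch of the new construction, mirroring the FastParser hunk further down (names come from this diff; the source string is a placeholder and this assumes the fork's modules import as shown in the hunks):

    from jedi.parser import Parser
    from jedi.parser import tokenize

    source = 'def f():\n    pass\n'
    line_offset = 0
    tokenizer = tokenize.NoErrorTokenizer(source, line_offset, True)
    p = Parser(source, module_path=None, tokenizer=tokenizer,
               top_module=None, offset=line_offset, is_fast=True)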
@@ -386,7 +384,7 @@ class Parser(object):
                           as_names=as_names,
                           names_are_set_vars=names_are_set_vars)

-        stmt.parent = self.top_module
+        stmt.parent = self._top_module
         self._check_user_stmt(stmt)

         if tok in always_break + not_first_break:
@@ -455,9 +453,10 @@ class Parser(object):
                     and not isinstance(self._scope, pr.SubModule):
                 self._scope = self.module

-            use_as_parent_scope = self.top_module if isinstance(
-                self._scope, pr.SubModule
-            ) else self._scope
+            if isinstance(self._scope, pr.SubModule):
+                use_as_parent_scope = self._top_module
+            else:
+                use_as_parent_scope = self._scope
             first_pos = self.start_pos
             if tok == 'def':
                 func = self._parse_function()
@@ -630,7 +629,7 @@ class Parser(object):
             else:
                 if token_type not in [tokenize.COMMENT, tokenize.INDENT,
                                       tokenize.NEWLINE, tokenize.NL]:
-                    debug.warning('token not classified %s %s %s', tok,
-                                  token_type, self.start_pos[0])
+                    debug.warning('Token not used: %s %s %s', tok,
+                                  tokenize.tok_name[token_type], self.start_pos)
                 continue
             self.no_docstr = False
@@ -362,9 +362,10 @@ class FastParser(use_metaclass(CachedFastParser)):
             if nodes[index].code != code:
                 raise ValueError()
         except ValueError:
-            p = Parser(parser_code, self.module_path, offset=(line_offset, 0),
-                       is_fast_parser=True, top_module=self.module,
-                       no_docstr=no_docstr)
+            tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
+            p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
+                       top_module=self.module, no_docstr=no_docstr,
+                       is_fast=True, offset=line_offset)
             p.module.parent = self.module
         else:
             if nodes[index] != self.current_node:
@@ -11,6 +11,7 @@ from __future__ import absolute_import

 import string
 import re
+from jedi._compatibility import StringIO
 from token import *
 import collections
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -142,7 +143,14 @@ del _compile
 tabsize = 8


+def source_tokens(source, line_offset=0):
+    source = source + '\n'  # end with \n, because the parser needs it
+    readline = StringIO(source).readline
+    return generate_tokens(readline, line_offset)
+
+
 def generate_tokens(readline, line_offset=0):
+    """The original stdlib Python version with minor modifications"""
     lnum = line_offset
     parenlev = 0
     continued = False
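source_tokens above simply wraps the string in a StringIO and feeds its readline to generate_tokens, which the added docstring describes as a lightly modified copy of the stdlib tokenizer. For comparison, the same wrapping pattern with the standard library (illustration only, not jedi code):

    import tokenize
    from io import StringIO

    def tokens_from_source(source):
        # wrap a plain string so a readline-based tokenizer can consume it
        return tokenize.generate_tokens(StringIO(source + '\n').readline)

    for tok in tokens_from_source('a = 1'):
        print(tok)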
@@ -291,9 +299,9 @@ FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']


 class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
-        self.readline = readline
-        self.gen = generate_tokens(readline, offset[0])
+    def __init__(self, source, line_offset=0, is_fast_parser=False):
+        self.source = source
+        self.gen = source_tokens(source, line_offset)
         self.closed = False
         self.is_first = True
         self.push_backs = []
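After this change a NoErrorTokenizer is built directly from source text rather than from a readline callable, and the old (line, column) offset tuple becomes a plain line_offset. A before/after sketch (values are placeholders, not from the commit):

    # old: NoErrorTokenizer(StringIO('a = 1\n').readline, offset=(0, 0))
    # new:
    gen = NoErrorTokenizer('a = 1\n', line_offset=0)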
@@ -1,6 +1,5 @@
 import re
 import os
-import sys

 from jedi import cache
 from jedi.parser import tokenize
@@ -64,45 +63,42 @@ class UserContext(object):
         level = 0
         force_point = False
         last_type = None
-        try:
-            for token_type, tok, start, end in gen:
-                # print 'tok', token_type, tok, force_point
-                if last_type == token_type == tokenize.NAME:
-                    string += ' '
-
-                if level > 0:
-                    if tok in close_brackets:
-                        level += 1
-                    if tok in open_brackets:
-                        level -= 1
-                elif tok == '.':
-                    force_point = False
-                elif force_point:
-                    # it is reversed, therefore a number is getting recognized
-                    # as a floating point number
-                    if token_type == tokenize.NUMBER and tok[0] == '.':
-                        force_point = False
-                    else:
-                        break
-                elif tok in close_brackets:
-                    level += 1
-                elif token_type in [tokenize.NAME, tokenize.STRING]:
-                    force_point = True
-                elif token_type == tokenize.NUMBER:
-                    pass
-                else:
-                    self._column_temp = self._line_length - end[1]
-                    break
-
-                x = start_pos[0] - end[0] + 1
-                l = self.get_line(x)
-                l = self._first_line if x == start_pos[0] else l
-                start_cursor = x, len(l) - end[1]
-                self._column_temp = self._line_length - end[1]
-                string += tok
-                last_type = token_type
-        except tokenize.TokenError:
-            debug.warning("Tokenize couldn't finish: %s", sys.exc_info)
+        for token_type, tok, start, end in gen:
+            # print 'tok', token_type, tok, force_point
+            if last_type == token_type == tokenize.NAME:
+                string += ' '
+
+            if level > 0:
+                if tok in close_brackets:
+                    level += 1
+                if tok in open_brackets:
+                    level -= 1
+            elif tok == '.':
+                force_point = False
+            elif force_point:
+                # it is reversed, therefore a number is getting recognized
+                # as a floating point number
+                if token_type == tokenize.NUMBER and tok[0] == '.':
+                    force_point = False
+                else:
+                    break
+            elif tok in close_brackets:
+                level += 1
+            elif token_type in [tokenize.NAME, tokenize.STRING]:
+                force_point = True
+            elif token_type == tokenize.NUMBER:
+                pass
+            else:
+                self._column_temp = self._line_length - end[1]
+                break
+
+            x = start_pos[0] - end[0] + 1
+            l = self.get_line(x)
+            l = self._first_line if x == start_pos[0] else l
+            start_cursor = x, len(l) - end[1]
+            self._column_temp = self._line_length - end[1]
+            string += tok
+            last_type = token_type

         # string can still contain spaces at the end
         return string[::-1].strip(), start_cursor
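Note that the removed except tokenize.TokenError handler was the only consumer of sys.exc_info in this function, which is why an earlier hunk in this file drops import sys; with the handler gone, tokenizer errors are no longer caught at this level.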
@@ -215,9 +211,9 @@ class UserContextParser(object):
         # or `abs( ` where the cursor is out in the whitespace.
         if self._user_context.get_path_under_cursor():
             # We really should have a user_stmt, but the parser couldn't
-            # process it - probably a Syntax Error.
-            debug.warning('Something is probably wrong with the syntax under the cursor.')
-            return None
+            # process it - probably a Syntax Error (or in a comment).
+            debug.warning('No statement under the cursor.')
+            return
         pos = next(self._user_context.get_context(yield_positions=True))
         user_stmt = self.module().get_statement_for_position(pos, include_imports=True)
         return user_stmt
@@ -12,7 +12,7 @@ class TokenTest(unittest.TestCase):
 def testit():
     a = "huhu"
 ''')
-        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        tok = parsed.module.subscopes[0].statements[0].token_list[2]
         self.assertEqual(tok.end_pos, (3, 14))

     def test_end_pos_multi_line(self):
@@ -21,5 +21,5 @@ def testit():
     a = """huhu
 asdfasdf""" + "h"
 ''')
-        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        tok = parsed.module.subscopes[0].statements[0].token_list[2]
         self.assertEqual(tok.end_pos, (4, 11))