Finally able to remove the user_context. This is awesome!

Now we only use the syntax tree to understand what the user is doing and where.
This commit is contained in:
Dave Halter
2016-06-23 16:26:28 +02:00
parent 672bf9908c
commit 73e71b3c1a
6 changed files with 21 additions and 321 deletions

View File

@@ -16,7 +16,7 @@ import sys
from jedi._compatibility import unicode from jedi._compatibility import unicode
from jedi.parser import load_grammar from jedi.parser import load_grammar
from jedi.parser import tree from jedi.parser import tree
from jedi.parser.user_context import UserContext, UserContextParser from jedi.parser.user_context import UserContextParser
from jedi import debug from jedi import debug
from jedi import settings from jedi import settings
from jedi import common from jedi import common
@@ -120,9 +120,8 @@ class Script(object):
cache.clear_time_caches() cache.clear_time_caches()
debug.reset_time() debug.reset_time()
self._grammar = load_grammar(version='%s.%s' % sys.version_info[:2]) self._grammar = load_grammar(version='%s.%s' % sys.version_info[:2])
self._user_context = UserContext(self._source, self._pos)
self._parser = UserContextParser(self._grammar, self._source, path, self._parser = UserContextParser(self._grammar, self._source, path,
self._pos, self._user_context, self._pos,
self._parsed_callback) self._parsed_callback)
if sys_path is None: if sys_path is None:
venv = os.getenv('VIRTUAL_ENV') venv = os.getenv('VIRTUAL_ENV')
@@ -373,7 +372,7 @@ class Interpreter(Script):
# changing). # changing).
self._parser = UserContextParser(self._grammar, self._source, self._parser = UserContextParser(self._grammar, self._source,
self._orig_path, self._pos, self._orig_path, self._pos,
self._user_context, self._parsed_callback, self._parsed_callback,
use_fast_parser=False) use_fast_parser=False)
#interpreter.add_namespaces_to_parser(self._evaluator, namespaces, #interpreter.add_namespaces_to_parser(self._evaluator, namespaces,
#self._get_module()) #self._get_module())

View File

@@ -60,11 +60,8 @@ class Completion:
self._module = evaluator.wrap(parser.module()) self._module = evaluator.wrap(parser.module())
self._code_lines = code_lines self._code_lines = code_lines
line = self._code_lines[position[0] - 1]
# The first step of completions is to get the name # The first step of completions is to get the name
self._like_name = re.search( self._like_name = helpers.get_on_completion_name(code_lines, position)
r'(?!\d)\w+$|$', line[:position[1]]
).group(0)
# The actual cursor position is not what we need to calculate # The actual cursor position is not what we need to calculate
# everything. We want the start of the name we're on. # everything. We want the start of the name we're on.
self._position = position[0], position[1] - len(self._like_name) self._position = position[0], position[1] - len(self._like_name)

View File

@@ -27,6 +27,14 @@ def sorted_definitions(defs):
return sorted(defs, key=lambda x: (x.module_path or '', x.line or 0, x.column or 0)) return sorted(defs, key=lambda x: (x.module_path or '', x.line or 0, x.column or 0))
def get_on_completion_name(lines, position):
line = lines[position[0] - 1]
# The first step of completions is to get the name
return re.search(
r'(?!\d)\w+$|$', line[:position[1]]
).group(0)
def _get_code(code_lines, start_pos, end_pos): def _get_code(code_lines, start_pos, end_pos):
""" """
:param code_start_pos: is where the code starts. :param code_start_pos: is where the code starts.

View File

@@ -1,17 +1,12 @@
import re import re
import os import os
import keyword
from collections import namedtuple from collections import namedtuple
from jedi import cache from jedi import cache
from jedi import common from jedi.parser import ParserWithRecovery
from jedi.parser import tokenize, ParserWithRecovery
from jedi._compatibility import u
from jedi.parser import token
from jedi.parser.fast import FastParser from jedi.parser.fast import FastParser
from jedi.parser import tree from jedi.parser import tree
from jedi import debug from jedi import debug
from jedi.common import PushBackIterator
# TODO this should be part of the tokenizer not just of this user_context. # TODO this should be part of the tokenizer not just of this user_context.
Token = namedtuple('Token', ['type', 'string', 'start_pos', 'prefix']) Token = namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])
@@ -22,293 +17,13 @@ REPLACE_STR = r"[bBuU]?[rR]?" + (r"(?:(')[^\n'\\]*(?:\\.[^\n'\\]*)*(?:'|$)" +
REPLACE_STR = re.compile(REPLACE_STR) REPLACE_STR = re.compile(REPLACE_STR)
class UserContext(object):
"""
:param source: The source code of the file.
:param position: The position, the user is currently in. Only important \
for the main file.
"""
def __init__(self, source, position):
self.source = source
self.position = position
self._line_cache = None
self._relevant_temp = None
@cache.underscore_memoization
def get_path_until_cursor(self):
""" Get the path under the cursor. """
path, self._start_cursor_pos = self._calc_path_until_cursor(self.position)
return path
def _backwards_line_generator(self, start_pos):
self._line_temp, self._column_temp = start_pos
first_line = self.get_line(start_pos[0])[:self._column_temp]
self._line_length = self._column_temp
yield first_line[::-1] + '\n'
while True:
self._line_temp -= 1
line = self.get_line(self._line_temp)
self._line_length = len(line)
yield line[::-1] + '\n'
def _get_backwards_tokenizer(self, start_pos, line_gen=None):
if line_gen is None:
line_gen = self._backwards_line_generator(start_pos)
token_gen = tokenize.generate_tokens(lambda: next(line_gen))
for typ, tok_str, tok_start_pos, prefix in token_gen:
line = self.get_line(self._line_temp)
# Calculate the real start_pos of the token.
if tok_start_pos[0] == 1:
# We are in the first checked line
column = start_pos[1] - tok_start_pos[1]
else:
column = len(line) - tok_start_pos[1]
# Multi-line docstrings must be accounted for.
first_line = common.splitlines(tok_str)[0]
column -= len(first_line)
# Reverse the token again, so that it is in normal order again.
yield Token(typ, tok_str[::-1], (self._line_temp, column), prefix[::-1])
def _calc_path_until_cursor(self, start_pos):
"""
Something like a reverse tokenizer that tokenizes the reversed strings.
"""
open_brackets = ['(', '[', '{']
close_brackets = [')', ']', '}']
start_cursor = start_pos
gen = PushBackIterator(self._get_backwards_tokenizer(start_pos))
string = u('')
level = 0
force_point = False
last_type = None
is_first = True
for tok_type, tok_str, tok_start_pos, prefix in gen:
if is_first:
if prefix: # whitespace is not a path
return u(''), start_cursor
is_first = False
if last_type == tok_type == tokenize.NAME:
string = ' ' + string
if level:
if tok_str in close_brackets:
level += 1
elif tok_str in open_brackets:
level -= 1
elif tok_str == '.':
force_point = False
elif force_point:
# Reversed tokenizing, therefore a number is recognized as a
# floating point number.
# The same is true for string prefixes -> represented as a
# combination of string and name.
if tok_type == tokenize.NUMBER and tok_str[-1] == '.' \
or tok_type == tokenize.NAME and last_type == tokenize.STRING \
and tok_str.lower() in ('b', 'u', 'r', 'br', 'ur'):
force_point = False
else:
break
elif tok_str in close_brackets:
level += 1
elif tok_type in [tokenize.NAME, tokenize.STRING]:
if keyword.iskeyword(tok_str) and string:
# If there's already something in the string, a keyword
# never adds any meaning to the current statement.
break
force_point = True
elif tok_type == tokenize.NUMBER:
pass
else:
if tok_str == '-':
next_tok = next(gen)
if next_tok[1] == 'e':
gen.push_back(next_tok)
else:
break
else:
break
start_cursor = tok_start_pos
string = tok_str + prefix + string
last_type = tok_type
# Don't need whitespace around a statement.
return string.strip(), start_cursor
def get_path_under_cursor(self):
"""
Return the path under the cursor. If there is a rest of the path left,
it will be added to the stuff before it.
"""
return self.get_path_until_cursor() + self.get_path_after_cursor()
def get_path_after_cursor(self):
line = self.get_line(self.position[0])
return re.search("[\w\d]*", line[self.position[1]:]).group(0)
def get_operator_under_cursor(self):
line = self.get_line(self.position[0])
after = re.match("[^\w\s]+", line[self.position[1]:])
before = re.match("[^\w\s]+", line[:self.position[1]][::-1])
return (before.group(0) if before is not None else '') \
+ (after.group(0) if after is not None else '')
def call_signature(self):
"""
:return: Tuple of string of the call and the index of the cursor.
"""
def get_line(pos):
def simplify_str(match):
"""
To avoid having strings without end marks (error tokens) and
strings that just screw up all the call signatures, just
simplify everything.
"""
mark = match.group(1) or match.group(2)
return mark + ' ' * (len(match.group(0)) - 2) + mark
line_gen = self._backwards_line_generator(pos)
for line in line_gen:
# We have to switch the already backwards lines twice, because
# we scan them from start.
line = line[::-1]
modified = re.sub(REPLACE_STR, simplify_str, line)
yield modified[::-1]
index = 0
level = 0
next_must_be_name = False
next_is_key = False
key_name = None
generator = self._get_backwards_tokenizer(self.position, get_line(self.position))
for tok_type, tok_str, start_pos, prefix in generator:
if tok_str in tokenize.ALWAYS_BREAK_TOKENS:
break
elif next_must_be_name:
if tok_type == tokenize.NUMBER:
# If there's a number at the end of the string, it will be
# tokenized as a number. So add it to the name.
tok_type, t, _, _ = next(generator)
if tok_type == tokenize.NAME:
end_pos = start_pos[0], start_pos[1] + len(tok_str)
call, start_pos = self._calc_path_until_cursor(start_pos=end_pos)
return call, index, key_name, start_pos
index = 0
next_must_be_name = False
elif next_is_key:
if tok_type == tokenize.NAME:
key_name = tok_str
next_is_key = False
if tok_str == '(':
level += 1
if level == 1:
next_must_be_name = True
level = 0
elif tok_str == ')':
level -= 1
elif tok_str == ',':
index += 1
elif tok_str == '=':
next_is_key = True
return None, 0, None, (0, 0)
def get_reverse_context(self, yield_positions=False):
"""
Returns the token strings in reverse order from the start position.
"""
self.get_path_until_cursor() # In case _start_cursor_pos is undefined.
pos = self._start_cursor_pos
while True:
# Remove non important white space.
line = self.get_line(pos[0])
while True:
if pos[1] == 0:
line = self.get_line(pos[0] - 1)
if line and line[-1] == '\\':
pos = pos[0] - 1, len(line) - 1
continue
else:
break
if line[pos[1] - 1].isspace():
pos = pos[0], pos[1] - 1
else:
break
try:
result, pos = self._calc_path_until_cursor(start_pos=pos)
if yield_positions:
yield pos
else:
yield result
except StopIteration:
if yield_positions:
yield None
else:
yield ''
def get_backwards_context_tokens(self):
self.get_path_until_cursor() # In case _start_cursor_pos is undefined.
pos = self._start_cursor_pos
while True:
# Remove non important white space.
line = self.get_line(pos[0])
while True:
if pos[1] == 0:
line = self.get_line(pos[0] - 1)
if line and line[-1] == '\\':
pos = pos[0] - 1, len(line) - 1
continue
else:
break
if line[pos[1] - 1].isspace():
pos = pos[0], pos[1] - 1
else:
break
try:
token_ = next(self._get_backwards_tokenizer(pos))
pos = token_.start_pos
yield token_
except StopIteration:
# Make it clear that there's nothing coming anymore.
#yield Token('', token.ENDMARKER, (1, 0), '')
break
def get_line(self, line_nr):
if not self._line_cache:
self._line_cache = common.splitlines(self.source)
if line_nr == 0:
# This is a fix for the zeroth line. We need a newline there, for
# the backwards parser.
return u('')
if line_nr < 0:
raise StopIteration()
try:
return self._line_cache[line_nr - 1]
except IndexError:
raise StopIteration()
def get_position_line(self):
return self.get_line(self.position[0])[:self.position[1]]
class UserContextParser(object): class UserContextParser(object):
def __init__(self, grammar, source, path, position, user_context, def __init__(self, grammar, source, path, position,
parser_done_callback, use_fast_parser=True): parser_done_callback, use_fast_parser=True):
self._grammar = grammar self._grammar = grammar
self._source = source self._source = source
self._path = path and os.path.abspath(path) self._path = path and os.path.abspath(path)
self._position = position self._position = position
self._user_context = user_context
self._use_fast_parser = use_fast_parser self._use_fast_parser = use_fast_parser
self._parser_done_callback = parser_done_callback self._parser_done_callback = parser_done_callback
@@ -330,26 +45,6 @@ class UserContextParser(object):
debug.speed('parsed') debug.speed('parsed')
return module.get_statement_for_position(self._position) return module.get_statement_for_position(self._position)
@cache.underscore_memoization
def user_stmt_with_whitespace(self):
"""
Returns the statement under the cursor even if the statement lies
before the cursor.
"""
user_stmt = self.user_stmt()
if not user_stmt:
# for statements like `from x import ` (cursor not in statement)
# or `abs( ` where the cursor is out in the whitespace.
if self._user_context.get_path_under_cursor():
# We really should have a user_stmt, but the parser couldn't
# process it - probably a Syntax Error (or in a comment).
debug.warning('No statement under the cursor.')
return
pos = next(self._user_context.get_reverse_context(yield_positions=True))
user_stmt = self.module().get_statement_for_position(pos)
return user_stmt
@cache.underscore_memoization @cache.underscore_memoization
def user_scope(self): def user_scope(self):
""" """

View File

@@ -10,8 +10,8 @@ import os
import sys import sys
from jedi import Interpreter from jedi import Interpreter
from jedi.api.helpers import get_completion_parts from jedi.api.helpers import get_on_completion_name
from jedi.parser.user_context import UserContext from jedi import common
def setup_readline(namespace_module=__main__): def setup_readline(namespace_module=__main__):
@@ -72,9 +72,10 @@ def setup_readline(namespace_module=__main__):
try: try:
interpreter = Interpreter(text, [namespace_module.__dict__]) interpreter = Interpreter(text, [namespace_module.__dict__])
path = UserContext(text, (1, len(text))).get_path_until_cursor() lines = common.splitlines(text)
path, dot, like = get_completion_parts(path) position = (len(lines), len(lines[-1]))
before = text[:len(text) - len(like)] name = get_on_completion_name(lines, position)
before = text[:len(text) - len(name)]
completions = interpreter.completions() completions = interpreter.completions()
finally: finally:
sys.path.pop(0) sys.path.pop(0)

View File

@@ -15,7 +15,7 @@ def test_user_statement_on_import():
" time)") " time)")
for pos in [(2, 1), (2, 4)]: for pos in [(2, 1), (2, 4)]:
p = UserContextParser(load_grammar(), s, None, pos, None, lambda x: 1).user_stmt() p = UserContextParser(load_grammar(), s, None, pos, lambda x: 1).user_stmt()
assert isinstance(p, pt.Import) assert isinstance(p, pt.Import)
assert [str(n) for n in p.get_defined_names()] == ['time'] assert [str(n) for n in p.get_defined_names()] == ['time']