diff --git a/jedi/api/__init__.py b/jedi/api/__init__.py index 618cce0e..c4d9ee44 100644 --- a/jedi/api/__init__.py +++ b/jedi/api/__init__.py @@ -16,7 +16,7 @@ import sys from jedi._compatibility import unicode from jedi.parser import load_grammar from jedi.parser import tree -from jedi.parser.user_context import UserContext, UserContextParser +from jedi.parser.user_context import UserContextParser from jedi import debug from jedi import settings from jedi import common @@ -120,9 +120,8 @@ class Script(object): cache.clear_time_caches() debug.reset_time() self._grammar = load_grammar(version='%s.%s' % sys.version_info[:2]) - self._user_context = UserContext(self._source, self._pos) self._parser = UserContextParser(self._grammar, self._source, path, - self._pos, self._user_context, + self._pos, self._parsed_callback) if sys_path is None: venv = os.getenv('VIRTUAL_ENV') @@ -373,7 +372,7 @@ class Interpreter(Script): # changing). self._parser = UserContextParser(self._grammar, self._source, self._orig_path, self._pos, - self._user_context, self._parsed_callback, + self._parsed_callback, use_fast_parser=False) #interpreter.add_namespaces_to_parser(self._evaluator, namespaces, #self._get_module()) diff --git a/jedi/api/completion.py b/jedi/api/completion.py index 0ebcbfa3..d4beb8b2 100644 --- a/jedi/api/completion.py +++ b/jedi/api/completion.py @@ -60,11 +60,8 @@ class Completion: self._module = evaluator.wrap(parser.module()) self._code_lines = code_lines - line = self._code_lines[position[0] - 1] # The first step of completions is to get the name - self._like_name = re.search( - r'(?!\d)\w+$|$', line[:position[1]] - ).group(0) + self._like_name = helpers.get_on_completion_name(code_lines, position) # The actual cursor position is not what we need to calculate # everything. We want the start of the name we're on. self._position = position[0], position[1] - len(self._like_name) diff --git a/jedi/api/helpers.py b/jedi/api/helpers.py index 9111c873..0d4e86c1 100644 --- a/jedi/api/helpers.py +++ b/jedi/api/helpers.py @@ -27,6 +27,14 @@ def sorted_definitions(defs): return sorted(defs, key=lambda x: (x.module_path or '', x.line or 0, x.column or 0)) +def get_on_completion_name(lines, position): + line = lines[position[0] - 1] + # The first step of completions is to get the name + return re.search( + r'(?!\d)\w+$|$', line[:position[1]] + ).group(0) + + def _get_code(code_lines, start_pos, end_pos): """ :param code_start_pos: is where the code starts. diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py index ae073b98..c7d7e787 100644 --- a/jedi/parser/user_context.py +++ b/jedi/parser/user_context.py @@ -1,17 +1,12 @@ import re import os -import keyword from collections import namedtuple from jedi import cache -from jedi import common -from jedi.parser import tokenize, ParserWithRecovery -from jedi._compatibility import u -from jedi.parser import token +from jedi.parser import ParserWithRecovery from jedi.parser.fast import FastParser from jedi.parser import tree from jedi import debug -from jedi.common import PushBackIterator # TODO this should be part of the tokenizer not just of this user_context. Token = namedtuple('Token', ['type', 'string', 'start_pos', 'prefix']) @@ -22,293 +17,13 @@ REPLACE_STR = r"[bBuU]?[rR]?" + (r"(?:(')[^\n'\\]*(?:\\.[^\n'\\]*)*(?:'|$)" + REPLACE_STR = re.compile(REPLACE_STR) -class UserContext(object): - """ - :param source: The source code of the file. - :param position: The position, the user is currently in. Only important \ - for the main file. - """ - def __init__(self, source, position): - self.source = source - self.position = position - self._line_cache = None - - self._relevant_temp = None - - @cache.underscore_memoization - def get_path_until_cursor(self): - """ Get the path under the cursor. """ - path, self._start_cursor_pos = self._calc_path_until_cursor(self.position) - return path - - def _backwards_line_generator(self, start_pos): - self._line_temp, self._column_temp = start_pos - first_line = self.get_line(start_pos[0])[:self._column_temp] - - self._line_length = self._column_temp - yield first_line[::-1] + '\n' - - while True: - self._line_temp -= 1 - line = self.get_line(self._line_temp) - self._line_length = len(line) - yield line[::-1] + '\n' - - def _get_backwards_tokenizer(self, start_pos, line_gen=None): - if line_gen is None: - line_gen = self._backwards_line_generator(start_pos) - token_gen = tokenize.generate_tokens(lambda: next(line_gen)) - for typ, tok_str, tok_start_pos, prefix in token_gen: - line = self.get_line(self._line_temp) - # Calculate the real start_pos of the token. - if tok_start_pos[0] == 1: - # We are in the first checked line - column = start_pos[1] - tok_start_pos[1] - else: - column = len(line) - tok_start_pos[1] - # Multi-line docstrings must be accounted for. - first_line = common.splitlines(tok_str)[0] - column -= len(first_line) - # Reverse the token again, so that it is in normal order again. - yield Token(typ, tok_str[::-1], (self._line_temp, column), prefix[::-1]) - - def _calc_path_until_cursor(self, start_pos): - """ - Something like a reverse tokenizer that tokenizes the reversed strings. - """ - open_brackets = ['(', '[', '{'] - close_brackets = [')', ']', '}'] - - start_cursor = start_pos - gen = PushBackIterator(self._get_backwards_tokenizer(start_pos)) - string = u('') - level = 0 - force_point = False - last_type = None - is_first = True - for tok_type, tok_str, tok_start_pos, prefix in gen: - if is_first: - if prefix: # whitespace is not a path - return u(''), start_cursor - is_first = False - - if last_type == tok_type == tokenize.NAME: - string = ' ' + string - - if level: - if tok_str in close_brackets: - level += 1 - elif tok_str in open_brackets: - level -= 1 - elif tok_str == '.': - force_point = False - elif force_point: - # Reversed tokenizing, therefore a number is recognized as a - # floating point number. - # The same is true for string prefixes -> represented as a - # combination of string and name. - if tok_type == tokenize.NUMBER and tok_str[-1] == '.' \ - or tok_type == tokenize.NAME and last_type == tokenize.STRING \ - and tok_str.lower() in ('b', 'u', 'r', 'br', 'ur'): - force_point = False - else: - break - elif tok_str in close_brackets: - level += 1 - elif tok_type in [tokenize.NAME, tokenize.STRING]: - if keyword.iskeyword(tok_str) and string: - # If there's already something in the string, a keyword - # never adds any meaning to the current statement. - break - force_point = True - elif tok_type == tokenize.NUMBER: - pass - else: - if tok_str == '-': - next_tok = next(gen) - if next_tok[1] == 'e': - gen.push_back(next_tok) - else: - break - else: - break - - start_cursor = tok_start_pos - string = tok_str + prefix + string - last_type = tok_type - - # Don't need whitespace around a statement. - return string.strip(), start_cursor - - def get_path_under_cursor(self): - """ - Return the path under the cursor. If there is a rest of the path left, - it will be added to the stuff before it. - """ - return self.get_path_until_cursor() + self.get_path_after_cursor() - - def get_path_after_cursor(self): - line = self.get_line(self.position[0]) - return re.search("[\w\d]*", line[self.position[1]:]).group(0) - - def get_operator_under_cursor(self): - line = self.get_line(self.position[0]) - after = re.match("[^\w\s]+", line[self.position[1]:]) - before = re.match("[^\w\s]+", line[:self.position[1]][::-1]) - return (before.group(0) if before is not None else '') \ - + (after.group(0) if after is not None else '') - - def call_signature(self): - """ - :return: Tuple of string of the call and the index of the cursor. - """ - def get_line(pos): - def simplify_str(match): - """ - To avoid having strings without end marks (error tokens) and - strings that just screw up all the call signatures, just - simplify everything. - """ - mark = match.group(1) or match.group(2) - return mark + ' ' * (len(match.group(0)) - 2) + mark - - line_gen = self._backwards_line_generator(pos) - for line in line_gen: - # We have to switch the already backwards lines twice, because - # we scan them from start. - line = line[::-1] - modified = re.sub(REPLACE_STR, simplify_str, line) - yield modified[::-1] - - index = 0 - level = 0 - next_must_be_name = False - next_is_key = False - key_name = None - generator = self._get_backwards_tokenizer(self.position, get_line(self.position)) - for tok_type, tok_str, start_pos, prefix in generator: - if tok_str in tokenize.ALWAYS_BREAK_TOKENS: - break - elif next_must_be_name: - if tok_type == tokenize.NUMBER: - # If there's a number at the end of the string, it will be - # tokenized as a number. So add it to the name. - tok_type, t, _, _ = next(generator) - if tok_type == tokenize.NAME: - end_pos = start_pos[0], start_pos[1] + len(tok_str) - call, start_pos = self._calc_path_until_cursor(start_pos=end_pos) - return call, index, key_name, start_pos - index = 0 - next_must_be_name = False - elif next_is_key: - if tok_type == tokenize.NAME: - key_name = tok_str - next_is_key = False - - if tok_str == '(': - level += 1 - if level == 1: - next_must_be_name = True - level = 0 - elif tok_str == ')': - level -= 1 - elif tok_str == ',': - index += 1 - elif tok_str == '=': - next_is_key = True - return None, 0, None, (0, 0) - - def get_reverse_context(self, yield_positions=False): - """ - Returns the token strings in reverse order from the start position. - """ - self.get_path_until_cursor() # In case _start_cursor_pos is undefined. - pos = self._start_cursor_pos - while True: - # Remove non important white space. - line = self.get_line(pos[0]) - while True: - if pos[1] == 0: - line = self.get_line(pos[0] - 1) - if line and line[-1] == '\\': - pos = pos[0] - 1, len(line) - 1 - continue - else: - break - - if line[pos[1] - 1].isspace(): - pos = pos[0], pos[1] - 1 - else: - break - - try: - result, pos = self._calc_path_until_cursor(start_pos=pos) - if yield_positions: - yield pos - else: - yield result - except StopIteration: - if yield_positions: - yield None - else: - yield '' - - def get_backwards_context_tokens(self): - self.get_path_until_cursor() # In case _start_cursor_pos is undefined. - pos = self._start_cursor_pos - while True: - # Remove non important white space. - line = self.get_line(pos[0]) - while True: - if pos[1] == 0: - line = self.get_line(pos[0] - 1) - if line and line[-1] == '\\': - pos = pos[0] - 1, len(line) - 1 - continue - else: - break - - if line[pos[1] - 1].isspace(): - pos = pos[0], pos[1] - 1 - else: - break - - try: - token_ = next(self._get_backwards_tokenizer(pos)) - pos = token_.start_pos - yield token_ - except StopIteration: - # Make it clear that there's nothing coming anymore. - #yield Token('', token.ENDMARKER, (1, 0), '') - break - - def get_line(self, line_nr): - if not self._line_cache: - self._line_cache = common.splitlines(self.source) - - if line_nr == 0: - # This is a fix for the zeroth line. We need a newline there, for - # the backwards parser. - return u('') - if line_nr < 0: - raise StopIteration() - try: - return self._line_cache[line_nr - 1] - except IndexError: - raise StopIteration() - - def get_position_line(self): - return self.get_line(self.position[0])[:self.position[1]] - - class UserContextParser(object): - def __init__(self, grammar, source, path, position, user_context, + def __init__(self, grammar, source, path, position, parser_done_callback, use_fast_parser=True): self._grammar = grammar self._source = source self._path = path and os.path.abspath(path) self._position = position - self._user_context = user_context self._use_fast_parser = use_fast_parser self._parser_done_callback = parser_done_callback @@ -330,26 +45,6 @@ class UserContextParser(object): debug.speed('parsed') return module.get_statement_for_position(self._position) - @cache.underscore_memoization - def user_stmt_with_whitespace(self): - """ - Returns the statement under the cursor even if the statement lies - before the cursor. - """ - user_stmt = self.user_stmt() - - if not user_stmt: - # for statements like `from x import ` (cursor not in statement) - # or `abs( ` where the cursor is out in the whitespace. - if self._user_context.get_path_under_cursor(): - # We really should have a user_stmt, but the parser couldn't - # process it - probably a Syntax Error (or in a comment). - debug.warning('No statement under the cursor.') - return - pos = next(self._user_context.get_reverse_context(yield_positions=True)) - user_stmt = self.module().get_statement_for_position(pos) - return user_stmt - @cache.underscore_memoization def user_scope(self): """ diff --git a/jedi/utils.py b/jedi/utils.py index 1f0615d8..848d8571 100644 --- a/jedi/utils.py +++ b/jedi/utils.py @@ -10,8 +10,8 @@ import os import sys from jedi import Interpreter -from jedi.api.helpers import get_completion_parts -from jedi.parser.user_context import UserContext +from jedi.api.helpers import get_on_completion_name +from jedi import common def setup_readline(namespace_module=__main__): @@ -72,9 +72,10 @@ def setup_readline(namespace_module=__main__): try: interpreter = Interpreter(text, [namespace_module.__dict__]) - path = UserContext(text, (1, len(text))).get_path_until_cursor() - path, dot, like = get_completion_parts(path) - before = text[:len(text) - len(like)] + lines = common.splitlines(text) + position = (len(lines), len(lines[-1])) + name = get_on_completion_name(lines, position) + before = text[:len(text) - len(name)] completions = interpreter.completions() finally: sys.path.pop(0) diff --git a/test/test_parser/test_parser.py b/test/test_parser/test_parser.py index 9a6891ac..c76e494e 100644 --- a/test/test_parser/test_parser.py +++ b/test/test_parser/test_parser.py @@ -15,7 +15,7 @@ def test_user_statement_on_import(): " time)") for pos in [(2, 1), (2, 4)]: - p = UserContextParser(load_grammar(), s, None, pos, None, lambda x: 1).user_stmt() + p = UserContextParser(load_grammar(), s, None, pos, lambda x: 1).user_stmt() assert isinstance(p, pt.Import) assert [str(n) for n in p.get_defined_names()] == ['time']