Finally able to remove the UserContext class (the backwards tokenizer in jedi.parser.user_context). This is awesome!

From now on, only the syntax tree is used to work out what the user is doing at the cursor position.
2025-12-10 07:41:51 +08:00 · 2016-06-23 16:26:28 +02:00
parent 672bf9908c
commit 73e71b3c1a
6 changed files with 21 additions and 321 deletions
--- a/jedi/api/__init__.py
+++ b/jedi/api/__init__.py
@@ -16,7 +16,7 @@ import sys
 from jedi._compatibility import unicode
 from jedi.parser import load_grammar
 from jedi.parser import tree
-from jedi.parser.user_context import UserContext, UserContextParser
+from jedi.parser.user_context import UserContextParser
 from jedi import debug
 from jedi import settings
 from jedi import common
@@ -120,9 +120,8 @@ class Script(object):
        cache.clear_time_caches()
        debug.reset_time()
        self._grammar = load_grammar(version='%s.%s' % sys.version_info[:2])
-        self._user_context = UserContext(self._source, self._pos)
        self._parser = UserContextParser(self._grammar, self._source, path,
-                                         self._pos, self._user_context,
+                                         self._pos,
                                         self._parsed_callback)
        if sys_path is None:
            venv = os.getenv('VIRTUAL_ENV')
@@ -373,7 +372,7 @@ class Interpreter(Script):
        # changing).
        self._parser = UserContextParser(self._grammar, self._source,
                                         self._orig_path, self._pos,
-                                         self._user_context, self._parsed_callback,
+                                         self._parsed_callback,
                                         use_fast_parser=False)
        #interpreter.add_namespaces_to_parser(self._evaluator, namespaces,
                                             #self._get_module())
--- a/jedi/api/completion.py
+++ b/jedi/api/completion.py
@@ -60,11 +60,8 @@ class Completion:
        self._module = evaluator.wrap(parser.module())
        self._code_lines = code_lines

-        line = self._code_lines[position[0] - 1]
        # The first step of completions is to get the name
-        self._like_name = re.search(
-            r'(?!\d)\w+$|$', line[:position[1]]
-        ).group(0)
+        self._like_name = helpers.get_on_completion_name(code_lines, position)
        # The actual cursor position is not what we need to calculate
        # everything. We want the start of the name we're on.
        self._position = position[0], position[1] - len(self._like_name)
--- a/jedi/api/helpers.py
+++ b/jedi/api/helpers.py
@@ -27,6 +27,14 @@ def sorted_definitions(defs):
    return sorted(defs, key=lambda x: (x.module_path or '', x.line or 0, x.column or 0))


+def get_on_completion_name(lines, position):
+    line = lines[position[0] - 1]
+    # The first step of completions is to get the name
+    return re.search(
+        r'(?!\d)\w+$|$', line[:position[1]]
+    ).group(0)
+
+
 def _get_code(code_lines, start_pos, end_pos):
    """
    :param code_start_pos: is where the code starts.
--- a/jedi/parser/user_context.py
+++ b/jedi/parser/user_context.py
@@ -1,17 +1,12 @@
 import re
 import os
-import keyword
 from collections import namedtuple

 from jedi import cache
-from jedi import common
-from jedi.parser import tokenize, ParserWithRecovery
-from jedi._compatibility import u
-from jedi.parser import token
+from jedi.parser import ParserWithRecovery
 from jedi.parser.fast import FastParser
 from jedi.parser import tree
 from jedi import debug
-from jedi.common import PushBackIterator

 # TODO this should be part of the tokenizer not just of this user_context.
 Token = namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])
@@ -22,293 +17,13 @@ REPLACE_STR = r"[bBuU]?[rR]?" + (r"(?:(')[^\n'\\]*(?:\\.[^\n'\\]*)*(?:'|$)" +
 REPLACE_STR = re.compile(REPLACE_STR)


-class UserContext(object):
-    """
-    :param source: The source code of the file.
-    :param position: The position, the user is currently in. Only important \
-    for the main file.
-    """
-    def __init__(self, source, position):
-        self.source = source
-        self.position = position
-        self._line_cache = None
-
-        self._relevant_temp = None
-
-    @cache.underscore_memoization
-    def get_path_until_cursor(self):
-        """ Get the path under the cursor. """
-        path, self._start_cursor_pos = self._calc_path_until_cursor(self.position)
-        return path
-
-    def _backwards_line_generator(self, start_pos):
-        self._line_temp, self._column_temp = start_pos
-        first_line = self.get_line(start_pos[0])[:self._column_temp]
-
-        self._line_length = self._column_temp
-        yield first_line[::-1] + '\n'
-
-        while True:
-            self._line_temp -= 1
-            line = self.get_line(self._line_temp)
-            self._line_length = len(line)
-            yield line[::-1] + '\n'
-
-    def _get_backwards_tokenizer(self, start_pos, line_gen=None):
-        if line_gen is None:
-            line_gen = self._backwards_line_generator(start_pos)
-        token_gen = tokenize.generate_tokens(lambda: next(line_gen))
-        for typ, tok_str, tok_start_pos, prefix in token_gen:
-            line = self.get_line(self._line_temp)
-            # Calculate the real start_pos of the token.
-            if tok_start_pos[0] == 1:
-                # We are in the first checked line
-                column = start_pos[1] - tok_start_pos[1]
-            else:
-                column = len(line) - tok_start_pos[1]
-            # Multi-line docstrings must be accounted for.
-            first_line = common.splitlines(tok_str)[0]
-            column -= len(first_line)
-            # Reverse the token again, so that it is in normal order again.
-            yield Token(typ, tok_str[::-1], (self._line_temp, column), prefix[::-1])
-
-    def _calc_path_until_cursor(self, start_pos):
-        """
-        Something like a reverse tokenizer that tokenizes the reversed strings.
-        """
-        open_brackets = ['(', '[', '{']
-        close_brackets = [')', ']', '}']
-
-        start_cursor = start_pos
-        gen = PushBackIterator(self._get_backwards_tokenizer(start_pos))
-        string = u('')
-        level = 0
-        force_point = False
-        last_type = None
-        is_first = True
-        for tok_type, tok_str, tok_start_pos, prefix in gen:
-            if is_first:
-                if prefix:  # whitespace is not a path
-                    return u(''), start_cursor
-                is_first = False
-
-            if last_type == tok_type == tokenize.NAME:
-                string = ' ' + string
-
-            if level:
-                if tok_str in close_brackets:
-                    level += 1
-                elif tok_str in open_brackets:
-                    level -= 1
-            elif tok_str == '.':
-                force_point = False
-            elif force_point:
-                # Reversed tokenizing, therefore a number is recognized as a
-                # floating point number.
-                # The same is true for string prefixes -> represented as a
-                # combination of string and name.
-                if tok_type == tokenize.NUMBER and tok_str[-1] == '.' \
-                        or tok_type == tokenize.NAME and last_type == tokenize.STRING \
-                        and tok_str.lower() in ('b', 'u', 'r', 'br', 'ur'):
-                    force_point = False
-                else:
-                    break
-            elif tok_str in close_brackets:
-                level += 1
-            elif tok_type in [tokenize.NAME, tokenize.STRING]:
-                if keyword.iskeyword(tok_str) and string:
-                    # If there's already something in the string, a keyword
-                    # never adds any meaning to the current statement.
-                    break
-                force_point = True
-            elif tok_type == tokenize.NUMBER:
-                pass
-            else:
-                if tok_str == '-':
-                    next_tok = next(gen)
-                    if next_tok[1] == 'e':
-                        gen.push_back(next_tok)
-                    else:
-                        break
-                else:
-                    break
-
-            start_cursor = tok_start_pos
-            string = tok_str + prefix + string
-            last_type = tok_type
-
-        # Don't need whitespace around a statement.
-        return string.strip(), start_cursor
-
-    def get_path_under_cursor(self):
-        """
-        Return the path under the cursor. If there is a rest of the path left,
-        it will be added to the stuff before it.
-        """
-        return self.get_path_until_cursor() + self.get_path_after_cursor()
-
-    def get_path_after_cursor(self):
-        line = self.get_line(self.position[0])
-        return re.search("[\w\d]*", line[self.position[1]:]).group(0)
-
-    def get_operator_under_cursor(self):
-        line = self.get_line(self.position[0])
-        after = re.match("[^\w\s]+", line[self.position[1]:])
-        before = re.match("[^\w\s]+", line[:self.position[1]][::-1])
-        return (before.group(0) if before is not None else '') \
-            + (after.group(0) if after is not None else '')
-
-    def call_signature(self):
-        """
-        :return: Tuple of string of the call and the index of the cursor.
-        """
-        def get_line(pos):
-            def simplify_str(match):
-                """
-                To avoid having strings without end marks (error tokens) and
-                strings that just screw up all the call signatures, just
-                simplify everything.
-                """
-                mark = match.group(1) or match.group(2)
-                return mark + ' ' * (len(match.group(0)) - 2) + mark
-
-            line_gen = self._backwards_line_generator(pos)
-            for line in line_gen:
-                # We have to switch the already backwards lines twice, because
-                # we scan them from start.
-                line = line[::-1]
-                modified = re.sub(REPLACE_STR, simplify_str, line)
-                yield modified[::-1]
-
-        index = 0
-        level = 0
-        next_must_be_name = False
-        next_is_key = False
-        key_name = None
-        generator = self._get_backwards_tokenizer(self.position, get_line(self.position))
-        for tok_type, tok_str, start_pos, prefix in generator:
-            if tok_str in tokenize.ALWAYS_BREAK_TOKENS:
-                break
-            elif next_must_be_name:
-                if tok_type == tokenize.NUMBER:
-                    # If there's a number at the end of the string, it will be
-                    # tokenized as a number. So add it to the name.
-                    tok_type, t, _, _ = next(generator)
-                if tok_type == tokenize.NAME:
-                    end_pos = start_pos[0], start_pos[1] + len(tok_str)
-                    call, start_pos = self._calc_path_until_cursor(start_pos=end_pos)
-                    return call, index, key_name, start_pos
-                index = 0
-                next_must_be_name = False
-            elif next_is_key:
-                if tok_type == tokenize.NAME:
-                    key_name = tok_str
-                next_is_key = False
-
-            if tok_str == '(':
-                level += 1
-                if level == 1:
-                    next_must_be_name = True
-                    level = 0
-            elif tok_str == ')':
-                level -= 1
-            elif tok_str == ',':
-                index += 1
-            elif tok_str == '=':
-                next_is_key = True
-        return None, 0, None, (0, 0)
-
-    def get_reverse_context(self, yield_positions=False):
-        """
-        Returns the token strings in reverse order from the start position.
-        """
-        self.get_path_until_cursor()  # In case _start_cursor_pos is undefined.
-        pos = self._start_cursor_pos
-        while True:
-            # Remove non important white space.
-            line = self.get_line(pos[0])
-            while True:
-                if pos[1] == 0:
-                    line = self.get_line(pos[0] - 1)
-                    if line and line[-1] == '\\':
-                        pos = pos[0] - 1, len(line) - 1
-                        continue
-                    else:
-                        break
-
-                if line[pos[1] - 1].isspace():
-                    pos = pos[0], pos[1] - 1
-                else:
-                    break
-
-            try:
-                result, pos = self._calc_path_until_cursor(start_pos=pos)
-                if yield_positions:
-                    yield pos
-                else:
-                    yield result
-            except StopIteration:
-                if yield_positions:
-                    yield None
-                else:
-                    yield ''
-
-    def get_backwards_context_tokens(self):
-        self.get_path_until_cursor()  # In case _start_cursor_pos is undefined.
-        pos = self._start_cursor_pos
-        while True:
-            # Remove non important white space.
-            line = self.get_line(pos[0])
-            while True:
-                if pos[1] == 0:
-                    line = self.get_line(pos[0] - 1)
-                    if line and line[-1] == '\\':
-                        pos = pos[0] - 1, len(line) - 1
-                        continue
-                    else:
-                        break
-
-                if line[pos[1] - 1].isspace():
-                    pos = pos[0], pos[1] - 1
-                else:
-                    break
-
-            try:
-                token_ = next(self._get_backwards_tokenizer(pos))
-                pos = token_.start_pos
-                yield token_
-            except StopIteration:
-                # Make it clear that there's nothing coming anymore.
-                #yield Token('', token.ENDMARKER, (1, 0), '')
-                break
-
-    def get_line(self, line_nr):
-        if not self._line_cache:
-            self._line_cache = common.splitlines(self.source)
-
-        if line_nr == 0:
-            # This is a fix for the zeroth line. We need a newline there, for
-            # the backwards parser.
-            return u('')
-        if line_nr < 0:
-            raise StopIteration()
-        try:
-            return self._line_cache[line_nr - 1]
-        except IndexError:
-            raise StopIteration()
-
-    def get_position_line(self):
-        return self.get_line(self.position[0])[:self.position[1]]
-
-
 class UserContextParser(object):
-    def __init__(self, grammar, source, path, position, user_context,
+    def __init__(self, grammar, source, path, position,
                 parser_done_callback, use_fast_parser=True):
        self._grammar = grammar
        self._source = source
        self._path = path and os.path.abspath(path)
        self._position = position
-        self._user_context = user_context
        self._use_fast_parser = use_fast_parser
        self._parser_done_callback = parser_done_callback

@@ -330,26 +45,6 @@ class UserContextParser(object):
        debug.speed('parsed')
        return module.get_statement_for_position(self._position)

-    @cache.underscore_memoization
-    def user_stmt_with_whitespace(self):
-        """
-        Returns the statement under the cursor even if the statement lies
-        before the cursor.
-        """
-        user_stmt = self.user_stmt()
-
-        if not user_stmt:
-            # for statements like `from x import ` (cursor not in statement)
-            # or `abs( ` where the cursor is out in the whitespace.
-            if self._user_context.get_path_under_cursor():
-                # We really should have a user_stmt, but the parser couldn't
-                # process it - probably a Syntax Error (or in a comment).
-                debug.warning('No statement under the cursor.')
-                return
-            pos = next(self._user_context.get_reverse_context(yield_positions=True))
-            user_stmt = self.module().get_statement_for_position(pos)
-        return user_stmt
-
    @cache.underscore_memoization
    def user_scope(self):
        """
--- a/jedi/utils.py
+++ b/jedi/utils.py
@@ -10,8 +10,8 @@ import os
 import sys

 from jedi import Interpreter
-from jedi.api.helpers import get_completion_parts
-from jedi.parser.user_context import UserContext
+from jedi.api.helpers import get_on_completion_name
+from jedi import common


 def setup_readline(namespace_module=__main__):
@@ -72,9 +72,10 @@ def setup_readline(namespace_module=__main__):
                try:
                    interpreter = Interpreter(text, [namespace_module.__dict__])

-                    path = UserContext(text, (1, len(text))).get_path_until_cursor()
-                    path, dot, like = get_completion_parts(path)
-                    before = text[:len(text) - len(like)]
+                    lines = common.splitlines(text)
+                    position = (len(lines), len(lines[-1]))
+                    name = get_on_completion_name(lines, position)
+                    before = text[:len(text) - len(name)]
                    completions = interpreter.completions()
                finally:
                    sys.path.pop(0)
--- a/test/test_parser/test_parser.py
+++ b/test/test_parser/test_parser.py
@@ -15,7 +15,7 @@ def test_user_statement_on_import():
          "    time)")

    for pos in [(2, 1), (2, 4)]:
-        p = UserContextParser(load_grammar(), s, None, pos, None, lambda x: 1).user_stmt()
+        p = UserContextParser(load_grammar(), s, None, pos, lambda x: 1).user_stmt()
        assert isinstance(p, pt.Import)
        assert [str(n) for n in p.get_defined_names()] == ['time']