much faster parsing for get_in_function_call, fixes parts of #34

This commit is contained in:
David Halter
2012-10-21 01:41:17 +02:00
parent 65e1b372b6
commit 36fd73a239
5 changed files with 76 additions and 10 deletions

View File

@@ -425,15 +425,50 @@ class Script(object):
This would return `None`. This would return `None`.
""" """
user_stmt = self.parser.user_stmt def check_user_stmt(user_stmt):
if user_stmt is None or not isinstance(user_stmt, parsing.Statement): if user_stmt is None or not isinstance(user_stmt, parsing.Statement):
return None return None, 0
ass = helpers.fast_parent_copy(user_stmt.get_assignment_calls()) ass = helpers.fast_parent_copy(user_stmt.get_assignment_calls())
call, index, stop = helpers.scan_array_for_pos(ass, self.pos) call, index, stop = helpers.scan_array_for_pos(ass, self.pos)
if call is None: return call, index
def check_cache():
if self.source_path is None:
return None, 0
try:
timestamp, parser = builtin.CachedModule.cache[
self.source_path]
except KeyError:
return None, 0
part_parser = self.module.get_part_parser()
user_stmt = part_parser.user_stmt
call, index = check_user_stmt(user_stmt)
if call:
old_stmt = parser.module.get_statement_for_position(self.pos)
if old_stmt is None:
return None, 0
new_call, new_index = check_user_stmt(user_stmt)
if new_call:
if str(new_call) == str(call) and index == new_index:
return new_call, new_index
return None, 0
else:
raise NotFoundError()
try:
call, index = check_cache()
except NotFoundError:
return None return None
if call is None:
user_stmt = self.parser.user_stmt
call, index = check_user_stmt(user_stmt)
if call is None:
return None
origins = evaluate.follow_call(call) origins = evaluate.follow_call(call)
if len(origins) == 0: if len(origins) == 0:

View File

@@ -6,12 +6,13 @@ import re
import tokenize import tokenize
import sys import sys
import os import os
import time
import parsing import parsing
import builtin import builtin
import debug import debug
import evaluate import evaluate
import time import settings
class Module(builtin.CachedModule): class Module(builtin.CachedModule):
@@ -53,6 +54,7 @@ class ModuleWithCursor(Module):
self._relevant_temp = None self._relevant_temp = None
self.source = source self.source = source
self._part_parser = None
@property @property
def parser(self): def parser(self):
@@ -67,7 +69,7 @@ class ModuleWithCursor(Module):
# default), therefore fill the cache here. # default), therefore fill the cache here.
self._parser = parsing.PyFuzzyParser(self.source, self.path, self._parser = parsing.PyFuzzyParser(self.source, self.path,
self.position) self.position)
if self.path: if self.path is not None:
builtin.CachedModule.cache[self.path] = time.time(), self._parser builtin.CachedModule.cache[self.path] = time.time(), self._parser
return self._parser return self._parser
@@ -192,6 +194,21 @@ class ModuleWithCursor(Module):
except IndexError: except IndexError:
raise StopIteration() raise StopIteration()
def get_part_parser(self):
    """ Returns a parser that contains only part of the source code. This
    exists only because of performance reasons.
    """
    if not self._part_parser:
        # TODO check for docstrings
        length = settings.part_line_length
        # Start `length` lines above the cursor (clamped to the file start)
        # and parse only that window of the source.
        offset = max(self.position[0] - length, 0)
        window = self.source.split('\n')[offset:offset + length]
        self._part_parser = parsing.PyFuzzyParser(
            '\n'.join(window), self.path, self.position,
            line_offset=offset)
    return self._part_parser
@evaluate.memoize_default([]) @evaluate.memoize_default([])
def sys_path_with_modifications(module): def sys_path_with_modifications(module):

View File

@@ -1120,7 +1120,7 @@ class PyFuzzyParser(object):
:type user_position: tuple(int, int) :type user_position: tuple(int, int)
""" """
def __init__(self, code, module_path=None, user_position=None, def __init__(self, code, module_path=None, user_position=None,
no_docstr=False): no_docstr=False, line_offset=0):
self.user_position = user_position self.user_position = user_position
self.user_scope = None self.user_scope = None
self.user_stmt = None self.user_stmt = None
@@ -1136,7 +1136,7 @@ class PyFuzzyParser(object):
# Stuff to fix tokenize errors. The parser is pretty good in tolerating # Stuff to fix tokenize errors. The parser is pretty good in tolerating
# any errors of tokenize and just parse ahead. # any errors of tokenize and just parse ahead.
self._line_of_tokenize_restart = 0 self._line_of_tokenize_restart = line_offset
self.parse() self.parse()

View File

@@ -62,3 +62,10 @@ additional_dynamic_modules = []
max_function_recursion_level = 5 max_function_recursion_level = 5
max_until_execution_unique = 50 max_until_execution_unique = 50
max_executions = 1000 max_executions = 1000
# ----------------
# various
# ----------------
# Number of source lines in the current code part; used to speed up parsing.
part_line_length = 20

View File

@@ -31,6 +31,13 @@ class TestRegression(unittest.TestCase):
script = api.Script(src, pos[0], pos[1], '') script = api.Script(src, pos[0], pos[1], '')
return script.get_in_function_call() return script.get_in_function_call()
def test_part_parser(self):
    """ test the get_in_function_call speedups """
    source = 100 * '\n' + 'abs('
    position = (101, 4)
    # First call warms the parser cache; the second must still find the call.
    self.get_in_function_call(source, position)
    assert self.get_in_function_call(source, position)
def test_get_definition_cursor(self): def test_get_definition_cursor(self):
s = ("class A():\n" s = ("class A():\n"