much faster parsing for get_in_function_call, fixes parts of #34

2012-10-21 01:41:17 +02:00
parent 65e1b372b6
commit 36fd73a239
5 changed files with 76 additions and 10 deletions
--- a/jedi/api.py
+++ b/jedi/api.py
@@ -425,12 +425,47 @@ class Script(object):

        This would return `None`.
        """
-        user_stmt = self.parser.user_stmt
+        def check_user_stmt(user_stmt):
            if user_stmt is None or not isinstance(user_stmt, parsing.Statement):
-            return None
+                return None, 0
            ass = helpers.fast_parent_copy(user_stmt.get_assignment_calls())

            call, index, stop = helpers.scan_array_for_pos(ass, self.pos)
+            return call, index
+
+        def check_cache():
+            """ Look the call up with the part parser and confirm it against
+            the cached full parser; returns `(call, index)` or `(None, 0)`.
+            """
+            if self.source_path is None:
+                return None, 0
+            try:
+                _, parser = builtin.CachedModule.cache[self.source_path]
+            except KeyError:
+                return None, 0
+            user_stmt = self.module.get_part_parser().user_stmt
+            call, index = check_user_stmt(user_stmt)
+            if not call:
+                # The part parser found no call at the cursor: give up.
+                raise NotFoundError()
+            old_stmt = parser.module.get_statement_for_position(self.pos)
+            if old_stmt is None:
+                return None, 0
+            # Compare with the *cached* statement, not with user_stmt again.
+            old_call, old_index = check_user_stmt(old_stmt)
+            if old_call and str(old_call) == str(call) and index == old_index:
+                return old_call, old_index
+            return None, 0
+
+
+        try:
+            call, index = check_cache()
+        except NotFoundError:
+            return None
+
+        if call is None:
+            user_stmt = self.parser.user_stmt
+            call, index = check_user_stmt(user_stmt)
            if call is None:
                return None

--- a/jedi/modules.py
+++ b/jedi/modules.py
@@ -6,12 +6,13 @@ import re
 import tokenize
 import sys
 import os
+import time

 import parsing
 import builtin
 import debug
 import evaluate
-import time
+import settings


 class Module(builtin.CachedModule):
@@ -53,6 +54,7 @@ class ModuleWithCursor(Module):
        self._relevant_temp = None

        self.source = source
+        self._part_parser = None

    @property
    def parser(self):
@@ -67,7 +69,7 @@ class ModuleWithCursor(Module):
            # default), therefore fill the cache here.
            self._parser = parsing.PyFuzzyParser(self.source, self.path,
                                                                self.position)
-            if self.path:
+            if self.path is not None:
                builtin.CachedModule.cache[self.path] = time.time(), self._parser
        return self._parser

@@ -192,6 +194,21 @@ class ModuleWithCursor(Module):
        except IndexError:
            raise StopIteration()

+    def get_part_parser(self):
+        """ Returns a parser that only contains the last lines of the source
+        code up to the user position. It exists for performance reasons only.
+        """
+        if self._part_parser is not None:
+            return self._part_parser
+
+        # TODO check for docstrings
+        length = settings.part_line_length
+        offset = max(self.position[0] - length, 0)
+        s = '\n'.join(self.source.split('\n')[offset:offset + length])
+        self._part_parser = parsing.PyFuzzyParser(s, self.path, self.position,
+                                                        line_offset=offset)
+        return self._part_parser
+

@evaluate.memoize_default([])
 def sys_path_with_modifications(module):
--- a/jedi/parsing.py
+++ b/jedi/parsing.py
@@ -1120,7 +1120,7 @@ class PyFuzzyParser(object):
    :type user_position: tuple(int, int)
    """
    def __init__(self, code, module_path=None, user_position=None,
-                                                            no_docstr=False):
+                                    no_docstr=False, line_offset=0):
        self.user_position = user_position
        self.user_scope = None
        self.user_stmt = None
@@ -1136,7 +1136,7 @@ class PyFuzzyParser(object):

        # Stuff to fix tokenize errors. The parser is pretty good in tolerating
        # any errors of tokenize and just parse ahead.
-        self._line_of_tokenize_restart = 0
+        self._line_of_tokenize_restart = line_offset

        self.parse()

--- a/jedi/settings.py
+++ b/jedi/settings.py
@@ -62,3 +62,10 @@ additional_dynamic_modules = []
 max_function_recursion_level = 5
 max_until_execution_unique = 50
 max_executions = 1000
+
+# ----------------
+# various
+# ----------------
+
+# Line count of the code part before the cursor, parsed alone for speed.
+part_line_length = 20
--- a/test/regression.py
+++ b/test/regression.py
@@ -31,6 +31,13 @@ class TestRegression(unittest.TestCase):
        script = api.Script(src, pos[0], pos[1], '')
        return script.get_in_function_call()

+    def test_part_parser(self):
+        """ Test the get_in_function_call speedup (second, repeated call). """
+        s = '\n' * 100 + 'abs('
+        pos = 101, 4
+        self.get_in_function_call(s, pos)  # presumably fills the parser cache
+        assert self.get_in_function_call(s, pos)
+
    def test_get_definition_cursor(self):

        s = ("class A():\n"