much faster parsing for get_in_function_call, fixes parts of #34

This commit is contained in:
David Halter
2012-10-21 01:41:17 +02:00
parent 65e1b372b6
commit 36fd73a239
5 changed files with 76 additions and 10 deletions

View File

@@ -425,15 +425,50 @@ class Script(object):
This would return `None`. This would return `None`.
""" """
user_stmt = self.parser.user_stmt def check_user_stmt(user_stmt):
if user_stmt is None or not isinstance(user_stmt, parsing.Statement): if user_stmt is None or not isinstance(user_stmt, parsing.Statement):
return None return None, 0
ass = helpers.fast_parent_copy(user_stmt.get_assignment_calls()) ass = helpers.fast_parent_copy(user_stmt.get_assignment_calls())
call, index, stop = helpers.scan_array_for_pos(ass, self.pos) call, index, stop = helpers.scan_array_for_pos(ass, self.pos)
if call is None: return call, index
def check_cache():
if self.source_path is None:
return None, 0
try:
timestamp, parser = builtin.CachedModule.cache[
self.source_path]
except KeyError:
return None, 0
part_parser = self.module.get_part_parser()
user_stmt = part_parser.user_stmt
call, index = check_user_stmt(user_stmt)
if call:
old_stmt = parser.module.get_statement_for_position(self.pos)
if old_stmt is None:
return None, 0
new_call, new_index = check_user_stmt(user_stmt)
if new_call:
if str(new_call) == str(call) and index == new_index:
return new_call, new_index
return None, 0
else:
raise NotFoundError()
try:
call, index = check_cache()
except NotFoundError:
return None return None
if call is None:
user_stmt = self.parser.user_stmt
call, index = check_user_stmt(user_stmt)
if call is None:
return None
origins = evaluate.follow_call(call) origins = evaluate.follow_call(call)
if len(origins) == 0: if len(origins) == 0:

View File

@@ -6,12 +6,13 @@ import re
import tokenize import tokenize
import sys import sys
import os import os
import time
import parsing import parsing
import builtin import builtin
import debug import debug
import evaluate import evaluate
import time import settings
class Module(builtin.CachedModule): class Module(builtin.CachedModule):
@@ -53,6 +54,7 @@ class ModuleWithCursor(Module):
self._relevant_temp = None self._relevant_temp = None
self.source = source self.source = source
self._part_parser = None
@property @property
def parser(self): def parser(self):
@@ -67,7 +69,7 @@ class ModuleWithCursor(Module):
# default), therefore fill the cache here. # default), therefore fill the cache here.
self._parser = parsing.PyFuzzyParser(self.source, self.path, self._parser = parsing.PyFuzzyParser(self.source, self.path,
self.position) self.position)
if self.path: if self.path is not None:
builtin.CachedModule.cache[self.path] = time.time(), self._parser builtin.CachedModule.cache[self.path] = time.time(), self._parser
return self._parser return self._parser
@@ -192,6 +194,21 @@ class ModuleWithCursor(Module):
except IndexError: except IndexError:
raise StopIteration() raise StopIteration()
def get_part_parser(self):
    """ Returns a parser that contains only part of the source code. This
    exists only because of performance reasons.
    """
    if not self._part_parser:
        # TODO check for docstrings
        length = settings.part_line_length
        # Start `length` lines above the cursor (clamped to the file start)
        # and parse only that window of the source.
        offset = max(self.position[0] - length, 0)
        window = self.source.split('\n')[offset:offset + length]
        self._part_parser = parsing.PyFuzzyParser(
            '\n'.join(window), self.path, self.position,
            line_offset=offset)
    return self._part_parser
@evaluate.memoize_default([]) @evaluate.memoize_default([])
def sys_path_with_modifications(module): def sys_path_with_modifications(module):

View File

@@ -1120,7 +1120,7 @@ class PyFuzzyParser(object):
:type user_position: tuple(int, int) :type user_position: tuple(int, int)
""" """
def __init__(self, code, module_path=None, user_position=None, def __init__(self, code, module_path=None, user_position=None,
no_docstr=False): no_docstr=False, line_offset=0):
self.user_position = user_position self.user_position = user_position
self.user_scope = None self.user_scope = None
self.user_stmt = None self.user_stmt = None
@@ -1136,7 +1136,7 @@ class PyFuzzyParser(object):
# Stuff to fix tokenize errors. The parser is pretty good in tolerating # Stuff to fix tokenize errors. The parser is pretty good in tolerating
# any errors of tokenize and just parse ahead. # any errors of tokenize and just parse ahead.
self._line_of_tokenize_restart = 0 self._line_of_tokenize_restart = line_offset
self.parse() self.parse()

View File

@@ -62,3 +62,10 @@ additional_dynamic_modules = []
max_function_recursion_level = 5 max_function_recursion_level = 5
max_until_execution_unique = 50 max_until_execution_unique = 50
max_executions = 1000 max_executions = 1000
# ----------------
# various
# ----------------
# Number of source lines in the current code part; used to speed up parsing.
part_line_length = 20

View File

@@ -31,6 +31,13 @@ class TestRegression(unittest.TestCase):
script = api.Script(src, pos[0], pos[1], '') script = api.Script(src, pos[0], pos[1], '')
return script.get_in_function_call() return script.get_in_function_call()
def test_part_parser(self):
    """ test the get_in_function_call speedups """
    source = 100 * '\n' + 'abs('
    position = (101, 4)
    # First call warms the parser cache; the second must still find the call.
    self.get_in_function_call(source, position)
    assert self.get_in_function_call(source, position)
def test_get_definition_cursor(self): def test_get_definition_cursor(self):
s = ("class A():\n" s = ("class A():\n"