Avoid caching parso objects, fixes #1723

This commit is contained in:
Dave Halter
2021-01-14 00:29:34 +01:00
parent 44d77523b3
commit b9067ccdbb
6 changed files with 66 additions and 23 deletions

View File

@@ -13,7 +13,6 @@ from pathlib import Path
import parso import parso
from parso.python import tree from parso.python import tree
from jedi._compatibility import cast_path
from jedi.parser_utils import get_executable_nodes from jedi.parser_utils import get_executable_nodes
from jedi import debug from jedi import debug
from jedi import settings from jedi import settings
@@ -151,7 +150,7 @@ class Script:
if self.path is None: if self.path is None:
file_io = None file_io = None
else: else:
file_io = KnownContentFileIO(cast_path(self.path), self._code) file_io = KnownContentFileIO(self.path, self._code)
if self.path is not None and self.path.suffix == '.pyi': if self.path is not None and self.path.suffix == '.pyi':
# We are in a stub file. Try to load the stub properly. # We are in a stub file. Try to load the stub properly.
stub_module = load_proper_stub_module( stub_module = load_proper_stub_module(
@@ -728,9 +727,13 @@ class Interpreter(Script):
@cache.memoize_method @cache.memoize_method
def _get_module_context(self): def _get_module_context(self):
if self.path is None:
file_io = None
else:
file_io = KnownContentFileIO(self.path, self._code)
tree_module_value = ModuleValue( tree_module_value = ModuleValue(
self._inference_state, self._module_node, self._inference_state, self._module_node,
file_io=KnownContentFileIO(str(self.path), self._code), file_io=file_io,
string_names=('__main__',), string_names=('__main__',),
code_lines=self._code_lines, code_lines=self._code_lines,
) )

View File

@@ -325,7 +325,7 @@ class ModuleContext(TreeContextMixin, ValueContext):
yield from filters yield from filters
def get_global_filter(self): def get_global_filter(self):
return GlobalNameFilter(self, self.tree_node) return GlobalNameFilter(self)
@property @property
def string_names(self): def string_names(self):

View File

@@ -12,7 +12,7 @@ from parso.python.tree import Name, UsedNamesMapping
from jedi.inference import flow_analysis from jedi.inference import flow_analysis
from jedi.inference.base_value import ValueSet, ValueWrapper, \ from jedi.inference.base_value import ValueSet, ValueWrapper, \
LazyValueWrapper LazyValueWrapper
from jedi.parser_utils import get_cached_parent_scope from jedi.parser_utils import get_cached_parent_scope, get_parso_cache_node
from jedi.inference.utils import to_list from jedi.inference.utils import to_list
from jedi.inference.names import TreeNameDefinition, ParamName, \ from jedi.inference.names import TreeNameDefinition, ParamName, \
AnonymousParamName, AbstractNameDefinition, NameWrapper AnonymousParamName, AbstractNameDefinition, NameWrapper
@@ -54,11 +54,11 @@ class FilterWrapper:
return self.wrap_names(self._wrapped_filter.values()) return self.wrap_names(self._wrapped_filter.values())
def _get_definition_names(used_names, name_key): def _get_definition_names(parso_cache_node, used_names, name_key):
try: try:
for_module = _definition_name_cache[used_names] for_module = _definition_name_cache[parso_cache_node]
except KeyError: except KeyError:
for_module = _definition_name_cache[used_names] = {} for_module = _definition_name_cache[parso_cache_node] = {}
try: try:
return for_module[name_key] return for_module[name_key]
@@ -70,18 +70,35 @@ def _get_definition_names(used_names, name_key):
return result return result
class AbstractUsedNamesFilter(AbstractFilter): class _AbstractUsedNamesFilter(AbstractFilter):
name_class = TreeNameDefinition name_class = TreeNameDefinition
def __init__(self, parent_context, parser_scope): def __init__(self, parent_context, node_context=None):
self._parser_scope = parser_scope if node_context is None:
self._module_node = self._parser_scope.get_root_node() node_context = parent_context
self._used_names = self._module_node.get_used_names() self._node_context = node_context
self._parser_scope = node_context.tree_node
module_context = node_context.get_root_context()
# It is quite hacky that we have to use this. It exists so that certain
# things can be cached in a WeakKeyDictionary. However, parso
# intentionally uses slots (to save memory), so we end up having to hold
# a weak reference to the object that caches the tree instead.
#
# Previously we tried to solve this with a weak reference to used_names.
# That does not work either, because used_names is referenced by the
# module, and the module itself is referenced by every node through its
# parents.
self._parso_cache_node = get_parso_cache_node(
module_context.inference_state.latest_grammar
if module_context.is_stub() else module_context.inference_state.grammar,
module_context.py__file__()
)
self._used_names = module_context.tree_node.get_used_names()
self.parent_context = parent_context self.parent_context = parent_context
def get(self, name): def get(self, name):
return self._convert_names(self._filter( return self._convert_names(self._filter(
_get_definition_names(self._used_names, name), _get_definition_names(self._parso_cache_node, self._used_names, name),
)) ))
def _convert_names(self, names): def _convert_names(self, names):
@@ -92,7 +109,7 @@ class AbstractUsedNamesFilter(AbstractFilter):
name name
for name_key in self._used_names for name_key in self._used_names
for name in self._filter( for name in self._filter(
_get_definition_names(self._used_names, name_key), _get_definition_names(self._parso_cache_node, self._used_names, name_key),
) )
) )
@@ -100,7 +117,7 @@ class AbstractUsedNamesFilter(AbstractFilter):
return '<%s: %s>' % (self.__class__.__name__, self.parent_context) return '<%s: %s>' % (self.__class__.__name__, self.parent_context)
class ParserTreeFilter(AbstractUsedNamesFilter): class ParserTreeFilter(_AbstractUsedNamesFilter):
def __init__(self, parent_context, node_context=None, until_position=None, def __init__(self, parent_context, node_context=None, until_position=None,
origin_scope=None): origin_scope=None):
""" """
@@ -109,10 +126,7 @@ class ParserTreeFilter(AbstractUsedNamesFilter):
value, but for some type inference it's important to have a local value, but for some type inference it's important to have a local
value of the other classes. value of the other classes.
""" """
if node_context is None: super().__init__(parent_context, node_context)
node_context = parent_context
super().__init__(parent_context, node_context.tree_node)
self._node_context = node_context
self._origin_scope = origin_scope self._origin_scope = origin_scope
self._until_position = until_position self._until_position = until_position
@@ -182,7 +196,7 @@ class AnonymousFunctionExecutionFilter(_FunctionExecutionFilter):
return AnonymousParamName(self._function_value, name) return AnonymousParamName(self._function_value, name)
class GlobalNameFilter(AbstractUsedNamesFilter): class GlobalNameFilter(_AbstractUsedNamesFilter):
def get(self, name): def get(self, name):
try: try:
names = self._used_names[name] names = self._used_names[name]

View File

@@ -64,7 +64,7 @@ class ModuleMixin(SubModuleDictMixin):
parent_context=self.as_context(), parent_context=self.as_context(),
origin_scope=origin_scope origin_scope=origin_scope
), ),
GlobalNameFilter(self.as_context(), self.tree_node), GlobalNameFilter(self.as_context()),
) )
yield DictFilter(self.sub_modules_dict()) yield DictFilter(self.sub_modules_dict())
yield DictFilter(self._module_attributes_dict()) yield DictFilter(self._module_attributes_dict())

View File

@@ -270,7 +270,11 @@ def get_cached_code_lines(grammar, path):
Basically access the cached code lines in parso. This is not the nicest way Basically access the cached code lines in parso. This is not the nicest way
to do this, but we avoid splitting all the lines again. to do this, but we avoid splitting all the lines again.
""" """
return parser_cache[grammar._hashed][path].lines return get_parso_cache_node(grammar, path).lines
def get_parso_cache_node(grammar, path):
    """
    Return the entry that parso's ``parser_cache`` stores for ``path``.

    The cache is indexed first by ``grammar._hashed`` and then by ``path``.
    """
    entries_for_grammar = parser_cache[grammar._hashed]
    return entries_for_grammar[path]
def cut_value_at_position(leaf, position): def cut_value_at_position(leaf, position):

View File

@@ -1,5 +1,9 @@
import gc
from pathlib import Path
from jedi import parser_utils from jedi import parser_utils
from parso import parse from parso import parse
from parso.cache import parser_cache
from parso.python import tree from parso.python import tree
import pytest import pytest
@@ -67,3 +71,21 @@ def test_get_signature(code, signature):
if node.type == 'simple_stmt': if node.type == 'simple_stmt':
node = node.children[0] node = node.children[0]
assert parser_utils.get_signature(node) == signature assert parser_utils.get_signature(node) == signature
def test_parser_cache_clear(Script):
    """
    When parso drops a module from its cache, Jedi must not keep the parsed
    tree alive; the resources should be freed.
    """
    source_path = Path(__file__).parent / 'parser_cache_test_foo.py'
    script = Script("a = abs\na", path=source_path)
    script.complete()

    # Remember only the identity of the module node, not the node itself, so
    # that this test does not hold a reference to it.
    module_id = id(script._module_node)
    grammar_key = script._inference_state.grammar._hashed
    del parser_cache[grammar_key][script.path]
    del script

    import jedi
    # Empty the cache captured in the closure of get_cached_parent_scope.
    jedi.parser_utils.get_cached_parent_scope.__closure__[0].cell_contents.clear()
    gc.collect()

    # Compare the identities of everything still referring to tree.Module
    # against the remembered module node.
    assert all(id(m) != module_id for m in gc.get_referrers(tree.Module))