Make sure to limit the amount of cached files parso stores, fixes davidhalter/jedi#1340

2025-12-06 21:04:29 +08:00 · 2020-01-05 23:44:51 +01:00
parent 29b57d93bd
commit 2b0b093276
2 changed files with 87 additions and 2 deletions
--- a/parso/cache.py
+++ b/parso/cache.py
@@ -17,6 +17,21 @@ from parso._compatibility import FileNotFoundError

 LOG = logging.getLogger(__name__)

+_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
+"""
+Cached files should survive at least a few minutes.
+"""
+_CACHED_SIZE_TRIGGER = 600
+"""
+This setting limits the number of cached files. It is not a hard cap, but the
+threshold at which garbage collection of the in-memory cache is triggered.
+
+The reasoning for this limit being as big as it is, is the following:
+
+NumPy, pandas, Matplotlib and TensorFlow together use about 500 files. This
+makes Jedi use ~500 MB of memory. Since we might want a bit more than those
+few libraries, we just increase it a bit.
+"""

 _PICKLE_VERSION = 32
 """
@@ -76,6 +91,7 @@ class _NodeCacheItem(object):
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time
+        self.last_used = change_time


 def load_module(hashed_grammar, file_io, cache_path=None):
@@ -89,6 +105,7 @@ def load_module(hashed_grammar, file_io, cache_path=None):
    try:
        module_cache_item = parser_cache[hashed_grammar][file_io.path]
        if p_time <= module_cache_item.change_time:
+            module_cache_item.last_used = time.time()
            return module_cache_item.node
    except KeyError:
        return _load_from_file_system(
@@ -122,11 +139,27 @@ def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    except FileNotFoundError:
        return None
    else:
-        parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+        _set_cache_item(hashed_grammar, path, module_cache_item)
        LOG.debug('pickle loaded: %s', path)
        return module_cache_item.node


+def _set_cache_item(hashed_grammar, path, module_cache_item):
+    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
+        # Garbage collection of old cache entries.
+        # We throw away every entry (for all grammars) that has not been used
+        # within the last _CACHED_FILE_MINIMUM_SURVIVAL seconds (10 minutes).
+        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
+        for key, path_to_item_map in parser_cache.items():
+            parser_cache[key] = {
+                path: node_item
+                for path, node_item in path_to_item_map.items()
+                if node_item.last_used > cutoff_time
+            }
+
+    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+
+
 def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    path = file_io.path
    try:
@@ -136,7 +169,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
-    parser_cache.setdefault(hashed_grammar, {})[path] = item
+    _set_cache_item(hashed_grammar, path, item)
    if pickling and path is not None:
        _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)