Make sure to limit the amount of cached files parso stores, fixes davidhalter/jedi#1340

This commit is contained in:
Dave Halter
2020-01-05 23:44:51 +01:00
parent 29b57d93bd
commit 2b0b093276
2 changed files with 87 additions and 2 deletions

View File

@@ -17,6 +17,21 @@ from parso._compatibility import FileNotFoundError

 LOG = logging.getLogger(__name__)

+_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
+"""
+Cached files should survive at least a few minutes.
+"""
+
+_CACHED_SIZE_TRIGGER = 600
+"""
+This setting limits the amount of cached files. It's basically a way to start
+garbage collection.
+
+The reasoning for this limit being as big as it is, is the following:
+Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
+makes Jedi use ~500mb of memory. Since we might want a bit more than those few
+libraries, we just increase it a bit.
+"""
+
 _PICKLE_VERSION = 32
 """
@@ -76,6 +91,7 @@ class _NodeCacheItem(object):
         if change_time is None:
             change_time = time.time()
         self.change_time = change_time
+        self.last_used = change_time


 def load_module(hashed_grammar, file_io, cache_path=None):
@@ -89,6 +105,7 @@ def load_module(hashed_grammar, file_io, cache_path=None):
     try:
         module_cache_item = parser_cache[hashed_grammar][file_io.path]
         if p_time <= module_cache_item.change_time:
+            module_cache_item.last_used = time.time()
             return module_cache_item.node
     except KeyError:
         return _load_from_file_system(
@@ -122,11 +139,27 @@ def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
     except FileNotFoundError:
         return None
     else:
-        parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+        _set_cache_item(hashed_grammar, path, module_cache_item)
         LOG.debug('pickle loaded: %s', path)
         return module_cache_item.node
def _set_cache_item(hashed_grammar, path, module_cache_item):
    """
    Insert ``module_cache_item`` into the in-memory ``parser_cache``.

    Before inserting, if the total number of cached modules across all
    grammars has reached ``_CACHED_SIZE_TRIGGER``, run a simple garbage
    collection pass: drop every entry that has not been used within the
    last ``_CACHED_FILE_MINIMUM_SURVIVAL`` seconds (10 minutes).

    :param hashed_grammar: Hash identifying the grammar the module was
        parsed with; first-level key of ``parser_cache``.
    :param path: File path of the parsed module; second-level key.
    :param module_cache_item: The ``_NodeCacheItem`` to store.
    """
    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
        # Garbage collection of old cache files.
        # We are basically throwing everything away that hasn't been accessed
        # in 10 minutes.
        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for key, path_to_item_map in parser_cache.items():
            # The comprehension variable is deliberately NOT called ``path``:
            # the original shadowed this function's ``path`` parameter, which
            # is confusing even though dict comprehensions have their own
            # scope.
            parser_cache[key] = {
                cached_path: node_item
                for cached_path, node_item in path_to_item_map.items()
                if node_item.last_used > cutoff_time
            }
    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
 def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
     path = file_io.path
     try:
@@ -136,7 +169,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
         pickling = False

     item = _NodeCacheItem(module, lines, p_time)
-    parser_cache.setdefault(hashed_grammar, {})[path] = item
+    _set_cache_item(hashed_grammar, path, item)
     if pickling and path is not None:
         _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
View File

@@ -5,12 +5,14 @@ Test all things related to the ``jedi.cache`` module.
 from os import unlink

 import pytest
+import time

 from parso.cache import _NodeCacheItem, save_module, load_module, \
     _get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
 from parso import load_grammar
 from parso import cache
 from parso import file_io
+from parso import parse


 @pytest.fixture()
@@ -87,3 +89,53 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
     cached2 = load_module(grammar._hashed, io)
     assert cached2 is None
def test_cache_limit():
    """Inserting into an over-full parser_cache evicts the stale entries."""
    def current_size():
        total = 0
        for grammar_map in parser_cache.values():
            total += len(grammar_map)
        return total

    try:
        parser_cache.clear()
        # One item that will look freshly used (far-future timestamp) and one
        # that is long past the survival window.
        fresh_item = _NodeCacheItem('bla', [], change_time=time.time() + 10e6)
        stale_item = _NodeCacheItem('bla', [], change_time=time.time() - 10e4)
        parser_cache['some_hash_old'] = {
            '/path/%s' % i: stale_item for i in range(300)
        }
        parser_cache['some_hash_new'] = {
            '/path/%s' % i: fresh_item for i in range(300)
        }
        assert current_size() == 600
        # This parse pushes the cache to the trigger size, so the 300 stale
        # entries are collected; the 300 fresh ones plus the new parse remain.
        parse('somecode', cache=True, path='/path/somepath')
        assert current_size() == 301
    finally:
        parser_cache.clear()
class _FixedTimeFileIO(file_io.KnownContentFileIO):
    """A file IO whose reported modification time is fixed at construction."""

    def __init__(self, path, content, last_modified):
        super(_FixedTimeFileIO, self).__init__(path, content)
        # Timestamp to report instead of asking the file system.
        self._last_modified = last_modified

    def get_last_modified(self):
        # Always return the constructor-supplied timestamp.
        return self._last_modified
@pytest.mark.parametrize('diff_cache', [False, True])
@pytest.mark.parametrize('use_file_io', [False, True])
def test_cache_last_used_update(diff_cache, use_file_io):
    """A cache hit must refresh the item's ``last_used`` timestamp."""
    module_path = '/path/last-used'
    # Start from an empty cache so the single entry is easy to locate.
    parser_cache.clear()
    parse('somecode', cache=True, path=module_path)
    item = next(iter(parser_cache.values()))[module_path]
    before_reparse = time.time()
    assert item.last_used < before_reparse

    if use_file_io:
        # Report an old modification time so the cached entry is reused.
        fixed_io = _FixedTimeFileIO(module_path, 'code',
                                    item.last_used - 10)
        parse(file_io=fixed_io, cache=True, diff_cache=diff_cache)
    else:
        parse('somecode2', cache=True, path=module_path, diff_cache=diff_cache)

    item = next(iter(parser_cache.values()))[module_path]
    assert before_reparse < item.last_used < time.time()