diff --git a/parso/cache.py b/parso/cache.py
index 1f8d886..86fde9a 100644
--- a/parso/cache.py
+++ b/parso/cache.py
@@ -17,6 +17,21 @@ from parso._compatibility import FileNotFoundError
 
 LOG = logging.getLogger(__name__)
 
+_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
+"""
+Cached files should survive at least a few minutes.
+"""
+_CACHED_SIZE_TRIGGER = 600
+"""
+This setting limits the amount of cached files. It's basically a way to start
+garbage collection.
+
+The reasoning for this limit being as big as it is, is the following:
+
+Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
+makes Jedi use ~500mb of memory. Since we might want a bit more than those few
+libraries, we just increase it a bit.
+"""
 
 _PICKLE_VERSION = 32
 """
@@ -76,6 +91,7 @@ class _NodeCacheItem(object):
         if change_time is None:
             change_time = time.time()
         self.change_time = change_time
+        self.last_used = change_time
 
 
 def load_module(hashed_grammar, file_io, cache_path=None):
@@ -89,6 +105,7 @@ def load_module(hashed_grammar, file_io, cache_path=None):
     try:
         module_cache_item = parser_cache[hashed_grammar][file_io.path]
         if p_time <= module_cache_item.change_time:
+            module_cache_item.last_used = time.time()
             return module_cache_item.node
     except KeyError:
         return _load_from_file_system(
@@ -122,11 +139,27 @@ def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
     except FileNotFoundError:
         return None
     else:
-        parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+        _set_cache_item(hashed_grammar, path, module_cache_item)
         LOG.debug('pickle loaded: %s', path)
         return module_cache_item.node
 
 
+def _set_cache_item(hashed_grammar, path, module_cache_item):
+    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
+        # Garbage collection of old cache files.
+        # We are basically throwing everything away that hasn't been accessed
+        # in 10 minutes.
+        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
+        for key, path_to_item_map in parser_cache.items():
+            parser_cache[key] = {
+                path: node_item
+                for path, node_item in path_to_item_map.items()
+                if node_item.last_used > cutoff_time
+            }
+
+    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+
+
 def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
     path = file_io.path
     try:
@@ -136,7 +169,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
         pickling = False
 
     item = _NodeCacheItem(module, lines, p_time)
-    parser_cache.setdefault(hashed_grammar, {})[path] = item
+    _set_cache_item(hashed_grammar, path, item)
     if pickling and path is not None:
         _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
 
diff --git a/test/test_cache.py b/test/test_cache.py
index 7fef203..ebf1303 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -5,12 +5,14 @@ Test all things related to the ``jedi.cache`` module.
 from os import unlink
 
 import pytest
+import time
 
 from parso.cache import _NodeCacheItem, save_module, load_module, \
     _get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
 from parso import load_grammar
 from parso import cache
 from parso import file_io
+from parso import parse
 
 
 @pytest.fixture()
@@ -87,3 +89,53 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
 
     cached2 = load_module(grammar._hashed, io)
     assert cached2 is None
+
+
+def test_cache_limit():
+    def cache_size():
+        return sum(len(v) for v in parser_cache.values())
+
+    try:
+        parser_cache.clear()
+        future_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() + 10e6)
+        old_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() - 10e4)
+        parser_cache['some_hash_old'] = {
+            '/path/%s' % i: old_node_cache_item for i in range(300)
+        }
+        parser_cache['some_hash_new'] = {
+            '/path/%s' % i: future_node_cache_item for i in range(300)
+        }
+        assert cache_size() == 600
+        parse('somecode', cache=True, path='/path/somepath')
+        assert cache_size() == 301
+    finally:
+        parser_cache.clear()
+
+
+class _FixedTimeFileIO(file_io.KnownContentFileIO):
+    def __init__(self, path, content, last_modified):
+        super(_FixedTimeFileIO, self).__init__(path, content)
+        self._last_modified = last_modified
+
+    def get_last_modified(self):
+        return self._last_modified
+
+
+@pytest.mark.parametrize('diff_cache', [False, True])
+@pytest.mark.parametrize('use_file_io', [False, True])
+def test_cache_last_used_update(diff_cache, use_file_io):
+    p = '/path/last-used'
+    parser_cache.clear()  # Clear, because then it's easier to find stuff.
+    parse('somecode', cache=True, path=p)
+    node_cache_item = next(iter(parser_cache.values()))[p]
+    now = time.time()
+    assert node_cache_item.last_used < now
+
+    if use_file_io:
+        f = _FixedTimeFileIO(p, 'code', node_cache_item.last_used - 10)
+        parse(file_io=f, cache=True, diff_cache=diff_cache)
+    else:
+        parse('somecode2', cache=True, path=p, diff_cache=diff_cache)
+
+    node_cache_item = next(iter(parser_cache.values()))[p]
+    assert now < node_cache_item.last_used < time.time()
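
For reference, the eviction rule introduced in _set_cache_item can be exercised in isolation. The following standalone sketch is not part of the patch: it mimics the garbage-collection branch with plain dictionaries, a hypothetical Item class standing in for _NodeCacheItem, and _CACHED_SIZE_TRIGGER lowered to 3 so the eviction fires immediately.

import time

_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # same constant as the patch
_CACHED_SIZE_TRIGGER = 3  # lowered from 600 for the demonstration

parser_cache = {}


class Item(object):
    # Hypothetical stand-in for _NodeCacheItem; only last_used matters here.
    def __init__(self, last_used):
        self.last_used = last_used


def set_cache_item(hashed_grammar, path, item):
    # Mirrors the patched _set_cache_item: once the total number of cached
    # modules reaches the trigger, throw away everything that has not been
    # used within the last _CACHED_FILE_MINIMUM_SURVIVAL seconds.
    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for key, path_to_item_map in parser_cache.items():
            parser_cache[key] = {
                p: i for p, i in path_to_item_map.items()
                if i.last_used > cutoff_time
            }
    parser_cache.setdefault(hashed_grammar, {})[path] = item


now = time.time()
for n in range(3):
    set_cache_item('grammar', '/stale/%s' % n, Item(last_used=now - 3600))
set_cache_item('grammar', '/fresh', Item(last_used=now))
print(sorted(parser_cache['grammar']))  # ['/fresh'] -- stale entries evicted

Note that load_module refreshes last_used on every in-memory cache hit, so under this scheme frequently used modules survive collection indefinitely while idle ones are dropped once the trigger is reached.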