mirror of https://github.com/davidhalter/parso.git
synced 2025-12-07 21:34:32 +08:00
Make sure to limit the amount of cached files parso stores, fixes davidhalter/jedi#1340
@@ -17,6 +17,21 @@ from parso._compatibility import FileNotFoundError
 
 LOG = logging.getLogger(__name__)
 
+_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
+"""
+Cached files should survive at least a few minutes.
+"""
+_CACHED_SIZE_TRIGGER = 600
+"""
+This setting limits the amount of cached files. It's basically a way to start
+garbage collection.
+
+The reasoning for this limit being as big as it is, is the following:
+
+Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
+makes Jedi use ~500mb of memory. Since we might want a bit more than those few
+libraries, we just increase it a bit.
+"""
 
 _PICKLE_VERSION = 32
 """
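A quick sanity check of the docstring's figures (the per-file estimate below is inferred from the docstring itself, not stated elsewhere in the commit):

    ~500 MB / ~500 files        ≈ 1 MB per cached module on average
    _CACHED_SIZE_TRIGGER = 600  → on the order of 600 MB held before garbage
                                  collection starts evicting entries that have
                                  been idle longer than 10 minutes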
@@ -76,6 +91,7 @@ class _NodeCacheItem(object):
         if change_time is None:
             change_time = time.time()
         self.change_time = change_time
+        self.last_used = change_time
 
 
 def load_module(hashed_grammar, file_io, cache_path=None):
@@ -89,6 +105,7 @@ def load_module(hashed_grammar, file_io, cache_path=None):
     try:
         module_cache_item = parser_cache[hashed_grammar][file_io.path]
         if p_time <= module_cache_item.change_time:
+            module_cache_item.last_used = time.time()
             return module_cache_item.node
     except KeyError:
         return _load_from_file_system(
@@ -122,11 +139,27 @@ def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
     except FileNotFoundError:
         return None
     else:
-        parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+        _set_cache_item(hashed_grammar, path, module_cache_item)
         LOG.debug('pickle loaded: %s', path)
         return module_cache_item.node
 
 
+def _set_cache_item(hashed_grammar, path, module_cache_item):
+    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
+        # Garbage collection of old cache files.
+        # We are basically throwing everything away that hasn't been accessed
+        # in 10 minutes.
+        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
+        for key, path_to_item_map in parser_cache.items():
+            parser_cache[key] = {
+                path: node_item
+                for path, node_item in path_to_item_map.items()
+                if node_item.last_used > cutoff_time
+            }
+
+    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
+
+
 def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
     path = file_io.path
     try:
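To make the eviction policy easier to follow outside the diff, here is a minimal, self-contained sketch of the same pattern. CACHE, SIZE_TRIGGER, MINIMUM_SURVIVAL, Item and set_cache_item are illustrative stand-ins for parser_cache, _CACHED_SIZE_TRIGGER, _CACHED_FILE_MINIMUM_SURVIVAL, _NodeCacheItem and _set_cache_item; this is a paraphrase of the change, not parso's actual module:

    import time

    CACHE = {}                    # grammar_hash -> {path: item}, like parser_cache
    SIZE_TRIGGER = 600            # start garbage collection at this many entries
    MINIMUM_SURVIVAL = 60 * 10    # keep anything used within the last 10 minutes


    class Item:
        def __init__(self, node):
            self.node = node
            self.last_used = time.time()


    def set_cache_item(grammar_hash, path, item):
        # Collection only runs once the total number of cached files is large;
        # it then drops every entry that was not used recently enough.
        if sum(len(v) for v in CACHE.values()) >= SIZE_TRIGGER:
            cutoff = time.time() - MINIMUM_SURVIVAL
            for key, path_map in CACHE.items():
                CACHE[key] = {p: i for p, i in path_map.items()
                              if i.last_used > cutoff}
        CACHE.setdefault(grammar_hash, {})[path] = item

The design choice worth noting is that collection is size-triggered but age-based: nothing is evicted until the cache grows past the trigger, and then everything idle past the survival window is dropped at once, rather than maintaining a strict LRU ordering.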
@@ -136,7 +169,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
         pickling = False
 
     item = _NodeCacheItem(module, lines, p_time)
-    parser_cache.setdefault(hashed_grammar, {})[path] = item
+    _set_cache_item(hashed_grammar, path, item)
     if pickling and path is not None:
         _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
 
@@ -5,12 +5,14 @@ Test all things related to the ``jedi.cache`` module.
 from os import unlink
 
 import pytest
+import time
 
 from parso.cache import _NodeCacheItem, save_module, load_module, \
     _get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
 from parso import load_grammar
 from parso import cache
 from parso import file_io
+from parso import parse
 
 
 @pytest.fixture()
@@ -87,3 +89,53 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
 
     cached2 = load_module(grammar._hashed, io)
     assert cached2 is None
+
+
+def test_cache_limit():
+    def cache_size():
+        return sum(len(v) for v in parser_cache.values())
+
+    try:
+        parser_cache.clear()
+        future_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() + 10e6)
+        old_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() - 10e4)
+        parser_cache['some_hash_old'] = {
+            '/path/%s' % i: old_node_cache_item for i in range(300)
+        }
+        parser_cache['some_hash_new'] = {
+            '/path/%s' % i: future_node_cache_item for i in range(300)
+        }
+        assert cache_size() == 600
+        parse('somecode', cache=True, path='/path/somepath')
+        assert cache_size() == 301
+    finally:
+        parser_cache.clear()
+
+
+class _FixedTimeFileIO(file_io.KnownContentFileIO):
+    def __init__(self, path, content, last_modified):
+        super(_FixedTimeFileIO, self).__init__(path, content)
+        self._last_modified = last_modified
+
+    def get_last_modified(self):
+        return self._last_modified
+
+
+@pytest.mark.parametrize('diff_cache', [False, True])
+@pytest.mark.parametrize('use_file_io', [False, True])
+def test_cache_last_used_update(diff_cache, use_file_io):
+    p = '/path/last-used'
+    parser_cache.clear()  # Clear, because then it's easier to find stuff.
+    parse('somecode', cache=True, path=p)
+    node_cache_item = next(iter(parser_cache.values()))[p]
+    now = time.time()
+    assert node_cache_item.last_used < now
+
+    if use_file_io:
+        f = _FixedTimeFileIO(p, 'code', node_cache_item.last_used - 10)
+        parse(file_io=f, cache=True, diff_cache=diff_cache)
+    else:
+        parse('somecode2', cache=True, path=p, diff_cache=diff_cache)
+
+    node_cache_item = next(iter(parser_cache.values()))[p]
+    assert now < node_cache_item.last_used < time.time()
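For context, the new tests reach the cache through parso's public entry point; a minimal invocation of that cached path (the file path below is made up for illustration) looks like:

    import parso

    # cache=True together with a path stores the parsed tree in parser_cache,
    # which is the dict whose size _set_cache_item now keeps in check.
    module = parso.parse('x = 1\n', cache=True, path='/tmp/example.py')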