Implement garbage collection for inactive cache files (#121)

Cache files that haven't been accessed in the last 30 days are now
garbage collected automatically. Collection runs when `save_module` is
called and is guarded by a lock file, so it happens at most once per day.
Author:       Batuhan Taskaya
Date:         2020-06-02 12:36:05 +03:00
Committed by: GitHub
Commit:       fe24f0dc1b (parent 450e9d0a19)

4 changed files with 150 additions and 18 deletions
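
The core pattern, as a minimal standalone sketch (the paths, the ONE_DAY
constant, and the `collect` callback are illustrative, not parso's actual
code; the real implementation is in the diff below):

    import os
    import time

    CACHE_DIR = "/tmp/parso-cache"  # illustrative location
    LOCK_PATH = os.path.join(CACHE_DIR, "PARSO-CACHE-LOCK")
    ONE_DAY = 60 * 60 * 24

    def maybe_collect(collect):
        # The lock file's mtime records when garbage collection last ran.
        try:
            last_run = os.path.getmtime(LOCK_PATH)
        except OSError:
            last_run = None  # first run: the lock does not exist yet
        if last_run is not None and last_run + ONE_DAY > time.time():
            return  # already collected within the last day
        # Touch the lock *before* collecting so that concurrent savers
        # see a fresh timestamp and skip the work.
        os.makedirs(CACHE_DIR, exist_ok=True)
        with open(LOCK_PATH, "a"):
            os.utime(LOCK_PATH, None)
        collect()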

parso/cache.py

@@ -13,7 +13,8 @@ try:
 except:
     import pickle

-from parso._compatibility import FileNotFoundError
+from parso._compatibility import FileNotFoundError, scandir
+from parso.file_io import FileIO

 LOG = logging.getLogger(__name__)
@@ -21,6 +22,13 @@ _CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
 """
 Cached files should survive at least a few minutes.
 """
+
+_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
+"""
+Maximum time for a cached file to survive if it is not
+accessed within this period.
+"""

 _CACHED_SIZE_TRIGGER = 600
 """
 This setting limits the amount of cached files. It's basically a way to start
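
For scale, `_CACHED_FILE_MAXIMUM_SURVIVAL` is 60 * 60 * 24 * 30 = 2,592,000
seconds, i.e. the 30 days referenced in the commit message.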
@@ -82,6 +90,19 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
 ``$XDG_CACHE_HOME/parso`` is used instead of the default one.
 """

+_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
+
+
+def _get_cache_clear_lock(cache_path=None):
+    """
+    The path where the cache lock is stored.
+
+    The lock prevents continuous cache clearing and allows garbage
+    collection only once a day (configurable via _CACHE_CLEAR_THRESHOLD).
+    """
+    cache_path = cache_path or _get_default_cache_path()
+    return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK"))


 parser_cache = {}
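
Using the lock file's own mtime as the record of the last collection keeps
the bookkeeping free: touching `PARSO-CACHE-LOCK` simultaneously claims the
cleanup job and stores when it ran, so no separate state file is needed.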
@@ -173,6 +194,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
     _set_cache_item(hashed_grammar, path, item)
     if pickling and path is not None:
         _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
+        _remove_cache_and_update_lock(cache_path=cache_path)


 def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
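
Piggybacking the cleanup on `save_module` means no background thread or
timer is needed; the at-most-daily cost is simply paid by whichever process
happens to save a module first after the threshold expires.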
@@ -187,6 +209,46 @@ def clear_cache(cache_path=None):
     parser_cache.clear()


+def clear_inactive_cache(
+    cache_path=None,
+    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
+):
+    if cache_path is None:
+        cache_path = _get_default_cache_path()
+    if not os.path.exists(cache_path):
+        return False
+    for version_path in os.listdir(cache_path):
+        version_path = os.path.join(cache_path, version_path)
+        if not os.path.isdir(version_path):
+            continue
+        for file in scandir(version_path):
+            # A file is inactive once its last access time is older
+            # than the threshold.
+            if file.stat().st_atime + inactivity_threshold <= time.time():
+                try:
+                    os.remove(file.path)
+                except OSError:  # silently ignore all failures
+                    continue
+    return True
+def _remove_cache_and_update_lock(cache_path=None):
+    lock = _get_cache_clear_lock(cache_path=cache_path)
+    clear_lock_time = lock.get_last_modified()
+    if (
+        clear_lock_time is None  # first time
+        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
+    ):
+        if not lock._touch():
+            # Touch the lock first so that as few other cleanup jobs as
+            # possible get started. There is still a race condition, but
+            # it's probably not a big problem.
+            return False
+
+        clear_inactive_cache(cache_path=cache_path)


 def _get_hashed_path(hashed_grammar, path, cache_path=None):
     directory = _get_cache_directory_path(cache_path=cache_path)
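
Both new entry points can be exercised directly; a sketch, assuming
`_get_cache_clear_lock` returns a `FileIO` whose `path` attribute points at
the lock file (the seven-day threshold and the test-style backdating are
illustrative):

    import os
    import time

    from parso.cache import clear_inactive_cache, _get_cache_clear_lock

    # Collect files not accessed within the last 7 days instead of 30.
    clear_inactive_cache(inactivity_threshold=60 * 60 * 24 * 7)

    # Backdate the lock so the next save_module call triggers another
    # collection (handy in tests).
    lock = _get_cache_clear_lock()
    os.makedirs(os.path.dirname(lock.path), exist_ok=True)
    open(lock.path, "a").close()  # make sure the lock exists
    two_days_ago = time.time() - 2 * 24 * 60 * 60
    os.utime(lock.path, (two_days_ago, two_days_ago))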