Mirror of https://github.com/davidhalter/parso.git
Implement garbage collection for inactive cache files (#121)

Cache files that weren't accessed in the last 30 days are automatically garbage collected. The collection runs when `save_module` is called and is guarded by a lock file so that it happens at most once per day.
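The locking scheme is simple enough to sketch in isolation. Below is a minimal, self-contained illustration of the same pattern, not parso's actual API (`maybe_collect`, `CACHE-LOCK`, and the constants are hypothetical names): the lock file's modification time records when the last sweep ran, and a new sweep starts only if that timestamp is more than a day old.

```python
import os
import time

CLEAR_THRESHOLD = 60 * 60 * 24       # hypothetical: sweep at most once per day
MAX_INACTIVITY = 60 * 60 * 24 * 30   # hypothetical: drop files untouched for 30 days


def maybe_collect(cache_dir):
    """Garbage-collect stale cache files, at most once per CLEAR_THRESHOLD."""
    lock = os.path.join(cache_dir, "CACHE-LOCK")
    try:
        last_run = os.path.getmtime(lock)
    except OSError:
        last_run = None  # no lock file yet: this is the first run
    if last_run is not None and last_run + CLEAR_THRESHOLD > time.time():
        return  # a sweep already ran within the threshold; do nothing
    # Touch the lock before sweeping so concurrent callers skip the sweep.
    with open(lock, "a"):
        os.utime(lock, None)
    for name in os.listdir(cache_dir):
        path = os.path.join(cache_dir, name)
        if path == lock or not os.path.isfile(path):
            continue
        if os.stat(path).st_atime + MAX_INACTIVITY <= time.time():
            try:
                os.remove(path)
            except OSError:
                pass  # another process may have removed it already
```

parso's real implementation wraps the lock in a `FileIO` object and sweeps per-version subdirectories, as the diff below shows.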
```diff
@@ -13,7 +13,8 @@ try:
 except:
     import pickle
 
-from parso._compatibility import FileNotFoundError
+from parso._compatibility import FileNotFoundError, scandir
+from parso.file_io import FileIO
 
 LOG = logging.getLogger(__name__)
 
@@ -21,6 +22,13 @@ _CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
 """
 Cached files should survive at least a few minutes.
 """
 
+_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
+"""
+Maximum time a cached file may survive without being accessed.
+"""
+
 _CACHED_SIZE_TRIGGER = 600
 """
 This setting limits the number of cached files. It's basically a way to start
@@ -82,6 +90,19 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
 ``$XDG_CACHE_HOME/parso`` is used instead of the default one.
 """
 
+_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
+
+
+def _get_cache_clear_lock(cache_path=None):
+    """
+    The path where the cache lock is stored.
+
+    The cache lock prevents continuous cache clearing and allows garbage
+    collection only once a day (configurable via _CACHE_CLEAR_THRESHOLD).
+    """
+    cache_path = cache_path or _get_default_cache_path()
+    return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK"))
+
 
 parser_cache = {}
 
@@ -173,6 +194,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
     _set_cache_item(hashed_grammar, path, item)
     if pickling and path is not None:
         _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
+        _remove_cache_and_update_lock(cache_path=cache_path)
 
 
 def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
@@ -187,6 +209,46 @@ def clear_cache(cache_path=None):
     parser_cache.clear()
 
 
+def clear_inactive_cache(
+    cache_path=None,
+    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
+):
+    if cache_path is None:
+        cache_path = _get_default_cache_path()
+    if not os.path.exists(cache_path):
+        return False
+    for version_path in os.listdir(cache_path):
+        version_path = os.path.join(cache_path, version_path)
+        if not os.path.isdir(version_path):
+            continue
+        for file in scandir(version_path):
+            if (
+                file.stat().st_atime + inactivity_threshold
+                <= time.time()
+            ):
+                try:
+                    os.remove(file.path)
+                except OSError:  # silently ignore all failures
+                    continue
+    return True
+
+
+def _remove_cache_and_update_lock(cache_path=None):
+    lock = _get_cache_clear_lock(cache_path=cache_path)
+    clear_lock_time = lock.get_last_modified()
+    if (
+        clear_lock_time is None  # first time
+        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
+    ):
+        if not lock._touch():
+            # Touch the lock before sweeping so that as few other cleanup
+            # jobs as possible get started concurrently. There is still a
+            # race condition, but it's probably not a big problem.
+            return False
+
+        clear_inactive_cache(cache_path=cache_path)
+
+
 def _get_hashed_path(hashed_grammar, path, cache_path=None):
     directory = _get_cache_directory_path(cache_path=cache_path)
```
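For reference, the new helper can also be called directly. A small usage sketch, assuming a parso version that contains this commit and honors the `inactivity_threshold` parameter as shown above:

```python
import parso.cache

# Sweep the default cache directory, removing cache files whose last
# access time (st_atime) is more than seven days in the past.
parso.cache.clear_inactive_cache(inactivity_threshold=60 * 60 * 24 * 7)
```

One caveat worth noting: eligibility is based on `st_atime`, which filesystems mounted with `noatime` never update on reads, so frequently-read but rarely-written cache files can still be collected there.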