Mirror of https://github.com/davidhalter/parso.git, synced 2025-12-09 06:04:54 +08:00
Implement garbage collection for inactive cache files (#121)

Cache files that haven't been accessed in the last 30 days are now garbage
collected automatically. The collection runs when `save_module` is called and
is gated by a lock file, so it happens at most once per day.
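For orientation before the diff: the once-per-day gate works by keeping a lock
file whose modification time records the last collection. A minimal sketch of
the idea, not the patch itself (`lock_path` and `collect` are placeholder
names, not the patch's API):

import os
import time

CLEAR_THRESHOLD = 60 * 60 * 24  # at most one collection per day

def maybe_collect(lock_path, collect):
    # Run collect() only if the last run was more than CLEAR_THRESHOLD ago.
    try:
        last_cleared = os.path.getmtime(lock_path)
    except OSError:
        last_cleared = None  # first run: the lock file doesn't exist yet
    if last_cleared is None or last_cleared + CLEAR_THRESHOLD <= time.time():
        # Touch the lock first, so concurrent processes skip their own run.
        open(lock_path, 'a').close()
        os.utime(lock_path, None)
        collect()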
parso/_compatibility.py

@@ -2,6 +2,7 @@
 To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been
 created. Clearly there is huge need to use conforming syntax.
 """
+import os
 import sys
 import platform
 
@@ -44,11 +45,11 @@ def u(string):
 
 
 try:
-    # Python 2.7
+    # Python 3.3+
     FileNotFoundError = FileNotFoundError
 except NameError:
-    # Python 3.3+
-    FileNotFoundError = IOError
+    # Python 2.7 (both IOError + OSError)
+    FileNotFoundError = EnvironmentError
 
 
 def utf8_repr(func):
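Why alias to `EnvironmentError` rather than `IOError`? On Python 2,
`EnvironmentError` is the common base class of both `IOError` and `OSError`,
so a single `except FileNotFoundError` clause now catches failures from either
family, which is what the corrected comment alludes to:

# Python 2 exception hierarchy, for reference:
assert issubclass(IOError, EnvironmentError)
assert issubclass(OSError, EnvironmentError)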
@@ -67,3 +68,27 @@ def utf8_repr(func):
         return func
     else:
         return wrapper
+
+if sys.version_info < (3, 5):
+    """
+    A super-minimal shim around listdir that behaves like
+    scandir for the information we need.
+    """
+    class _DirEntry:
+
+        def __init__(self, name, basepath):
+            self.name = name
+            self.basepath = basepath
+
+        @property
+        def path(self):
+            return os.path.join(self.basepath, self.name)
+
+        def stat(self):
+            # won't follow symlinks
+            return os.lstat(os.path.join(self.basepath, self.name))
+
+    def scandir(dir):
+        return [_DirEntry(name, dir) for name in os.listdir(dir)]
+else:
+    from os import scandir
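The shim deliberately implements only the slice of the `os.scandir` interface
that the cache code relies on: `.name`, `.path` and `.stat()`. A hypothetical
snippet showing that shared surface (not part of the patch; the directory is
illustrative):

for entry in scandir('/tmp'):
    # behaves the same on Python < 3.5 (shim) and >= 3.5 (os.scandir)
    print(entry.name, entry.path, entry.stat().st_atime)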
parso/cache.py

@@ -13,7 +13,8 @@ try:
 except:
     import pickle
 
-from parso._compatibility import FileNotFoundError
+from parso._compatibility import FileNotFoundError, scandir
+from parso.file_io import FileIO
 
 LOG = logging.getLogger(__name__)
 
@@ -21,6 +22,13 @@ _CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
 """
 Cached files should survive at least a few minutes.
 """
+
+_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
+"""
+Maximum time for a cached file to survive if it is not
+accessed.
+"""
+
 _CACHED_SIZE_TRIGGER = 600
 """
 This setting limits the amount of cached files. It's basically a way to start
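For reference: `60 * 60 * 24 * 30` works out to 2,592,000 seconds, i.e. the 30
days named in the commit message, while the `_CACHE_CLEAR_THRESHOLD` added
below, `60 * 60 * 24`, is 86,400 seconds, i.e. one day between collection runs.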
@@ -82,6 +90,19 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
 ``$XDG_CACHE_HOME/parso`` is used instead of the default one.
 """
 
+_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
+
+def _get_cache_clear_lock(cache_path=None):
+    """
+    The path where the cache lock is stored.
+
+    The cache lock prevents continuous cache clearing and allows garbage
+    collection only once a day (configurable via _CACHE_CLEAR_THRESHOLD).
+    """
+    cache_path = cache_path or _get_default_cache_path()
+    return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK"))
+
+
 parser_cache = {}
 
 
@@ -173,6 +194,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
     _set_cache_item(hashed_grammar, path, item)
     if pickling and path is not None:
         _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
+        _remove_cache_and_update_lock(cache_path=cache_path)
 
 
 def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
@@ -187,6 +209,46 @@ def clear_cache(cache_path=None):
     parser_cache.clear()
 
 
+def clear_inactive_cache(
+    cache_path=None,
+    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
+):
+    if cache_path is None:
+        cache_path = _get_default_cache_path()
+    if not os.path.exists(cache_path):
+        return False
+    for version_path in os.listdir(cache_path):
+        version_path = os.path.join(cache_path, version_path)
+        if not os.path.isdir(version_path):
+            continue
+        for file in scandir(version_path):
+            if (
+                file.stat().st_atime + inactivity_threshold
+                <= time.time()
+            ):
+                try:
+                    os.remove(file.path)
+                except OSError:  # silently ignore all failures
+                    continue
+    else:
+        return True
+
+
+def _remove_cache_and_update_lock(cache_path=None):
+    lock = _get_cache_clear_lock(cache_path=cache_path)
+    clear_lock_time = lock.get_last_modified()
+    if (
+        clear_lock_time is None  # first time
+        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
+    ):
+        if not lock._touch():
+            # First make sure that as few other cleanup jobs as possible also
+            # get started. There is still a race condition, but it's probably
+            # not a big problem.
+            return False
+
+        clear_inactive_cache(cache_path=cache_path)
+
 def _get_hashed_path(hashed_grammar, path, cache_path=None):
     directory = _get_cache_directory_path(cache_path=cache_path)
 
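Two things worth noting about `clear_inactive_cache`. First, it keys off
`st_atime`, so on filesystems mounted with `noatime` access times are not
updated on reads and a file can look inactive even though it was recently
loaded. Second, the trailing `else:` belongs to the `for` loop: a `for ... else`
runs the `else` block when the loop finishes without hitting `break`, and since
this loop body never breaks, the function returns `True` whenever the cache
directory exists. A minimal illustration of that construct:

for item in []:
    pass
else:
    print('reached: the loop ended without break')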
parso/file_io.py

@@ -1,4 +1,5 @@
 import os
+from parso._compatibility import FileNotFoundError
 
 
 class FileIO(object):
@@ -22,6 +23,17 @@ class FileIO(object):
             # Might raise FileNotFoundError, OSError for Python 2
             return None
 
+    def _touch(self):
+        try:
+            os.utime(self.path, None)
+        except FileNotFoundError:
+            try:
+                file = open(self.path, 'a')
+                file.close()
+            except (OSError, IOError):  # TODO Maybe log this?
+                return False
+        return True
+
     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__, self.path)
 
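The new `_touch` behaves like the Unix touch utility: refresh the file's
timestamps if it exists, create an empty file otherwise, and report success as
a boolean. A hypothetical use against the cache lock (the path here is
illustrative, not the real cache location):

lock = FileIO('/tmp/PARSO-CACHE-LOCK')
if lock._touch():
    print('lock refreshed, mtime is now', lock.get_last_modified())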
test/test_cache.py

@@ -2,13 +2,19 @@
 Test all things related to the ``jedi.cache`` module.
 """
 
-from os import unlink
+import os
+import os.path
 
 import pytest
 import time
 
-from parso.cache import _NodeCacheItem, save_module, load_module, \
-    _get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
+from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG,
+                         _get_cache_clear_lock, _get_hashed_path,
+                         _load_from_file_system, _NodeCacheItem,
+                         _remove_cache_and_update_lock, _save_to_file_system,
+                         clear_inactive_cache, load_module, parser_cache,
+                         save_module)
+from parso._compatibility import is_pypy
 from parso import load_grammar
 from parso import cache
 from parso import file_io
@@ -16,15 +22,13 @@ from parso import parse
 
 
 @pytest.fixture()
-def isolated_jedi_cache(monkeypatch, tmpdir):
-    """
-    Set `jedi.settings.cache_directory` to a temporary directory during test.
-
-    Same as `clean_jedi_cache`, but create the temporary directory for
-    each test case (scope='function').
-    """
-    monkeypatch.setattr(cache, '_default_cache_path', str(tmpdir))
+def isolated_parso_cache(monkeypatch, tmpdir):
+    """Set `parso.cache._default_cache_path` to a temporary directory
+    during the test."""
+    cache_path = str(os.path.join(str(tmpdir), "__parso_cache"))
+    monkeypatch.setattr(cache, '_default_cache_path', cache_path)
+    monkeypatch.setattr(cache, '_get_default_cache_path', lambda *args, **kwargs: cache_path)
+    return cache_path
 
 
 def test_modulepickling_change_cache_dir(tmpdir):
     """
@@ -57,7 +61,7 @@ def load_stored_item(hashed_grammar, path, item, cache_path):
     return item
 
 
-@pytest.mark.usefixtures("isolated_jedi_cache")
+@pytest.mark.usefixtures("isolated_parso_cache")
 def test_modulepickling_simulate_deleted_cache(tmpdir):
     """
     Tests loading from a cache file after it is deleted.
@@ -84,7 +88,7 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
     save_module(grammar._hashed, io, module, lines=[])
     assert load_module(grammar._hashed, io) == module
 
-    unlink(_get_hashed_path(grammar._hashed, path))
+    os.unlink(_get_hashed_path(grammar._hashed, path))
     parser_cache.clear()
 
     cached2 = load_module(grammar._hashed, io)
@@ -139,3 +143,32 @@ def test_cache_last_used_update(diff_cache, use_file_io):
 
     node_cache_item = next(iter(parser_cache.values()))[p]
     assert now < node_cache_item.last_used < time.time()
+
+@pytest.mark.skipif(
+    is_pypy,
+    reason="pickling in pypy is slow, and since we don't pickle, "
+           "we never go into the path of auto-collecting garbage"
+)
+def test_inactive_cache(tmpdir, isolated_parso_cache):
+    parser_cache.clear()
+    test_subjects = "abcdef"
+    for path in test_subjects:
+        parse('somecode', cache=True, path=os.path.join(str(tmpdir), path))
+    raw_cache_path = os.path.join(isolated_parso_cache, _VERSION_TAG)
+    assert os.path.exists(raw_cache_path)
+    paths = os.listdir(raw_cache_path)
+    a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL
+    old_paths = set()
+    for path in paths[:len(test_subjects) // 2]:  # make a certain number of paths old
+        os.utime(os.path.join(raw_cache_path, path), (a_while_ago, a_while_ago))
+        old_paths.add(path)
+    # nothing should be cleared while the lock file is fresh
+    assert os.path.exists(_get_cache_clear_lock().path)
+    _remove_cache_and_update_lock()  # it shouldn't clear anything
+    assert len(os.listdir(raw_cache_path)) == len(test_subjects)
+    assert old_paths.issubset(os.listdir(raw_cache_path))
+
+    os.utime(_get_cache_clear_lock().path, (a_while_ago, a_while_ago))
+    _remove_cache_and_update_lock()
+    assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2
+    assert not old_paths.intersection(os.listdir(raw_cache_path))
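Assuming the test file lives at test/test_cache.py (the path is inferred, the
extraction does not show filenames), the new test can be run in isolation with:

pytest test/test_cache.py -k test_inactive_cache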