The parser cache in RAM now has grammar versioning.

Dave Halter
2017-05-26 12:48:59 -04:00
parent f997b91a12
commit dafffdc9b4
4 changed files with 25 additions and 28 deletions
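
In effect, the in-memory parser_cache goes from a flat {path: node_item} mapping to a two-level mapping keyed first by a hash of the grammar text, so modules parsed with different grammar versions no longer overwrite each other. A minimal standalone sketch of that idea (the helper names below are illustrative, not part of the commit):

import hashlib

# Two-level RAM cache: hashed grammar text -> source path -> cached item.
parser_cache = {}

def hash_grammar(grammar_text):
    # Same hashing scheme the commit uses elsewhere: sha256 of the grammar text.
    return hashlib.sha256(grammar_text.encode("utf-8")).hexdigest()

def store(hashed_grammar, path, item):
    # setdefault creates the per-grammar bucket the first time that hash is seen.
    parser_cache.setdefault(hashed_grammar, {})[path] = item

def lookup(hashed_grammar, path):
    # An entry cached under one grammar version is invisible to another.
    return parser_cache.get(hashed_grammar, {}).get(path)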

View File

@@ -58,11 +58,9 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""
# for fast_parser, should not be deleted
parser_cache = {}
class _NodeCacheItem(object):
def __init__(self, node, lines, change_time=None):
self.node = node
@@ -72,7 +70,7 @@ class _NodeCacheItem(object):
self.change_time = change_time
def load_module(grammar_hash, path, cache_path=None):
def load_module(hashed_grammar, path, cache_path=None):
"""
Returns a module or None, if it fails.
"""
@@ -82,16 +80,15 @@ def load_module(grammar_hash, path, cache_path=None):
return None
try:
# TODO Add grammar sha256
module_cache_item = parser_cache[path]
module_cache_item = parser_cache[hashed_grammar][path]
if p_time <= module_cache_item.change_time:
return module_cache_item.node
except KeyError:
return _load_from_file_system(grammar_hash, path, p_time, cache_path=cache_path)
return _load_from_file_system(hashed_grammar, path, p_time, cache_path=cache_path)
def _load_from_file_system(grammar_hash, path, p_time, cache_path=None):
cache_path = _get_hashed_path(grammar_hash, path, cache_path=cache_path)
def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
try:
try:
if p_time > os.path.getmtime(cache_path):
@@ -113,12 +110,12 @@ def _load_from_file_system(grammar_hash, path, p_time, cache_path=None):
except FileNotFoundError:
return None
else:
parser_cache[path] = module_cache_item
parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
logging.debug('pickle loaded: %s', path)
return module_cache_item.node
def save_module(grammar_hash, path, module, lines, pickling=True, cache_path=None):
def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=None):
try:
p_time = None if path is None else os.path.getmtime(path)
except OSError:
@@ -126,13 +123,13 @@ def save_module(grammar_hash, path, module, lines, pickling=True, cache_path=None):
pickling = False
item = _NodeCacheItem(module, lines, p_time)
parser_cache[path] = item
parser_cache.setdefault(hashed_grammar, {})[path] = item
if pickling and path is not None:
_save_to_file_system(grammar_hash, path, item)
_save_to_file_system(hashed_grammar, path, item)
def _save_to_file_system(grammar_hash, path, item, cache_path=None):
with open(_get_hashed_path(grammar_hash, path, cache_path=cache_path), 'wb') as f:
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)
@@ -143,11 +140,11 @@ def clear_cache(cache_path=None):
parser_cache.clear()
def _get_hashed_path(grammar_hash, path, cache_path=None):
def _get_hashed_path(hashed_grammar, path, cache_path=None):
directory = _get_cache_directory_path(cache_path=cache_path)
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
return os.path.join(directory, '%s-%s.pkl' % (grammar_hash, file_hash))
return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
def _get_cache_directory_path(cache_path=None):
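
On the file-system side, _get_hashed_path composes the pickle file name from the grammar hash and a sha256 of the source path, so the same module cached under two grammar versions lands in two different pickle files; load_module first checks the RAM cache against the file's mtime and only then falls back to the pickle. A rough sketch of the naming scheme (the function name and directory below are assumptions, not the module's exact code):

import hashlib
import os

def hashed_pickle_path(hashed_grammar, path, cache_dir):
    # One pickle per (grammar version, source file) pair.
    file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
    return os.path.join(cache_dir, '%s-%s.pkl' % (hashed_grammar, file_hash))

# Two grammar hashes for the same path give two distinct cache files.
p1 = hashed_pickle_path('a' * 64, '/tmp/example.py', '/tmp/parso-cache')
p2 = hashed_pickle_path('b' * 64, '/tmp/example.py', '/tmp/parso-cache')
assert p1 != p2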

View File

@@ -28,7 +28,7 @@ class Grammar(object):
self._parser = parser
self._tokenizer = tokenizer
self._diff_parser = diff_parser
self._sha256 = hashlib.sha256(text.encode("utf-8")).hexdigest()
self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()
def parse(self, code=None, **kwargs):
"""
@@ -77,7 +77,7 @@ class Grammar(object):
# With the current architecture we cannot load from cache if the
# code is given, because we just load from cache if it's not older than
# the latest change (file last modified).
module_node = load_module(self._sha256, path, cache_path=cache_path)
module_node = load_module(self._hashed, path, cache_path=cache_path)
if module_node is not None:
return module_node
@@ -91,7 +91,7 @@ class Grammar(object):
raise TypeError("You have to define a diff parser to be able "
"to use this option.")
try:
module_cache_item = parser_cache[path]
module_cache_item = parser_cache[self._hashed][path]
except KeyError:
pass
else:
@@ -99,7 +99,7 @@ class Grammar(object):
old_lines = module_cache_item.lines
if old_lines == lines:
# TODO remove this line? I think it's not needed. (dave)
save_module(self._sha256, path, module_node, lines, pickling=False,
save_module(self._hashed, path, module_node, lines, pickling=False,
cache_path=cache_path)
return module_node
@@ -107,7 +107,7 @@ class Grammar(object):
old_lines=old_lines,
new_lines=lines
)
save_module(self._sha256, path, new_node, lines, pickling=cache,
save_module(self._hashed, path, new_node, lines, pickling=cache,
cache_path=cache_path)
return new_node
@@ -126,7 +126,7 @@ class Grammar(object):
remove_last_newline(root_node)
if cache or diff_cache:
save_module(self._sha256, path, root_node, lines, pickling=cache,
save_module(self._hashed, path, root_node, lines, pickling=cache,
cache_path=cache_path)
return root_node
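
Grammar keeps the digest of its grammar text in self._hashed (renamed from self._sha256) and threads it through every load_module/save_module call above, which is what gives two grammar definitions disjoint cache entries. A small standalone illustration of that property (the grammar strings are made up, not real parso grammars):

import hashlib

def grammar_digest(text):
    return hashlib.sha256(text.encode("utf-8")).hexdigest()

grammar_a = "file_input: (NEWLINE | stmt)* ENDMARKER\n"
grammar_b = grammar_a + "# hypothetical extra rule\n"

# Any change to the grammar text changes the cache key.
assert grammar_digest(grammar_a) != grammar_digest(grammar_b)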

View File

@@ -37,7 +37,7 @@ def test_modulepickling_change_cache_dir(tmpdir):
path_1 = 'fake path 1'
path_2 = 'fake path 2'
hashed_grammar = load_grammar()._sha256
hashed_grammar = load_grammar()._hashed
_save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1)
parser_cache.clear()
cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_1)
@@ -77,11 +77,11 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
with open(path, 'w'):
pass
save_module(grammar._sha256, path, module, [])
assert load_module(grammar._sha256, path) == module
save_module(grammar._hashed, path, module, [])
assert load_module(grammar._hashed, path) == module
unlink(_get_hashed_path(grammar._sha256, path))
unlink(_get_hashed_path(grammar._hashed, path))
parser_cache.clear()
cached2 = load_module(grammar._sha256, path)
cached2 = load_module(grammar._hashed, path)
assert cached2 is None
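
The tests only have to follow the rename, reading the digest from grammar._hashed instead of grammar._sha256 before handing it to the cache helpers. A hedged sketch of the same save/load round-trip the deleted-cache test exercises (the scratch path is an assumption, and _hashed is a private attribute):

import parso
from parso.cache import load_module, save_module, parser_cache

grammar = parso.load_grammar()
path = '/tmp/cached_module.py'   # assumed scratch file
open(path, 'w').close()

module = grammar.parse('x = 1\n')
save_module(grammar._hashed, path, module, ['x = 1\n'])
assert load_module(grammar._hashed, path) is module   # served from the RAM cache

parser_cache.clear()
# With the RAM cache cleared, load_module falls back to the on-disk pickle.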

View File

@@ -54,7 +54,7 @@ class Differ(object):
def initialize(self, code):
logging.debug('differ: initialize')
self.lines = splitlines(code, keepends=True)
cache.parser_cache.pop(None, None)
cache.parser_cache[self.grammar._hashed].pop(None, None)
self.module = parse(code, diff_cache=True, cache=True)
return self.module
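
Because the RAM cache is now nested, the Differ helper has to pop the None-path entry (used when code is parsed without a file path) from its own grammar's bucket instead of from the top-level dict. A defensive variant of that cleanup, assuming the bucket may not exist yet:

from parso import cache

def drop_pathless_entry(grammar):
    # The per-grammar bucket only exists once something has been cached under
    # this grammar hash, so guard the lookup instead of indexing directly.
    bucket = cache.parser_cache.get(grammar._hashed)
    if bucket is not None:
        bucket.pop(None, None)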