From dafffdc9b4c8b257f1acaf66d62ae32292d74012 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 26 May 2017 12:48:59 -0400 Subject: [PATCH] The parser cache in RAM now has grammar versioning. --- parso/cache.py | 29 +++++++++++++---------------- parso/grammar.py | 12 ++++++------ test/test_cache.py | 10 +++++----- test/test_diff_parser.py | 2 +- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/parso/cache.py b/parso/cache.py index 7df51c6..3a8f851 100644 --- a/parso/cache.py +++ b/parso/cache.py @@ -58,11 +58,9 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set, ``$XDG_CACHE_HOME/parso`` is used instead of the default one. """ -# for fast_parser, should not be deleted parser_cache = {} - class _NodeCacheItem(object): def __init__(self, node, lines, change_time=None): self.node = node @@ -72,7 +70,7 @@ class _NodeCacheItem(object): self.change_time = change_time -def load_module(grammar_hash, path, cache_path=None): +def load_module(hashed_grammar, path, cache_path=None): """ Returns a module or None, if it fails. 
""" @@ -82,16 +80,15 @@ def load_module(grammar_hash, path, cache_path=None): return None try: - # TODO Add grammar sha256 - module_cache_item = parser_cache[path] + module_cache_item = parser_cache[hashed_grammar][path] if p_time <= module_cache_item.change_time: return module_cache_item.node except KeyError: - return _load_from_file_system(grammar_hash, path, p_time, cache_path=cache_path) + return _load_from_file_system(hashed_grammar, path, p_time, cache_path=cache_path) -def _load_from_file_system(grammar_hash, path, p_time, cache_path=None): - cache_path = _get_hashed_path(grammar_hash, path, cache_path=cache_path) +def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None): + cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path) try: try: if p_time > os.path.getmtime(cache_path): @@ -113,12 +110,12 @@ def _load_from_file_system(grammar_hash, path, p_time, cache_path=None): except FileNotFoundError: return None else: - parser_cache[path] = module_cache_item + parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item logging.debug('pickle loaded: %s', path) return module_cache_item.node -def save_module(grammar_hash, path, module, lines, pickling=True, cache_path=None): +def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=None): try: p_time = None if path is None else os.path.getmtime(path) except OSError: @@ -126,13 +123,13 @@ def save_module(grammar_hash, path, module, lines, pickling=True, cache_path=Non pickling = False item = _NodeCacheItem(module, lines, p_time) - parser_cache[path] = item + parser_cache.setdefault(hashed_grammar, {})[path] = item if pickling and path is not None: - _save_to_file_system(grammar_hash, path, item) + _save_to_file_system(hashed_grammar, path, item) -def _save_to_file_system(grammar_hash, path, item, cache_path=None): - with open(_get_hashed_path(grammar_hash, path, cache_path=cache_path), 'wb') as f: +def _save_to_file_system(hashed_grammar, path, 
item, cache_path=None): + with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f: pickle.dump(item, f, pickle.HIGHEST_PROTOCOL) @@ -143,11 +140,11 @@ def clear_cache(cache_path=None): parser_cache.clear() -def _get_hashed_path(grammar_hash, path, cache_path=None): +def _get_hashed_path(hashed_grammar, path, cache_path=None): directory = _get_cache_directory_path(cache_path=cache_path) file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() - return os.path.join(directory, '%s-%s.pkl' % (grammar_hash, file_hash)) + return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) def _get_cache_directory_path(cache_path=None): diff --git a/parso/grammar.py b/parso/grammar.py index 3732313..af56f4b 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -28,7 +28,7 @@ class Grammar(object): self._parser = parser self._tokenizer = tokenizer self._diff_parser = diff_parser - self._sha256 = hashlib.sha256(text.encode("utf-8")).hexdigest() + self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() def parse(self, code=None, **kwargs): """ @@ -77,7 +77,7 @@ class Grammar(object): # With the current architecture we cannot load from cache if the # code is given, because we just load from cache if it's not older than # the latest change (file last modified). - module_node = load_module(self._sha256, path, cache_path=cache_path) + module_node = load_module(self._hashed, path, cache_path=cache_path) if module_node is not None: return module_node @@ -91,7 +91,7 @@ class Grammar(object): raise TypeError("You have to define a diff parser to be able " "to use this option.") try: - module_cache_item = parser_cache[path] + module_cache_item = parser_cache[self._hashed][path] except KeyError: pass else: @@ -99,7 +99,7 @@ class Grammar(object): old_lines = module_cache_item.lines if old_lines == lines: # TODO remove this line? I think it's not needed. 
(dave) - save_module(self._sha256, path, module_node, lines, pickling=False, + save_module(self._hashed, path, module_node, lines, pickling=False, cache_path=cache_path) return module_node @@ -107,7 +107,7 @@ class Grammar(object): old_lines=old_lines, new_lines=lines ) - save_module(self._sha256, path, new_node, lines, pickling=cache, + save_module(self._hashed, path, new_node, lines, pickling=cache, cache_path=cache_path) return new_node @@ -126,7 +126,7 @@ class Grammar(object): remove_last_newline(root_node) if cache or diff_cache: - save_module(self._sha256, path, root_node, lines, pickling=cache, + save_module(self._hashed, path, root_node, lines, pickling=cache, cache_path=cache_path) return root_node diff --git a/test/test_cache.py b/test/test_cache.py index 2704502..a7cd70c 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -37,7 +37,7 @@ def test_modulepickling_change_cache_dir(tmpdir): path_1 = 'fake path 1' path_2 = 'fake path 2' - hashed_grammar = load_grammar()._sha256 + hashed_grammar = load_grammar()._hashed _save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1) parser_cache.clear() cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_1) @@ -77,11 +77,11 @@ def test_modulepickling_simulate_deleted_cache(tmpdir): with open(path, 'w'): pass - save_module(grammar._sha256, path, module, []) - assert load_module(grammar._sha256, path) == module + save_module(grammar._hashed, path, module, []) + assert load_module(grammar._hashed, path) == module - unlink(_get_hashed_path(grammar._sha256, path)) + unlink(_get_hashed_path(grammar._hashed, path)) parser_cache.clear() - cached2 = load_module(grammar._sha256, path) + cached2 = load_module(grammar._hashed, path) assert cached2 is None diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py index a8f2c09..2ecaa6e 100644 --- a/test/test_diff_parser.py +++ b/test/test_diff_parser.py @@ -54,7 +54,7 @@ class Differ(object): def initialize(self, code): 
logging.debug('differ: initialize') self.lines = splitlines(code, keepends=True) - cache.parser_cache.pop(None, None) + cache.parser_cache[self.grammar._hashed].pop(None, None) self.module = parse(code, diff_cache=True, cache=True) return self.module