From 26cce4d078b0c44b344fa82a1fe0d81982e766b3 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 22 Mar 2017 18:32:49 +0100 Subject: [PATCH] Add the grammar as an argument to saving the parser. This makes collisions of different grammars when loading from the cache impossible. --- jedi/api/__init__.py | 2 +- jedi/api/classes.py | 2 +- jedi/evaluate/imports.py | 4 ++-- jedi/evaluate/sys_path.py | 4 ++-- jedi/parser/pgen2/grammar.py | 6 ++++-- jedi/parser/pgen2/pgen.py | 4 ++-- jedi/parser/python/diff.py | 4 ++-- jedi/parser/utils.py | 23 ++++++++++++----------- test/test_cache.py | 21 ++++++++++++--------- test/test_parser/test_old_fast_parser.py | 5 +++-- 10 files changed, 41 insertions(+), 34 deletions(-) diff --git a/jedi/api/__init__.py b/jedi/api/__init__.py index 63831914..a22cb2ce 100644 --- a/jedi/api/__init__.py +++ b/jedi/api/__init__.py @@ -134,7 +134,7 @@ class Script(object): @cache.memoize_method def _get_module_node(self): parser = FastParser(self._grammar, self._source, self.path) - save_parser(self.path, parser, pickling=False) + save_parser(self._grammar, self.path, parser, pickling=False) return parser.get_root_node() diff --git a/jedi/api/classes.py b/jedi/api/classes.py index c193da80..f27a828f 100644 --- a/jedi/api/classes.py +++ b/jedi/api/classes.py @@ -391,7 +391,7 @@ class BaseDefinition(object): return '' path = self._name.get_root_context().py__file__() - parser = load_parser(path) + parser = load_parser(self._evaluator.grammar, path) lines = common.splitlines(parser.source) line_nr = self._name.start_pos[0] diff --git a/jedi/evaluate/imports.py b/jedi/evaluate/imports.py index e108e3eb..d93a3513 100644 --- a/jedi/evaluate/imports.py +++ b/jedi/evaluate/imports.py @@ -456,14 +456,14 @@ def _load_module(evaluator, path=None, source=None, sys_path=None, parent_module if path is not None and path.endswith(('.py', '.zip', '.egg')) \ and dotted_path not in settings.auto_import_modules: - cached = load_parser(path) + cached = load_parser(evaluator.grammar, path) if cached is None: if source is None: with open(path, 'rb') as f: source = f.read() p = FastParser(evaluator.grammar, source_to_unicode(source), path) - save_parser(path, p) + save_parser(evaluator.grammar, path, p) module_node = p.get_root_node() else: module_node = cached.get_root_node() diff --git a/jedi/evaluate/sys_path.py b/jedi/evaluate/sys_path.py index 3f63f176..c9624271 100644 --- a/jedi/evaluate/sys_path.py +++ b/jedi/evaluate/sys_path.py @@ -221,10 +221,10 @@ def _get_paths_from_buildout_script(evaluator, buildout_script): return p = ParserWithRecovery(evaluator.grammar, source, buildout_script) - save_parser(buildout_script, p) + save_parser(evaluator.grammar, buildout_script, p) return p.get_root_node() - cached = load_parser(buildout_script) + cached = load_parser(evaluator.grammar, buildout_script) module_node = cached and cached.module or load(buildout_script) if module_node is None: return diff --git a/jedi/parser/pgen2/grammar.py b/jedi/parser/pgen2/grammar.py index 414c0dbe..44214f93 100644 --- a/jedi/parser/pgen2/grammar.py +++ b/jedi/parser/pgen2/grammar.py @@ -16,8 +16,9 @@ fallback token code OP, but the parser needs the actual token code. """ -# Python imports import pickle +import hashlib + class Grammar(object): @@ -74,7 +75,7 @@ class Grammar(object): """ - def __init__(self): + def __init__(self, bnf_text): self.symbol2number = {} self.number2symbol = {} self.states = [] @@ -84,6 +85,7 @@ class Grammar(object): self.tokens = {} self.symbol2label = {} self.start = 256 + self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest() def dump(self, filename): """Dump the grammar tables to a pickle file.""" diff --git a/jedi/parser/pgen2/pgen.py b/jedi/parser/pgen2/pgen.py index 90fc63ca..3317a7cd 100644 --- a/jedi/parser/pgen2/pgen.py +++ b/jedi/parser/pgen2/pgen.py @@ -5,7 +5,6 @@ # Copyright 2014 David Halter. Integration into Jedi. # Modifications are dual-licensed: MIT and PSF. -# Pgen imports from . import grammar from jedi.parser import token from jedi.parser import tokenize @@ -13,6 +12,7 @@ from jedi.parser import tokenize class ParserGenerator(object): def __init__(self, bnf_text): + self._bnf_text = bnf_text self.generator = tokenize.source_tokens(bnf_text) self.gettoken() # Initialize lookahead self.dfas, self.startsymbol = self.parse() @@ -20,7 +20,7 @@ class ParserGenerator(object): self.addfirstsets() def make_grammar(self): - c = grammar.Grammar() + c = grammar.Grammar(self._bnf_text) names = list(self.dfas.keys()) names.sort() names.remove(self.startsymbol) diff --git a/jedi/parser/python/diff.py b/jedi/parser/python/diff.py index 298bdb20..27dc8462 100644 --- a/jedi/parser/python/diff.py +++ b/jedi/parser/python/diff.py @@ -146,10 +146,10 @@ class NewDiffParser(object): lines##### TODO tokens = tokenize(lines) if self._module is None: - self._module = load_parser(self._path) + self._module = load_parser(grammar, self._path) if self._module is None: self._module = self._parser.parse(tokens) - save_parser(self._path, self._module) + save_parser(grammar, self._path, self._module) return self._module return bla diff --git a/jedi/parser/utils.py b/jedi/parser/utils.py index 3cf318bc..2b9cd374 100644 --- a/jedi/parser/utils.py +++ b/jedi/parser/utils.py @@ -58,7 +58,7 @@ class ParserCacheItem(object): self.change_time = change_time -def load_parser(path): +def load_parser(grammar, path): """ Returns the module or None, if it fails. """ @@ -69,10 +69,10 @@ def load_parser(path): return parser_cache_item.parser except KeyError: if settings.use_filesystem_cache: - return ParserPickling.load_parser(path, p_time) + return ParserPickling.load_parser(grammar, path, p_time) -def save_parser(path, parser, pickling=True): +def save_parser(grammar, path, parser, pickling=True): try: p_time = None if path is None else os.path.getmtime(path) except OSError: @@ -82,11 +82,11 @@ def save_parser(path, parser, pickling=True): item = ParserCacheItem(parser, p_time) parser_cache[path] = item if settings.use_filesystem_cache and pickling: - ParserPickling.save_parser(path, item) + ParserPickling.save_parser(grammar, path, item) class ParserPickling(object): - version = 27 + version = 28 """ Version number (integer) for file system cache. @@ -112,7 +112,7 @@ class ParserPickling(object): .. todo:: Detect interpreter (e.g., PyPy). """ - def load_parser(self, path, original_changed_time): + def load_parser(self, grammar, path, original_changed_time): """ Try to load the parser for `path`, unless `original_changed_time` is greater than the original pickling time. In which case the pickled @@ -127,7 +127,7 @@ class ParserPickling(object): # the pickle file is outdated return None - with open(self._get_hashed_path(path), 'rb') as f: + with open(self._get_hashed_path(grammar, path), 'rb') as f: try: gc.disable() parser_cache_item = pickle.load(f) @@ -138,7 +138,7 @@ class ParserPickling(object): parser_cache[path] = parser_cache_item return parser_cache_item.parser - def save_parser(self, path, parser_cache_item): + def save_parser(self, grammar, path, parser_cache_item): self.__index = None try: files = self._index @@ -146,7 +146,7 @@ class ParserPickling(object): files = {} self._index = files - with open(self._get_hashed_path(path), 'wb') as f: + with open(self._get_hashed_path(grammar, path), 'wb') as f: pickle.dump(parser_cache_item, f, pickle.HIGHEST_PROTOCOL) files[path] = parser_cache_item.change_time @@ -185,8 +185,9 @@ class ParserPickling(object): shutil.rmtree(self._cache_directory()) self.__index = {} - def _get_hashed_path(self, path): - return self._get_path('%s.pkl' % hashlib.md5(path.encode("utf-8")).hexdigest()) + def _get_hashed_path(self, grammar, path): + file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() + return self._get_path('%s-%s.pkl' % (grammar.sha256, file_hash)) def _get_path(self, file): dir = self._cache_directory() diff --git a/test/test_cache.py b/test/test_cache.py index 7cff4d4c..ba25cf84 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -9,6 +9,7 @@ import pytest import jedi from jedi import settings, cache from jedi.parser.utils import ParserCacheItem, ParserPickling +from jedi.parser.python import load_grammar ParserPicklingCls = type(ParserPickling) @@ -30,19 +31,20 @@ def test_modulepickling_change_cache_dir(monkeypatch, tmpdir): path_2 = 'fake path 2' monkeypatch.setattr(settings, 'cache_directory', dir_1) - ParserPickling.save_parser(path_1, item_1) - cached = load_stored_item(ParserPickling, path_1, item_1) + grammar = load_grammar() + ParserPickling.save_parser(grammar, path_1, item_1) + cached = load_stored_item(grammar, ParserPickling, path_1, item_1) assert cached == item_1.parser monkeypatch.setattr(settings, 'cache_directory', dir_2) - ParserPickling.save_parser(path_2, item_2) - cached = load_stored_item(ParserPickling, path_1, item_1) + ParserPickling.save_parser(grammar, path_2, item_2) + cached = load_stored_item(grammar, ParserPickling, path_1, item_1) assert cached is None -def load_stored_item(cache, path, item): +def load_stored_item(grammar, cache, path, item): """Load `item` stored at `path` in `cache`.""" - return cache.load_parser(path, item.change_time - 1) + return cache.load_parser(grammar, path, item.change_time - 1) @pytest.mark.usefixtures("isolated_jedi_cache") @@ -52,13 +54,14 @@ def test_modulepickling_delete_incompatible_cache(): cache1 = ParserPicklingCls() cache1.version = 1 - cache1.save_parser(path, item) - cached1 = load_stored_item(cache1, path, item) + grammar = load_grammar() + cache1.save_parser(grammar, path, item) + cached1 = load_stored_item(grammar, cache1, path, item) assert cached1 == item.parser cache2 = ParserPicklingCls() cache2.version = 2 - cached2 = load_stored_item(cache2, path, item) + cached2 = load_stored_item(grammar, cache2, path, item) assert cached2 is None diff --git a/test/test_parser/test_old_fast_parser.py b/test/test_parser/test_old_fast_parser.py index 32deb357..437a9c4b 100644 --- a/test/test_parser/test_old_fast_parser.py +++ b/test/test_parser/test_old_fast_parser.py @@ -46,8 +46,9 @@ def check_p(src, number_parsers_used, number_of_splits=None, number_of_misses=0) if number_of_splits is None: number_of_splits = number_parsers_used - p = FastParser(load_grammar(), u(src)) - save_parser(None, p, pickling=False) + grammar = load_grammar() + p = FastParser(grammar, u(src)) + save_parser(grammar, None, p, pickling=False) assert src == p.get_root_node().get_code() return p.get_root_node()