From 26cce4d078b0c44b344fa82a1fe0d81982e766b3 Mon Sep 17 00:00:00 2001
From: Dave Halter <davidhalter88@gmail.com>
Date: Wed, 22 Mar 2017 18:32:49 +0100
Subject: [PATCH] Add the grammar as an argument to saving the parser.

This makes it impossible for parsers of different grammars to collide when loading from the cache.
---
 jedi/api/__init__.py                     |  2 +-
 jedi/api/classes.py                      |  2 +-
 jedi/evaluate/imports.py                 |  4 ++--
 jedi/evaluate/sys_path.py                |  4 ++--
 jedi/parser/pgen2/grammar.py             |  6 ++++--
 jedi/parser/pgen2/pgen.py                |  4 ++--
 jedi/parser/python/diff.py               |  4 ++--
 jedi/parser/utils.py                     | 23 ++++++++++++-----------
 test/test_cache.py                       | 21 ++++++++++++---------
 test/test_parser/test_old_fast_parser.py |  5 +++--
 10 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/jedi/api/__init__.py b/jedi/api/__init__.py
index 63831914..a22cb2ce 100644
--- a/jedi/api/__init__.py
+++ b/jedi/api/__init__.py
@@ -134,7 +134,7 @@ class Script(object):
     @cache.memoize_method
     def _get_module_node(self):
         parser = FastParser(self._grammar, self._source, self.path)
-        save_parser(self.path, parser, pickling=False)
+        save_parser(self._grammar, self.path, parser, pickling=False)
 
         return parser.get_root_node()
 
diff --git a/jedi/api/classes.py b/jedi/api/classes.py
index c193da80..f27a828f 100644
--- a/jedi/api/classes.py
+++ b/jedi/api/classes.py
@@ -391,7 +391,7 @@ class BaseDefinition(object):
             return ''
 
         path = self._name.get_root_context().py__file__()
-        parser = load_parser(path)
+        parser = load_parser(self._evaluator.grammar, path)
         lines = common.splitlines(parser.source)
 
         line_nr = self._name.start_pos[0]
diff --git a/jedi/evaluate/imports.py b/jedi/evaluate/imports.py
index e108e3eb..d93a3513 100644
--- a/jedi/evaluate/imports.py
+++ b/jedi/evaluate/imports.py
@@ -456,14 +456,14 @@ def _load_module(evaluator, path=None, source=None, sys_path=None, parent_module
     if path is not None and path.endswith(('.py', '.zip', '.egg')) \
             and dotted_path not in settings.auto_import_modules:
 
-        cached = load_parser(path)
+        cached = load_parser(evaluator.grammar, path)
         if cached is None:
             if source is None:
                 with open(path, 'rb') as f:
                     source = f.read()
 
             p = FastParser(evaluator.grammar, source_to_unicode(source), path)
-            save_parser(path, p)
+            save_parser(evaluator.grammar, path, p)
             module_node = p.get_root_node()
         else:
             module_node = cached.get_root_node()
diff --git a/jedi/evaluate/sys_path.py b/jedi/evaluate/sys_path.py
index 3f63f176..c9624271 100644
--- a/jedi/evaluate/sys_path.py
+++ b/jedi/evaluate/sys_path.py
@@ -221,10 +221,10 @@ def _get_paths_from_buildout_script(evaluator, buildout_script):
             return
 
         p = ParserWithRecovery(evaluator.grammar, source, buildout_script)
-        save_parser(buildout_script, p)
+        save_parser(evaluator.grammar, buildout_script, p)
         return p.get_root_node()
 
-    cached = load_parser(buildout_script)
+    cached = load_parser(evaluator.grammar, buildout_script)
     module_node = cached and cached.module or load(buildout_script)
     if module_node is None:
         return
diff --git a/jedi/parser/pgen2/grammar.py b/jedi/parser/pgen2/grammar.py
index 414c0dbe..44214f93 100644
--- a/jedi/parser/pgen2/grammar.py
+++ b/jedi/parser/pgen2/grammar.py
@@ -16,8 +16,9 @@ fallback token code OP, but the parser needs the actual token code.
 
 """
 
-# Python imports
 import pickle
+import hashlib
+
 
 
 class Grammar(object):
@@ -74,7 +75,7 @@ class Grammar(object):
 
     """
 
-    def __init__(self):
+    def __init__(self, bnf_text):
         self.symbol2number = {}
         self.number2symbol = {}
         self.states = []
@@ -84,6 +85,7 @@ class Grammar(object):
         self.tokens = {}
         self.symbol2label = {}
         self.start = 256
+        self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
 
     def dump(self, filename):
         """Dump the grammar tables to a pickle file."""
diff --git a/jedi/parser/pgen2/pgen.py b/jedi/parser/pgen2/pgen.py
index 90fc63ca..3317a7cd 100644
--- a/jedi/parser/pgen2/pgen.py
+++ b/jedi/parser/pgen2/pgen.py
@@ -5,7 +5,6 @@
 # Copyright 2014 David Halter. Integration into Jedi.
 # Modifications are dual-licensed: MIT and PSF.
 
-# Pgen imports
 from . import grammar
 from jedi.parser import token
 from jedi.parser import tokenize
@@ -13,6 +12,7 @@ from jedi.parser import tokenize
 
 class ParserGenerator(object):
     def __init__(self, bnf_text):
+        self._bnf_text = bnf_text
         self.generator = tokenize.source_tokens(bnf_text)
         self.gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self.parse()
@@ -20,7 +20,7 @@ class ParserGenerator(object):
         self.addfirstsets()
 
     def make_grammar(self):
-        c = grammar.Grammar()
+        c = grammar.Grammar(self._bnf_text)
         names = list(self.dfas.keys())
         names.sort()
         names.remove(self.startsymbol)
diff --git a/jedi/parser/python/diff.py b/jedi/parser/python/diff.py
index 298bdb20..27dc8462 100644
--- a/jedi/parser/python/diff.py
+++ b/jedi/parser/python/diff.py
@@ -146,10 +146,10 @@ class NewDiffParser(object):
         lines##### TODO
         tokens = tokenize(lines)
         if self._module is None:
-            self._module = load_parser(self._path)
+            self._module = load_parser(grammar, self._path)
             if self._module is None:
                 self._module = self._parser.parse(tokens)
-                save_parser(self._path, self._module)
+                save_parser(grammar, self._path, self._module)
             return self._module
 
         return bla
diff --git a/jedi/parser/utils.py b/jedi/parser/utils.py
index 3cf318bc..2b9cd374 100644
--- a/jedi/parser/utils.py
+++ b/jedi/parser/utils.py
@@ -58,7 +58,7 @@ class ParserCacheItem(object):
         self.change_time = change_time
 
 
-def load_parser(path):
+def load_parser(grammar, path):
     """
     Returns the module or None, if it fails.
     """
@@ -69,10 +69,10 @@ def load_parser(path):
             return parser_cache_item.parser
     except KeyError:
         if settings.use_filesystem_cache:
-            return ParserPickling.load_parser(path, p_time)
+            return ParserPickling.load_parser(grammar, path, p_time)
 
 
-def save_parser(path, parser, pickling=True):
+def save_parser(grammar, path, parser, pickling=True):
     try:
         p_time = None if path is None else os.path.getmtime(path)
     except OSError:
@@ -82,11 +82,11 @@ def save_parser(path, parser, pickling=True):
     item = ParserCacheItem(parser, p_time)
     parser_cache[path] = item
     if settings.use_filesystem_cache and pickling:
-        ParserPickling.save_parser(path, item)
+        ParserPickling.save_parser(grammar, path, item)
 
 
 class ParserPickling(object):
-    version = 27
+    version = 28
     """
     Version number (integer) for file system cache.
 
@@ -112,7 +112,7 @@ class ParserPickling(object):
         .. todo:: Detect interpreter (e.g., PyPy).
         """
 
-    def load_parser(self, path, original_changed_time):
+    def load_parser(self, grammar, path, original_changed_time):
         """
         Try to load the parser for `path`, unless `original_changed_time` is
         greater than the original pickling time. In which case the pickled
@@ -127,7 +127,7 @@ class ParserPickling(object):
             # the pickle file is outdated
             return None
 
-        with open(self._get_hashed_path(path), 'rb') as f:
+        with open(self._get_hashed_path(grammar, path), 'rb') as f:
             try:
                 gc.disable()
                 parser_cache_item = pickle.load(f)
@@ -138,7 +138,7 @@ class ParserPickling(object):
         parser_cache[path] = parser_cache_item
         return parser_cache_item.parser
 
-    def save_parser(self, path, parser_cache_item):
+    def save_parser(self, grammar, path, parser_cache_item):
         self.__index = None
         try:
             files = self._index
@@ -146,7 +146,7 @@ class ParserPickling(object):
             files = {}
             self._index = files
 
-        with open(self._get_hashed_path(path), 'wb') as f:
+        with open(self._get_hashed_path(grammar, path), 'wb') as f:
             pickle.dump(parser_cache_item, f, pickle.HIGHEST_PROTOCOL)
             files[path] = parser_cache_item.change_time
 
@@ -185,8 +185,9 @@ class ParserPickling(object):
         shutil.rmtree(self._cache_directory())
         self.__index = {}
 
-    def _get_hashed_path(self, path):
-        return self._get_path('%s.pkl' % hashlib.md5(path.encode("utf-8")).hexdigest())
+    def _get_hashed_path(self, grammar, path):
+        file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
+        return self._get_path('%s-%s.pkl' % (grammar.sha256, file_hash))
 
     def _get_path(self, file):
         dir = self._cache_directory()
diff --git a/test/test_cache.py b/test/test_cache.py
index 7cff4d4c..ba25cf84 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -9,6 +9,7 @@ import pytest
 import jedi
 from jedi import settings, cache
 from jedi.parser.utils import ParserCacheItem, ParserPickling
+from jedi.parser.python import load_grammar
 
 
 ParserPicklingCls = type(ParserPickling)
@@ -30,19 +31,20 @@ def test_modulepickling_change_cache_dir(monkeypatch, tmpdir):
     path_2 = 'fake path 2'
 
     monkeypatch.setattr(settings, 'cache_directory', dir_1)
-    ParserPickling.save_parser(path_1, item_1)
-    cached = load_stored_item(ParserPickling, path_1, item_1)
+    grammar = load_grammar()
+    ParserPickling.save_parser(grammar, path_1, item_1)
+    cached = load_stored_item(grammar, ParserPickling, path_1, item_1)
     assert cached == item_1.parser
 
     monkeypatch.setattr(settings, 'cache_directory', dir_2)
-    ParserPickling.save_parser(path_2, item_2)
-    cached = load_stored_item(ParserPickling, path_1, item_1)
+    ParserPickling.save_parser(grammar, path_2, item_2)
+    cached = load_stored_item(grammar, ParserPickling, path_1, item_1)
     assert cached is None
 
 
-def load_stored_item(cache, path, item):
+def load_stored_item(grammar, cache, path, item):
     """Load `item` stored at `path` in `cache`."""
-    return cache.load_parser(path, item.change_time - 1)
+    return cache.load_parser(grammar, path, item.change_time - 1)
 
 
 @pytest.mark.usefixtures("isolated_jedi_cache")
@@ -52,13 +54,14 @@ def test_modulepickling_delete_incompatible_cache():
 
     cache1 = ParserPicklingCls()
     cache1.version = 1
-    cache1.save_parser(path, item)
-    cached1 = load_stored_item(cache1, path, item)
+    grammar = load_grammar()
+    cache1.save_parser(grammar, path, item)
+    cached1 = load_stored_item(grammar, cache1, path, item)
     assert cached1 == item.parser
 
     cache2 = ParserPicklingCls()
     cache2.version = 2
-    cached2 = load_stored_item(cache2, path, item)
+    cached2 = load_stored_item(grammar, cache2, path, item)
     assert cached2 is None
 
 
diff --git a/test/test_parser/test_old_fast_parser.py b/test/test_parser/test_old_fast_parser.py
index 32deb357..437a9c4b 100644
--- a/test/test_parser/test_old_fast_parser.py
+++ b/test/test_parser/test_old_fast_parser.py
@@ -46,8 +46,9 @@ def check_p(src, number_parsers_used, number_of_splits=None, number_of_misses=0)
     if number_of_splits is None:
         number_of_splits = number_parsers_used
 
-    p = FastParser(load_grammar(), u(src))
-    save_parser(None, p, pickling=False)
+    grammar = load_grammar()
+    p = FastParser(grammar, u(src))
+    save_parser(grammar, None, p, pickling=False)
 
     assert src == p.get_root_node().get_code()
     return p.get_root_node()