Save a module instead of a parser when pickling.

2025-12-06 14:04:26 +08:00 · 2017-03-30 00:55:04 +02:00
parent 932703f04a
commit 8059c3c2c8
9 changed files with 103 additions and 103 deletions
--- a/jedi/api/classes.py
+++ b/jedi/api/classes.py
@@ -9,7 +9,7 @@ import re
 from jedi._compatibility import u
 from jedi import settings
 from jedi import common
-from jedi.parser.utils import load_parser
+from jedi.parser import utils as parser_utils
 from jedi.cache import memoize_method
 from jedi.evaluate import representation as er
 from jedi.evaluate import instance
@@ -391,12 +391,11 @@ class BaseDefinition(object):
            return ''

        path = self._name.get_root_context().py__file__()
-        parser = load_parser(self._evaluator.grammar, path)
-        lines = common.splitlines(parser.source)
+        lines = parser_utils.parser_cache[path].lines

        line_nr = self._name.start_pos[0]
        start_line_nr = line_nr - before
-        return '\n'.join(lines[start_line_nr:line_nr + after + 1])
+        return ''.join(lines[start_line_nr:line_nr + after + 1])


 class Completion(BaseDefinition):
--- a/jedi/evaluate/imports.py
+++ b/jedi/evaluate/imports.py
@@ -481,14 +481,14 @@ def get_modules_containing_name(evaluator, modules, name):

    def check_python_file(path):
        try:
-            parser_cache_item = parser_cache[path]
+            node_cache_item = parser_cache[path]
        except KeyError:
            try:
                return check_fs(path)
            except IOError:
                return None
        else:
-            module_node = parser_cache_item.parser.get_root_node()
+            module_node = node_cache_item.node
            return er.ModuleContext(evaluator, module_node, path=path)

    def check_fs(path):
--- a/jedi/parser/python/__init__.py
+++ b/jedi/parser/python/__init__.py
@@ -7,7 +7,7 @@ from jedi._compatibility import FileNotFoundError
 from jedi.parser.pgen2.pgen import generate_grammar
 from jedi.parser.python.parser import Parser, _remove_last_newline
 from jedi.parser.python.diff import DiffParser
-from jedi.parser.tokenize import source_tokens
+from jedi.parser.tokenize import generate_tokens
 from jedi.parser import utils
 from jedi.common import splitlines, source_to_unicode

@@ -78,42 +78,44 @@ def parse(code=None, path=None, grammar=None, error_recovery=True,
    use_cache = cache and path is not None and not code
    if use_cache:
        # In this case we do actual caching. We just try to load it.
-        p = utils.load_parser(grammar, path)
-        if p is not None:
-            return p.get_root_node()
+        module_node = utils.load_module(grammar, path)
+        if module_node is not None:
+            return module_node

    if code is None:
        with open(path, 'rb') as f:
            code = source_to_unicode(f.read())

-    added_newline = not code.endswith('\n')
-    if added_newline:
-        code += '\n'
-
-    tokens = source_tokens(code, use_exact_op_types=True)
-    # TODO add recovery
-    p = None
    if diff_cache:
        try:
-            parser_cache_item = utils.parser_cache[path]
+            module_cache_item = utils.parser_cache[path]
        except KeyError:
            pass
        else:
-            p = parser_cache_item.parser
            lines = splitlines(code, keepends=True)
-            new_node = DiffParser(p).update(lines)
-            p._parsed = new_node
-            utils.save_parser(grammar, path, p, pickling=False)
-            if added_newline:
-                p.source = code[:-1]
-                _remove_last_newline(new_node)
+            module_node = module_cache_item.node
+            new_node = DiffParser(grammar, module_node).update(
+                old_lines=module_cache_item.lines,
+                new_lines=lines
+            )
+            utils.save_module(grammar, path, module_node, lines, pickling=False)
            return new_node
-    p = Parser(grammar, code, error_recovery=error_recovery, start_symbol=start_symbol)
-    new_node = p.parse(tokens=tokens)
+
+    added_newline = not code.endswith('\n')
+    lines = tokenize_lines = splitlines(code, keepends=True)
    if added_newline:
-        _remove_last_newline(new_node)
-        p.source = code[:-1]
+        code += '\n'
+        tokenize_lines = list(tokenize_lines)
+        tokenize_lines[-1] += '\n'
+        tokenize_lines.append('')
+
+    tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
+
+    p = Parser(grammar, code, error_recovery=error_recovery, start_symbol=start_symbol)
+    root_node = p.parse(tokens=tokens)
+    if added_newline:
+        _remove_last_newline(root_node)

    if use_cache or diff_cache:
-        utils.save_parser(grammar, path, p)
-    return new_node
+        utils.save_module(grammar, path, root_node, lines)
+    return root_node
--- a/jedi/parser/python/diff.py
+++ b/jedi/parser/python/diff.py
@@ -156,10 +156,9 @@ class NewDiffParser(object):


 class DiffParser(object):
-    def __init__(self, parser):
-        self._parser = parser
-        self._grammar = self._parser._grammar
-        self._module = parser.get_root_node()
+    def __init__(self, grammar, module):
+        self._grammar = grammar
+        self._module = module

    def _reset(self):
        self._copy_count = 0
@@ -167,7 +166,7 @@ class DiffParser(object):

        self._nodes_stack = _NodesStack(self._module)

-    def update(self, lines_new):
+    def update(self, old_lines, new_lines):
        '''
        The algorithm works as follows:

@@ -187,24 +186,23 @@ class DiffParser(object):
        # Reset the used names cache so they get regenerated.
        self._module._used_names = None

-        self._parser_lines_new = lines_new
+        self._parser_lines_new = new_lines
        self._added_newline = False
-        if lines_new[-1] != '':
+        if new_lines[-1] != '':
            # The Python grammar needs a newline at the end of a file, but for
-            # everything else we keep working with lines_new here.
-            self._parser_lines_new = list(lines_new)
+            # everything else we keep working with new_lines here.
+            self._parser_lines_new = list(new_lines)
            self._parser_lines_new[-1] += '\n'
            self._parser_lines_new.append('')
            self._added_newline = True

        self._reset()

-        line_length = len(lines_new)
-        lines_old = splitlines(self._parser.source, keepends=True)
-        sm = difflib.SequenceMatcher(None, lines_old, self._parser_lines_new)
+        line_length = len(new_lines)
+        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        debug.speed('diff parser calculated')
-        debug.dbg('diff: line_lengths old: %s, new: %s' % (len(lines_old), line_length))
+        debug.dbg('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))

        for operation, i1, i2, j1, j2 in opcodes:
            debug.dbg('diff %s old[%s:%s] new[%s:%s]',
@@ -229,17 +227,15 @@ class DiffParser(object):
        self._nodes_stack.close()

        if self._added_newline:
-            _remove_last_newline(self._parser.get_root_node())
-
-        self._parser.source = ''.join(lines_new)
+            _remove_last_newline(self._module)

        # Good for debugging.
        if debug.debug_function:
-            self._enabled_debugging(lines_old, lines_new)
+            self._enabled_debugging(old_lines, new_lines)
        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            current_lines = splitlines(self._module.get_code(), keepends=True)
-            diff = difflib.unified_diff(current_lines, lines_new)
+            diff = difflib.unified_diff(current_lines, new_lines)
            raise Exception(
                "There's an issue (%s != %s) with the diff parser. Please report:\n%s"
                % (last_pos, line_length, ''.join(diff))
@@ -248,9 +244,9 @@ class DiffParser(object):
        debug.speed('diff parser end')
        return self._module

-    def _enabled_debugging(self, lines_old, lines_new):
+    def _enabled_debugging(self, old_lines, lines_new):
        if self._module.get_code() != ''.join(lines_new):
-            debug.warning('parser issue:\n%s\n%s', ''.join(lines_old),
+            debug.warning('parser issue:\n%s\n%s', ''.join(old_lines),
                          ''.join(lines_new))

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
--- a/jedi/parser/python/parser.py
+++ b/jedi/parser/python/parser.py
@@ -53,9 +53,9 @@ class Parser(BaseParser):
        self.source = source
        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
-        if not source.endswith('\n') and start_symbol == 'file_input':
-            source += '\n'
-            self._added_newline = True
+        #if not source.endswith('\n') and start_symbol == 'file_input':
+        #    source += '\n'
+        #    self._added_newline = True

        self.new_code = source

@@ -91,8 +91,8 @@ class Parser(BaseParser):
                [node]
            )

-        if self._added_newline:
-            _remove_last_newline(node)
+        #if self._added_newline:
+        #    _remove_last_newline(node)
        return node

    def get_root_node(self):
--- a/jedi/parser/utils.py
+++ b/jedi/parser/utils.py
@@ -51,44 +51,45 @@ def underscore_memoization(func):
 parser_cache = {}


-class ParserCacheItem(object):
-    def __init__(self, parser, change_time=None):
-        self.parser = parser
+class NodeCacheItem(object):
+    def __init__(self, node, lines, change_time=None):
+        self.node = node
+        self.lines = lines
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time


-def load_parser(grammar, path):
+def load_module(grammar, path):
    """
-    Returns the module or None, if it fails.
+    Returns a module or None, if it fails.
    """
    p_time = os.path.getmtime(path) if path else None
    try:
        # TODO Add grammar sha256
-        parser_cache_item = parser_cache[path]
-        if not path or p_time <= parser_cache_item.change_time:
-            return parser_cache_item.parser
+        module_cache_item = parser_cache[path]
+        if not path or p_time <= module_cache_item.change_time:
+            return module_cache_item.node
    except KeyError:
        if settings.use_filesystem_cache:
-            return ParserPickling.load_parser(grammar, path, p_time)
+            return getattr(ParserPickling.load_item(grammar, path, p_time), 'node', None)


-def save_parser(grammar, path, parser, pickling=True):
+def save_module(grammar, path, module, lines, pickling=True):
    try:
        p_time = None if path is None else os.path.getmtime(path)
    except OSError:
        p_time = None
        pickling = False

-    item = ParserCacheItem(parser, p_time)
+    item = NodeCacheItem(module, lines, p_time)
    parser_cache[path] = item
    if settings.use_filesystem_cache and pickling and path is not None:
-        ParserPickling.save_parser(grammar, path, item)
+        ParserPickling.save_item(grammar, path, item)


 class ParserPickling(object):
-    version = 28
+    version = 29
    """
    Version number (integer) for file system cache.

@@ -114,7 +115,7 @@ class ParserPickling(object):
        .. todo:: Detect interpreter (e.g., PyPy).
        """

-    def load_parser(self, grammar, path, original_changed_time):
+    def load_item(self, grammar, path, original_changed_time):
        """
        Try to load the parser for `path`, unless `original_changed_time` is
        greater than the original pickling time. In which case the pickled
@@ -133,17 +134,17 @@ class ParserPickling(object):
            with open(self._get_hashed_path(grammar, path), 'rb') as f:
                try:
                    gc.disable()
-                    parser_cache_item = pickle.load(f)
+                    module_cache_item = pickle.load(f)
                finally:
                    gc.enable()
        except FileNotFoundError:
            return None

        debug.dbg('pickle loaded: %s', path)
-        parser_cache[path] = parser_cache_item
-        return parser_cache_item.parser
+        parser_cache[path] = module_cache_item
+        return module_cache_item

-    def save_parser(self, grammar, path, parser_cache_item):
+    def save_item(self, grammar, path, module_cache_item):
        self.__index = None
        try:
            files = self._index
@@ -152,8 +153,8 @@ class ParserPickling(object):
            self._index = files

        with open(self._get_hashed_path(grammar, path), 'wb') as f:
-            pickle.dump(parser_cache_item, f, pickle.HIGHEST_PROTOCOL)
-            files[path] = parser_cache_item.change_time
+            pickle.dump(module_cache_item, f, pickle.HIGHEST_PROTOCOL)
+            files[path] = module_cache_item.change_time

        self._flush_index()

--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -166,7 +166,7 @@ def test_get_line_code():
    # With before/after
    line = '    foo'
    source = 'def foo():\n%s\nother_line' % line
-    assert get_line_code(source, line=2) == line
+    assert get_line_code(source, line=2) == line + '\n'
    assert get_line_code(source, line=2, after=1) == line + '\nother_line'
    assert get_line_code(source, line=2, after=1, before=1) == source

--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -9,7 +9,7 @@ import pytest

 import jedi
 from jedi import settings, cache
-from jedi.parser.utils import ParserCacheItem, ParserPickling
+from jedi.parser.utils import NodeCacheItem, ParserPickling
 from jedi.parser.python import load_grammar


@@ -26,39 +26,40 @@ def test_modulepickling_change_cache_dir(monkeypatch, tmpdir):
    dir_1 = str(tmpdir.mkdir('first'))
    dir_2 = str(tmpdir.mkdir('second'))

-    item_1 = ParserCacheItem('fake parser 1')
-    item_2 = ParserCacheItem('fake parser 2')
+    item_1 = NodeCacheItem('bla', [])
+    item_2 = NodeCacheItem('bla', [])
    path_1 = 'fake path 1'
    path_2 = 'fake path 2'

    monkeypatch.setattr(settings, 'cache_directory', dir_1)
    grammar = load_grammar()
-    ParserPickling.save_parser(grammar, path_1, item_1)
+    ParserPickling.save_item(grammar, path_1, item_1)
    cached = load_stored_item(grammar, ParserPickling, path_1, item_1)
-    assert cached == item_1.parser
+    assert cached == item_1.node

    monkeypatch.setattr(settings, 'cache_directory', dir_2)
-    ParserPickling.save_parser(grammar, path_2, item_2)
+    ParserPickling.save_item(grammar, path_2, item_2)
    cached = load_stored_item(grammar, ParserPickling, path_1, item_1)
    assert cached is None


 def load_stored_item(grammar, cache, path, item):
    """Load `item` stored at `path` in `cache`."""
-    return cache.load_parser(grammar, path, item.change_time - 1)
+    item = cache.load_item(grammar, path, item.change_time - 1)
+    return item and item.node


@pytest.mark.usefixtures("isolated_jedi_cache")
 def test_modulepickling_delete_incompatible_cache():
-    item = ParserCacheItem('fake parser')
+    item = NodeCacheItem('fake parser', [])
    path = 'fake path'

    cache1 = ParserPicklingCls()
    cache1.version = 1
    grammar = load_grammar()
-    cache1.save_parser(grammar, path, item)
+    cache1.save_item(grammar, path, item)
    cached1 = load_stored_item(grammar, cache1, path, item)
-    assert cached1 == item.parser
+    assert cached1 == item.node

    cache2 = ParserPicklingCls()
    cache2.version = 2
@@ -81,15 +82,15 @@ def test_modulepickling_simulate_deleted_cache():

    __ https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html
    """
-    item = ParserCacheItem('fake parser')
+    item = NodeCacheItem('fake parser', [])
    path = 'fake path'

    cache = ParserPicklingCls()
    cache.version = 1
    grammar = load_grammar()
-    cache.save_parser(grammar, path, item)
+    cache.save_item(grammar, path, item)
    cached1 = load_stored_item(grammar, cache, path, item)
-    assert cached1 == item.parser
+    assert cached1 == item.node

    unlink(cache._get_hashed_path(grammar, path))

--- a/test/test_parser/test_diff_parser.py
+++ b/test/test_parser/test_diff_parser.py
@@ -6,10 +6,10 @@ import jedi
 from jedi import debug
 from jedi.common import splitlines
 from jedi import cache
+from jedi.parser.utils import parser_cache
 from jedi.parser.python import load_grammar
 from jedi.parser.python.diff import DiffParser
-from jedi.parser.python.parser import Parser
-from jedi.parser.tokenize import source_tokens
+from jedi.parser.python import parse


 def _check_error_leaves_nodes(node):
@@ -42,23 +42,24 @@ def _assert_valid_graph(node):


 class Differ(object):
+    grammar = load_grammar()
+
    def initialize(self, code):
        debug.dbg('differ: initialize', color='YELLOW')
-        grammar = load_grammar()
-        self.parser = Parser(grammar, code, error_recovery=True)
-        tokens = source_tokens(self.parser.new_code, use_exact_op_types=True)
-        return self.parser.parse(tokens)
+        self.lines = splitlines(code, keepends=True)
+        parser_cache.pop(None, None)
+        self.module = parse(code, diff_cache=True, cache=True)
+        return self.module

-    def parse(self, source, copies=0, parsers=0, expect_error_leaves=False):
+    def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
        debug.dbg('differ: parse copies=%s parsers=%s', copies, parsers, color='YELLOW')
-        lines = splitlines(source, keepends=True)
-        diff_parser = DiffParser(self.parser)
-        new_module = diff_parser.update(lines)
-        assert source == new_module.get_code()
+        lines = splitlines(code, keepends=True)
+        diff_parser = DiffParser(self.grammar, self.module)
+        new_module = diff_parser.update(self.lines, lines)
+        self.lines = lines
+        assert code == new_module.get_code()
        assert diff_parser._copy_count == copies
        assert diff_parser._parser_count == parsers
-        self.parser.module = new_module
-        self.parser._parsed = new_module

        assert expect_error_leaves == _check_error_leaves_nodes(new_module)
        _assert_valid_graph(new_module)