1
0
forked from VimPlug/jedi

Most of the new diff parser's functionality should be working now. There are a few TODOs to solve, though.

This commit is contained in:
Dave Halter
2016-08-16 18:58:28 +02:00
parent b9040870c0
commit 54297cc4a5
2 changed files with 144 additions and 37 deletions

View File

@@ -149,7 +149,7 @@ def get_stack_at_position(grammar, code_lines, module, pos):
try: try:
p.parse(tokenizer=tokenize_without_endmarker(code)) p.parse(tokenizer=tokenize_without_endmarker(code))
except EndMarkerReached: except EndMarkerReached:
return Stack(p.stack) return Stack(p.pgen_parser.stack)
raise SystemError("This really shouldn't happen. There's a bug in Jedi.") raise SystemError("This really shouldn't happen. There's a bug in Jedi.")

View File

@@ -5,6 +5,7 @@ finished (and still not working as I want), I won't document it any further.
""" """
import re import re
from itertools import chain from itertools import chain
import copy
import difflib import difflib
from jedi._compatibility import use_metaclass from jedi._compatibility import use_metaclass
@@ -72,6 +73,7 @@ class DiffParser():
lines_old = splitlines(self._parser.source, keepends=True) lines_old = splitlines(self._parser.source, keepends=True)
sm = difflib.SequenceMatcher(None, lines_old, lines_new) sm = difflib.SequenceMatcher(None, lines_old, lines_new)
print(lines_old, lines_new)
for operation, i1, i2, j1, j2 in sm.get_opcodes(): for operation, i1, i2, j1, j2 in sm.get_opcodes():
print(operation) print(operation)
if operation == 'equal': if operation == 'equal':
@@ -87,25 +89,47 @@ class DiffParser():
self._delete_count += 1 # For statistics self._delete_count += 1 # For statistics
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
while until_line_new < self._parsed_until_line: while until_line_new > self._parsed_until_line:
parsed_until_line_old = self._parsed_until_line + line_offset parsed_until_line_old = self._parsed_until_line - line_offset
if matches: line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
# TODO check missing indent/dedent if line_stmt is None:
_copy_p() # Parse 1 line at least. We don't need more, because we just
self._update_positions(line_offset) # want to get into a state where the old parser has starting
# statements again (not e.g. lines within parentheses).
self._parse(self._parsed_until_line + 1)
else:
p_children = line_stmt.parent.children
index = p_children.index(line_stmt)
nodes = []
for node in p_children[index:]:
if until_line_old < node.end_pos[0]:
divided_node = self._divide_node(node)
if divided_node is not None:
nodes.append(divided_node)
break
else:
nodes.append(node)
if nodes:
self._insert_nodes(nodes)
# TODO remove dedent at end
self._update_positions(nodes, line_offset)
# We have copied as much as possible (but definitely not too # We have copied as much as possible (but definitely not too
# much). Therefore we escape, even if we're not at the end. The # much). Therefore we escape, even if we're not at the end. The
# rest will be parsed. # rest will be parsed.
# Might not reach until the end, because there's a statement # Might not reach until the end, because there's a statement
# that is not finished. # that is not finished.
break break
else:
# Parse 1 line at least. We don't need more, because we just
# want to get into a state where the old parser has starting
# statements again (not e.g. lines within parentheses).
self._parse(self._parsed_until_line + 1)
def _update_positions(self, line_offset, line_start, line_end): def _update_positions(self, nodes, line_offset):
for node in nodes:
try:
children = node.children
except AttributeError:
# Is a leaf
node.start_pos = node.start_pos[0] + line_offset, node.start_pos[1]
else:
self._update_positions(children)
if line_offset == 0: if line_offset == 0:
return return
@@ -118,10 +142,60 @@ class DiffParser():
self._insert_count += 1 self._insert_count += 1
self._parse(until_line_new) self._parse(until_line_new)
def _insert_nodes(self, nodes):
before_node = self._get_before_insertion_node()
line_indentation = nodes[0].start_pos[1]
while True:
p_children = before_node.parent.children
indentation = p_children[0].start_pos[1]
if line_indentation < indentation: # Dedent
# We might be at the most outer layer: modules. We
# don't want to depend on the first statement
# having the right indentation.
if before_node.parent is not None:
# TODO add dedent
before_node = before_node.parent
continue
# TODO check if the indentation is lower than the last statement
# and add a dedent error leaf.
# TODO do the same for indent error leafs.
p_children += nodes
break
def _divide_node(self, node, until_line):
"""
Breaks up scopes and returns only the part until the given line.
Tries to get the parts it can safely get and ignores the rest.
"""
if node.type not in ('classdef', 'funcdef'):
return None
suite = node.children[-1]
if suite.type != 'suite':
return None
new_node = copy.copy(node)
new_node.children[-1] = new_suite = copy.copy(suite)
for i, child_node in enumerate(new_suite.children):
if child_node.end_pos[1] > until_line:
divided_node = self._divide_node(child_node, until_line)
if divided_node is not None:
new_suite.children[i] = divided_node
new_suite.children[i + 1:] = []
else:
new_suite.children[i:] = []
break
return new_node
def _get_before_insertion_node(self): def _get_before_insertion_node(self):
if not self._new_children: if not self._new_children:
return None return None
line = self._parsed_until_line + 1
print(line)
leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False) leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False)
while leaf.type != 'newline': while leaf.type != 'newline':
try: try:
@@ -136,48 +210,78 @@ class DiffParser():
print(parent) print(parent)
if parent.type in ('suite', 'file_input'): if parent.type in ('suite', 'file_input'):
print(node) print(node)
print(i, line, node.end_pos) print(line, node.end_pos)
assert node.end_pos[0] <= line assert node.end_pos[0] <= line
assert node.end_pos[1] == 0 assert node.end_pos[1] == 0
return node return node
node = parent node = parent
def _get_old_line_stmt(self, old_line):
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
if leaf.get_start_pos_with_prefix()[0] == old_line:
return leaf.get_definition()
# Must be on the same line. Otherwise we need to parse that bit.
return None
def _parse(self, until_line): def _parse(self, until_line):
""" """
Parses at least until the given line, but might just parse more until a Parses at least until the given line, but might just parse more until a
valid state is reached. valid state is reached.
""" """
while until_line > self._parsed_until_line: while until_line > self._parsed_until_line:
node = self._parse_scope_part(before_node, until_line) node = self._parse_scope_node(until_line)
first_leaf = node.first_leaf() nodes = self._get_children_nodes(node)
if nodes:
self._insert_nodes(nodes)
first_leaf = nodes[0].first_leaf()
first_leaf.prefix = self._prefix + first_leaf.prefix
self._prefix = ''
self._prefix += node.children[-1].prefix
before_node = self._get_before_insertion_node() before_node = self._get_before_insertion_node()
if before_node is None: if before_node is None:
# The start of the file. # The start of the file.
self.new_children += node.children self._new_children += node.children
else: else:
before_node.parent.children += node.children before_node.parent.children += node.children
def _parse_scope_node(self, before_node, until_line, line_offset=0): def _get_children_nodes(self, node):
# TODO speed up, shouldn't copy the whole thing all the time. nodes = node.children[:-1]
if nodes: # More than an error leaf
first_element = nodes[0]
if first_element.type == 'error_leaf' and \
first_element.original_type == 'indent':
assert nodes[-1].type == 'dedent'
# This means that the start and end leaf
nodes = nodes[1:-1]
return nodes
def _parse_scope_node(self, until_line):
# TODO speed up, shouldn't copy the whole list all the time.
# memoryview? # memoryview?
lines_after = self._lines_new[self._parsed_until_line + 1:] lines_after = self._lines_new[self._parsed_until_line + 1:]
tokenizer = self._diff_tokenize(lines_after, until_line, line_offset) tokenizer = self._diff_tokenize(
lines_after,
until_line,
line_offset=self._parsed_until_line
)
self._parser = ParserWithRecovery( self._parser = ParserWithRecovery(
self._parser._grammar, self._parser._grammar,
source=None, source='\n',
tokenizer=tokenizer,
start_parsing=False start_parsing=False
) )
return self._parser.parse() return self._parser.parse(tokenizer=tokenizer)
def _diff_tokenize(lines, until_line, line_offset=0): def _diff_tokenize(self, lines, until_line, line_offset=0):
is_first_token = True is_first_token = True
omited_first_indent = False omited_first_indent = False
indent_count = 0 indent_count = 0
l = iter(lines)
tokens = generate_tokens(lambda: next(l, '')) tokens = generate_tokens(lambda: next(l, ''))
for token_info in tokens: for typ, string, start_pos, prefix in tokens:
typ = token_info.type start_pos = start_pos[0] + line_offset, start_pos[1]
if typ == 'indent': if typ == 'indent':
indent_count += 1 indent_count += 1
if is_first_token: if is_first_token:
@@ -192,17 +296,20 @@ class DiffParser():
# We are done here, only thing that can come now is an # We are done here, only thing that can come now is an
# endmarker or another dedented code block. # endmarker or another dedented code block.
break break
elif typ == 'newline' and token_info.start_pos[0] >= until_line: elif typ == 'newline' and start_pos[0] >= until_line:
yield token_info yield tokenize.TokenInfo(typ, string, start_pos, prefix)
x = self. x = self._parser.pgen_parser.stack
# Check if the parser is actually in a valid suite state.
if 1:
# TODO check if the parser is in a flow, and let it pass if
# so.
import pdb; pdb.set_trace() import pdb; pdb.set_trace()
break break
is_first_token = False is_first_token = False
if line_offset != 0:
raise NotImplementedError
yield tokenize.TokenInfo(*token_info.string[1:])
else:
yield token_info
yield tokenize.TokenInfo(tokenize.ENDMARKER, *token_info.string[1:]) yield tokenize.TokenInfo(typ, string, start_pos, prefix)
typ, string, start_pos, prefix = next(tokens)
start_pos = start_pos[0] + line_offset, start_pos[1]
yield tokenize.TokenInfo(tokenize.ENDMARKER, string, start_pos, prefix)