
Most of the new diff parser's functionality should be working now. There are a few TODOs left to solve, though.

Dave Halter
2016-08-16 18:58:28 +02:00
parent b9040870c0
commit 54297cc4a5
2 changed files with 144 additions and 37 deletions

View File

@@ -149,7 +149,7 @@ def get_stack_at_position(grammar, code_lines, module, pos):
     try:
         p.parse(tokenizer=tokenize_without_endmarker(code))
     except EndMarkerReached:
-        return Stack(p.stack)
+        return Stack(p.pgen_parser.stack)
     raise SystemError("This really shouldn't happen. There's a bug in Jedi.")
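Note: the change above only swaps the direct stack attribute for the one on the embedded pgen parser. The surrounding trick is worth spelling out: the tokenizer raises instead of yielding the end marker, which interrupts the parser exactly at the cursor. A minimal sketch of that pattern, using the standard-library tokenizer as a stand-in (Jedi ships its own tokenize module with a different token layout):

    import io
    import tokenize

    class EndMarkerReached(Exception):
        pass

    def tokens_without_endmarker(code):
        # Yield every token except ENDMARKER; raise instead, so a parser
        # consuming this generator stops right at the end of `code`.
        for tok in tokenize.generate_tokens(io.StringIO(code).readline):
            if tok.type == tokenize.ENDMARKER:
                raise EndMarkerReached()
            yield tok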

View File

@@ -5,6 +5,7 @@ finished (and still not working as I want), I won't document it any further.
 """
 import re
 from itertools import chain
+import copy
 import difflib

 from jedi._compatibility import use_metaclass
@@ -72,6 +73,7 @@ class DiffParser():
         lines_old = splitlines(self._parser.source, keepends=True)
         sm = difflib.SequenceMatcher(None, lines_old, lines_new)
+        print(lines_old, lines_new)
         for operation, i1, i2, j1, j2 in sm.get_opcodes():
             print(operation)
             if operation == 'equal':
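Note: the whole incremental update is driven by difflib.SequenceMatcher, whose opcodes classify each line range of the old and new source as 'equal', 'insert', 'delete' or 'replace'. A self-contained sketch of what the loop above iterates over:

    import difflib

    old = ['def f():\n', '    return 1\n']
    new = ['import os\n', 'def f():\n', '    return 2\n']

    sm = difflib.SequenceMatcher(None, old, new)
    for operation, i1, i2, j1, j2 in sm.get_opcodes():
        # Prints:
        #   insert 0 0 0 1    (a line was inserted before old line 1)
        #   equal 0 1 1 2     (def f(): is unchanged)
        #   replace 1 2 2 3   (return 1 -> return 2)
        print(operation, i1, i2, j1, j2)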
@@ -87,25 +89,47 @@ class DiffParser():
         self._delete_count += 1  # For statistics

     def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
-        while until_line_new < self._parsed_until_line:
-            parsed_until_line_old = self._parsed_until_line + line_offset
-            if matches:
-                # TODO check missing indent/dedent
-                _copy_p()
-                self._update_positions(line_offset)
-            else:
-                # Parse 1 line at least. We don't need more, because we just
-                # want to get into a state where the old parser has starting
-                # statements again (not e.g. lines within parentheses).
-                self._parse(self._parsed_until_line + 1)
+        while until_line_new > self._parsed_until_line:
+            parsed_until_line_old = self._parsed_until_line - line_offset
+            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
+            if line_stmt is None:
+                # Parse 1 line at least. We don't need more, because we just
+                # want to get into a state where the old parser has starting
+                # statements again (not e.g. lines within parentheses).
+                self._parse(self._parsed_until_line + 1)
+            else:
+                p_children = line_stmt.parent.children
+                index = p_children.index(line_stmt)
+                nodes = []
+                for node in p_children[index:]:
+                    if until_line_old < node.end_pos[0]:
+                        divided_node = self._divide_node(node)
+                        if divided_node is not None:
+                            nodes.append(divided_node)
+                        break
+                    else:
+                        nodes.append(node)
+                if nodes:
+                    self._insert_nodes(nodes)
+                    # TODO remove dedent at end
+                    self._update_positions(nodes, line_offset)
+                # We have copied as much as possible (but definitely not too
+                # much). Therefore we escape, even if we're not at the end. The
+                # rest will be parsed.
+                # Might not reach until the end, because there's a statement
+                # that is not finished.
+                break

-    def _update_positions(self, line_offset, line_start, line_end):
+    def _update_positions(self, nodes, line_offset):
+        for node in nodes:
+            try:
+                children = node.children
+            except AttributeError:
+                # Is a leaf
+                node.start_pos = node.start_pos[0] + line_offset, node.start_pos[1]
+            else:
+                self._update_positions(children)
         if line_offset == 0:
             return
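Note: a standalone version of the position-shifting recursion, with hypothetical Leaf/Node stand-ins for Jedi's tree classes. It threads line_offset through the recursive call, which the hunk above does not yet do (likely one of the remaining TODOs the commit message mentions):

    class Leaf:
        def __init__(self, start_pos):
            self.start_pos = start_pos  # (line, column)

    class Node:
        def __init__(self, children):
            self.children = children

    def update_positions(nodes, line_offset):
        # Shift every leaf in the given subtrees by line_offset lines.
        # Columns stay unchanged: only whole lines were added or removed.
        for node in nodes:
            try:
                children = node.children
            except AttributeError:
                # A leaf; it carries the actual position.
                node.start_pos = node.start_pos[0] + line_offset, node.start_pos[1]
            else:
                update_positions(children, line_offset)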
@@ -118,10 +142,60 @@ class DiffParser():
         self._insert_count += 1
         self._parse(until_line_new)

+    def _insert_nodes(self, nodes):
+        before_node = self._get_before_insertion_node()
+        line_indentation = nodes[0].start_pos[1]
+        while True:
+            p_children = before_node.parent.children
+            indentation = p_children[0].start_pos[1]
+            if line_indentation < indentation:  # Dedent
+                # We might be at the most outer layer: modules. We
+                # don't want to depend on the first statement
+                # having the right indentation.
+                if before_node.parent is not None:
+                    # TODO add dedent
+                    before_node = before_node.parent
+                    continue
+            # TODO check if the indentation is lower than the last statement
+            # and add a dedent error leaf.
+            # TODO do the same for indent error leafs.
+            p_children += nodes
+            break
+
+    def _divide_node(self, node, until_line):
+        """
+        Breaks up scopes and returns only the part until the given line.
+        Tries to get the parts it can safely get and ignores the rest.
+        """
+        if node.type not in ('classdef', 'funcdef'):
+            return None
+        suite = node.children[-1]
+        if suite.type != 'suite':
+            return None
+        new_node = copy.copy(node)
+        new_node.children[-1] = new_suite = copy.copy(suite)
+        for i, child_node in enumerate(new_suite.children):
+            if child_node.end_pos[1] > until_line:
+                divided_node = self._divide_node(child_node, until_line)
+                if divided_node is not None:
+                    new_suite.children[i] = divided_node
+                    new_suite.children[i + 1:] = []
+                else:
+                    new_suite.children[i:] = []
+                break
+        return new_node
+
     def _get_before_insertion_node(self):
+        if not self._new_children:
+            return None
         line = self._parsed_until_line + 1
+        print(line)
         leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False)
         while leaf.type != 'newline':
             try:
@@ -136,48 +210,78 @@ class DiffParser():
             print(parent)
             if parent.type in ('suite', 'file_input'):
                 print(node)
-                print(i, line, node.end_pos)
+                print(line, node.end_pos)
                 assert node.end_pos[0] <= line
                 assert node.end_pos[1] == 0
                 return node
             node = parent

+    def _get_old_line_stmt(self, old_line):
+        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
+        if leaf.get_start_pos_with_prefix()[0] == old_line:
+            return leaf.get_definition()
+        # Must be on the same line. Otherwise we need to parse that bit.
+        return None

     def _parse(self, until_line):
         """
         Parses at least until the given line, but might just parse more until a
         valid state is reached.
         """
         while until_line > self._parsed_until_line:
-            node = self._parse_scope_part(before_node, until_line)
-            first_leaf = node.first_leaf()
+            node = self._parse_scope_node(until_line)
+            nodes = self._get_children_nodes(node)
+            if nodes:
+                self._insert_nodes(nodes)
+                first_leaf = nodes[0].first_leaf()
             first_leaf.prefix = self._prefix + first_leaf.prefix
             self._prefix = ''
             self._prefix += node.children[-1].prefix
             before_node = self._get_before_insertion_node()
             if before_node is None:
                 # The start of the file.
-                self.new_children += node.children
+                self._new_children += node.children
             else:
                 before_node.parent.children += node.children

-    def _parse_scope_node(self, before_node, until_line, line_offset=0):
-        # TODO speed up, shouldn't copy the whole thing all the time.
+    def _get_children_nodes(self, node):
+        nodes = node.children[:-1]
+        if nodes:  # More than an error leaf
+            first_element = nodes[0]
+            if first_element.type == 'error_leaf' and \
+                    first_element.original_type == 'indent':
+                assert nodes[-1].type == 'dedent'
+                # This means that the start and end leaf
+                nodes = nodes[1:-1]
+        return nodes
+
+    def _parse_scope_node(self, until_line):
+        # TODO speed up, shouldn't copy the whole list all the time.
         # memoryview?
         lines_after = self._lines_new[self._parsed_until_line + 1:]
-        tokenizer = self._diff_tokenize(lines_after, until_line, line_offset)
+        tokenizer = self._diff_tokenize(
+            lines_after,
+            until_line,
+            line_offset=self._parsed_until_line
+        )
         self._parser = ParserWithRecovery(
             self._parser._grammar,
-            source=None,
-            tokenizer=tokenizer,
+            source='\n',
             start_parsing=False
         )
-        return self._parser.parse()
+        return self._parser.parse(tokenizer=tokenizer)

-    def _diff_tokenize(lines, until_line, line_offset=0):
+    def _diff_tokenize(self, lines, until_line, line_offset=0):
         is_first_token = True
         omited_first_indent = False
         indent_count = 0
         l = iter(lines)
         tokens = generate_tokens(lambda: next(l, ''))
-        for token_info in tokens:
-            typ = token_info.type
+        for typ, string, start_pos, prefix in tokens:
+            start_pos = start_pos[0] + line_offset, start_pos[1]
             if typ == 'indent':
                 indent_count += 1
                 if is_first_token:
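Note: the _divide_node hunk further up relies on copy.copy, which is shallow — the copied node still shares its children list with the original. A minimal sketch of the safe pattern, rebinding the list before truncating it (the hunk assigns into new_node.children[-1] while the list is still shared, so the original suite's slot is overwritten too; presumably among the open TODOs):

    import copy

    class Node:
        def __init__(self, type_, children):
            self.type = type_
            self.children = children

    def truncate(node, keep):
        # copy.copy() shares `children`; replace the list with a slice
        # instead of mutating it, so the original tree stays intact.
        new_node = copy.copy(node)
        new_node.children = node.children[:keep]
        return new_node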
@@ -192,17 +296,20 @@ class DiffParser():
                 # We are done here, only thing that can come now is an
                 # endmarker or another dedented code block.
                 break
-            elif typ == 'newline' and token_info.start_pos[0] >= until_line:
-                yield token_info
-                x = self.
+            elif typ == 'newline' and start_pos[0] >= until_line:
+                yield tokenize.TokenInfo(typ, string, start_pos, prefix)
+                x = self._parser.pgen_parser.stack
                 # Check if the parser is actually in a valid suite state.
                 if 1:
                     # TODO check if the parser is in a flow, and let it pass if
                     # so.
                     import pdb; pdb.set_trace()
                     break
             is_first_token = False
-            if line_offset != 0:
-                raise NotImplementedError
-                yield tokenize.TokenInfo(*token_info.string[1:])
-            else:
-                yield token_info
-        yield tokenize.TokenInfo(tokenize.ENDMARKER, *token_info.string[1:])
+            yield tokenize.TokenInfo(typ, string, start_pos, prefix)
+
+        typ, string, start_pos, prefix = next(tokens)
+        start_pos = start_pos[0] + line_offset, start_pos[1]
+        yield tokenize.TokenInfo(tokenize.ENDMARKER, string, start_pos, prefix)
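Note: the reworked _diff_tokenize re-emits each token with its line number shifted by line_offset, so that code tokenized in isolation lands at its true position in the full module. The core of that, as a standalone generator — TokenInfo here is a stand-in namedtuple mirroring the four-part tokens the unpacking in the hunk above implies Jedi's tokenizer yields:

    from collections import namedtuple

    TokenInfo = namedtuple('TokenInfo', ['type', 'string', 'start_pos', 'prefix'])

    def offset_tokens(tokens, line_offset):
        # Shift each token's line number; columns are untouched because
        # the re-tokenized text starts at a line boundary.
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            yield TokenInfo(typ, string, start_pos, prefix)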