
First time a test of the new fast parser partially passes.

Dave Halter
2016-08-30 23:12:24 +02:00
parent 42e5777620
commit d505c764de


@@ -10,12 +10,12 @@ from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.common import splitlines
 from jedi.parser import ParserWithRecovery
-from jedi.parser.tree import Module, search_ancestor
+from jedi.parser.tree import Module, search_ancestor, EndMarker
 from jedi.parser.utils import parser_cache
 from jedi.parser import tokenize
 from jedi import debug
 from jedi.parser.tokenize import (generate_tokens, NEWLINE,
-                                  ENDMARKER, INDENT, DEDENT)
+                                  ENDMARKER, INDENT, DEDENT, tok_name)


 class CachedFastParser(type):
@@ -40,6 +40,17 @@ def _merge_names_dicts(base_dict, other_dict):
         base_dict.setdefault(key, []).extend(names)


+def suite_or_file_input_is_valid(parser):
+    stack = parser.pgen_parser.stack
+    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
+        if symbol_number == parser._grammar.symbol2number['suite']:
+            # If we don't have nodes already, the suite is not valid.
+            return bool(nodes)
+    # Not reaching a suite means that we're at the file_input level, where no
+    # valid statement is required; file_input may even be empty.
+    return True
+
+
 class DiffParser():
     endmarker_type = 'endmarker'
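
The stack walk in the new function is the crux of the validity check: a suite that has not collected any child nodes yet must not be cut off mid-parse. A minimal, self-contained sketch of the same idea, using a mocked-up stack in place of jedi's real pgen structures (the symbol numbers are placeholders, not jedi's actual grammar table):

    # Each entry mirrors pgen's (dfa, state, (symbol_number, nodes)) shape.
    SUITE = 300  # hypothetical grammar number for 'suite'

    def suite_is_valid(stack, suite_number=SUITE):
        # Walk from the innermost grammar node outwards.
        for dfa, state, (symbol_number, nodes) in reversed(stack):
            if symbol_number == suite_number:
                # A suite without children is still incomplete.
                return bool(nodes)
        # Only file_input levels reached: always a safe stopping point.
        return True

    stack = [(None, 0, (256, ['<file_input child>'])),
             (None, 2, (SUITE, ['<stmt>']))]
    print(suite_is_valid(stack))  # True: the suite already holds a statement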
@@ -77,6 +88,13 @@ class DiffParser():
         - Set parsed_until_line
         '''
         self._lines_new = lines_new
+        self._added_newline = False
+        # The Python grammar needs a newline at the end of a file.
+        if lines_new[-1] != '':
+            lines_new[-1] += '\n'
+            lines_new.append('')
+            self._added_newline = True
+
         self._reset()

         self._old_children = self._module.children
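
The newline bookkeeping added here exists because Python's grammar only accepts files that end in a newline. A short sketch of the normalization, assuming the line list keeps trailing '\n' characters the way jedi's splitlines output does:

    def ensure_trailing_newline(lines):
        added = False
        if lines[-1] != '':
            lines[-1] += '\n'
            lines.append('')  # the empty line after the final newline
            added = True
        return added

    lines = ['def f():\n', '    pass']   # no newline at end of file
    print(ensure_trailing_newline(lines), lines)
    # True ['def f():\n', '    pass\n', '']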
@@ -90,7 +108,11 @@ class DiffParser():
         sm = difflib.SequenceMatcher(None, lines_old, lines_new)
         print(len(lines_old), len(lines_new), lines_old, lines_new)
         for operation, i1, i2, j1, j2 in sm.get_opcodes():
-            print(operation, i1, i2, j1, j2)
+            print('\t\t', operation, i1, i2, j1, j2)
+            if j2 == len(lines_new):
+                # The empty part after the last newline is not relevant.
+                j2 -= 1
+
             if operation == 'equal':
                 line_offset = j1 - i1
                 self._copy_from_old_parser(line_offset, i2 + 1, j2)
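
This loop is driven entirely by difflib from the standard library; each opcode maps a slice of the old lines onto a slice of the new ones. A small standalone demonstration:

    import difflib

    lines_old = ['a = 1\n', 'b = 2\n', 'c = 3\n', '']
    lines_new = ['a = 1\n', 'b = 20\n', 'c = 3\n', '']
    sm = difflib.SequenceMatcher(None, lines_old, lines_new)
    for operation, i1, i2, j1, j2 in sm.get_opcodes():
        print(operation, i1, i2, j1, j2)
    # equal   0 1 0 1   -> copy nodes from the old tree
    # replace 1 2 1 2   -> reparse only these lines
    # equal   2 4 2 4   -> copy again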
@@ -108,6 +130,9 @@ class DiffParser():
         self._module.children = self._new_children
         # TODO insert endmarker
         print(self._module.get_code())
+        if self._added_newline:
+            self._parser.remove_last_newline()
+        self._parser.source = ''.join(lines_new)

     def _insert(self, until_line_new):
         self._insert_count += 1
@@ -128,7 +153,7 @@ class DiffParser():
         nodes = []
         for node in p_children[index:]:
             if until_line_old < node.end_pos[0]:
-                divided_node = self._divide_node(node)
+                divided_node = self._divide_node(node, until_line_new)
                 if divided_node is not None:
                     nodes.append(divided_node)
                 break
@@ -183,7 +208,6 @@ class DiffParser():
             # endmarker.
             pass

-        print(last_non_endmarker)
         if last_non_endmarker.type in ('newline', 'dedent'):
             # Newlines end on the next line, which means that they would cover
             # the next line. That line is not fully parsed at this point.
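
The comment about newlines covering the next line follows from how end positions are computed for leaves: a value that ends in '\n' has its end position at column 0 of the following line. A simplified end_pos computation makes this concrete (not jedi's exact implementation):

    def end_pos(start_line, start_col, value):
        lines = value.split('\n')
        if len(lines) > 1:
            return start_line + len(lines) - 1, len(lines[-1])
        return start_line, start_col + len(value)

    print(end_pos(5, 10, '\n'))   # (6, 0): the leaf already touches line 6
    print(end_pos(5, 10, 'abc'))  # (5, 13): stays on its own line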
@@ -200,7 +224,7 @@ class DiffParser():
             nodes = nodes[:-1]
         if not nodes:
             return self._module
-        print("X", nodes)
+        print("insert_nodes", nodes)

         # Now the preparations are done. We are inserting the nodes.
         if before_node is None:  # Everything is empty.
@@ -344,7 +368,7 @@ class DiffParser():
         return nodes

     def _parse_scope_node(self, until_line):
-        print('PARSE', until_line, self._parsed_until_line)
+        print('PARSE', self._parsed_until_line, until_line)
         # TODO speed up, shouldn't copy the whole list all the time.
         # memoryview?
         lines_after = self._lines_new[self._parsed_until_line:]
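
One way the TODO about copying could go, sketched with itertools.islice: an iterator view walks the tail of the list without copying it on every call. This is a possible direction, not what the commit does:

    from itertools import islice

    lines = ['a\n', 'b\n', 'c\n', 'd\n']
    parsed_until_line = 2

    # lines[parsed_until_line:] copies the tail on every call; islice
    # yields the same elements without building a new list.
    lines_after = islice(lines, parsed_until_line, None)
    print(list(lines_after))  # ['c\n', 'd\n']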
@@ -354,12 +378,12 @@ class DiffParser():
             until_line,
             line_offset=self._parsed_until_line
         )
-        self._parser = ParserWithRecovery(
+        self._active_parser = ParserWithRecovery(
             self._parser._grammar,
             source='\n',
             start_parsing=False
         )
-        return self._parser.parse(tokenizer=tokenizer)
+        return self._active_parser.parse(tokenizer=tokenizer)

     def _post_parse(self):
         # Add the used names from the old parser to the new one.
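
The used-names merge in the next hunk follows the same setdefault idiom as _merge_names_dicts earlier in the file. A standalone sketch of that merge pattern, with placeholder strings standing in for jedi's Name objects:

    def merge_names_dicts(base_dict, other_dict):
        for key, names in other_dict.items():
            base_dict.setdefault(key, []).extend(names)

    new_used_names = {'foo': ['<Name foo@1>']}
    old_used_names = {'foo': ['<Name foo@7>'], 'bar': ['<Name bar@3>']}
    merge_names_dicts(new_used_names, old_used_names)
    print(sorted(new_used_names))  # ['bar', 'foo']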
@@ -373,41 +397,51 @@ class DiffParser():
                 if name.start_pos[0] in copied_line_numbers:
                     new_used_names.setdefault(key, []).add(name)

+        # Add an endmarker.
+        last_leaf = self._temp_module.last_leaf()
+        while last_leaf.type == 'dedent':
+            last_leaf = last_leaf.get_previous_leaf()
+        endmarker = EndMarker(self._parser.position_modifier, '', last_leaf.end_pos, self._prefix)
+        endmarker.parent = self._module
+        self._new_children.append(endmarker)

     def _diff_tokenize(self, lines, until_line, line_offset=0):
         is_first_token = True
-        omited_first_indent = False
-        indent_count = 0
+        omitted_first_indent = False
+        indents = []
         l = iter(lines)
-        tokens = generate_tokens(lambda: next(l, ''))
+        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
             if typ == tokenize.INDENT:
-                indent_count += 1
+                indents.append(start_pos[1])
                 if is_first_token:
-                    omited_first_indent = True
+                    omitted_first_indent = True
                     # We want to get rid of indents that are only here because
                     # we only parse part of the file. These indents would only
                     # get parsed as error leafs, which doesn't make any sense.
                     is_first_token = False
                     continue
-            elif typ == tokenize.DEDENT:
-                indent_count -= 1
-                if omited_first_indent and indent_count == 0:
+            is_first_token = False
+            if typ == tokenize.DEDENT:
+                indents.pop()
+                if omitted_first_indent and not indents:
+                    # We are done here, only thing that can come now is an
+                    # endmarker or another dedented code block.
                     break
             elif typ == tokenize.NEWLINE and start_pos[0] >= until_line:
                 yield tokenize.TokenInfo(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
-                if 1:
-                    x = self._parser.pgen_parser.stack
-                    # TODO check if the parser is in a flow, and let it pass if
-                    # so.
-                    import pdb; pdb.set_trace()
+                if suite_or_file_input_is_valid(self._active_parser):
+                    while len(indents) > int(omitted_first_indent):
+                        indent_pos = start_pos[0] + 1, indents.pop()
+                        yield tokenize.TokenInfo(tokenize.DEDENT, '', indent_pos, '')
                     break
+                else:
+                    continue
-            is_first_token = False
-            print('tok', typ, string, start_pos)
+            print('tok', tok_name[typ], repr(string), start_pos)
             yield tokenize.TokenInfo(typ, string, start_pos, prefix)
         typ, string, start_pos, prefix = next(tokens)
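
The dedent synthesis at the end of _diff_tokenize is what lets a partial parse stop cleanly at a newline: one DEDENT per still-open indentation level is emitted before breaking out of the loop. A toy version with simplified token tuples (jedi's TokenInfo also carries a prefix):

    def close_open_indents(indents, omitted_first_indent, line):
        # Pop every indent that was actually handed to the parser; the
        # omitted first indent never reached it, so it stays unpopped.
        while len(indents) > int(omitted_first_indent):
            col = indents.pop()
            yield ('DEDENT', '', (line + 1, col), '')

    indents = [4, 8, 12]   # columns of the INDENTs seen, first one omitted
    for tok in close_open_indents(indents, omitted_first_indent=True, line=12):
        print(tok)
    # ('DEDENT', '', (13, 12), '')
    # ('DEDENT', '', (13, 8), '')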