From d505c764deb8fe83e9bc82eaae8aafa41509cc7e Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Tue, 30 Aug 2016 23:12:24 +0200
Subject: [PATCH] First time a test of the new fast parser partially passes.

---
 jedi/parser/fast.py | 84 +++++++++++++++++++++++++++++++--------------
 1 file changed, 59 insertions(+), 25 deletions(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index dc8cda82..7231ee33 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -10,12 +10,12 @@ from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.common import splitlines
 from jedi.parser import ParserWithRecovery
-from jedi.parser.tree import Module, search_ancestor
+from jedi.parser.tree import Module, search_ancestor, EndMarker
 from jedi.parser.utils import parser_cache
 from jedi.parser import tokenize
 from jedi import debug
 from jedi.parser.tokenize import (generate_tokens, NEWLINE,
-                                  ENDMARKER, INDENT, DEDENT)
+                                  ENDMARKER, INDENT, DEDENT, tok_name)
 
 
 class CachedFastParser(type):
@@ -40,6 +40,17 @@ def _merge_names_dicts(base_dict, other_dict):
         base_dict.setdefault(key, []).extend(names)
 
 
+def suite_or_file_input_is_valid(parser):
+    stack = parser.pgen_parser.stack
+    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
+        if symbol_number == parser._grammar.symbol2number['suite']:
+            # If we don't have nodes already, the suite is not valid.
+            return bool(nodes)
+    # Not reaching a suite means that we're dealing with file_input levels
+    # where there's no need for a valid statement in it. It can also be empty.
+    return True
+
+
 class DiffParser():
     endmarker_type = 'endmarker'
@@ -77,6 +88,13 @@ class DiffParser():
         - Set parsed_until_line
         '''
         self._lines_new = lines_new
+        self._added_newline = False
+        # The Python grammar needs a newline at the end of a file.
+        if lines_new[-1] != '':
+            lines_new[-1] += '\n'
+            lines_new.append('')
+            self._added_newline = True
+
         self._reset()
 
         self._old_children = self._module.children
@@ -90,7 +108,11 @@ class DiffParser():
         sm = difflib.SequenceMatcher(None, lines_old, lines_new)
         print(len(lines_old), len(lines_new), lines_old, lines_new)
         for operation, i1, i2, j1, j2 in sm.get_opcodes():
-            print(operation, i1, i2, j1, j2)
+            print('\t\t', operation, i1, i2, j1, j2)
+            if j2 == len(lines_new):
+                # The empty part after the last newline is not relevant.
+                j2 -= 1
+
             if operation == 'equal':
                 line_offset = j1 - i1
                 self._copy_from_old_parser(line_offset, i2 + 1, j2)
@@ -108,6 +130,9 @@ class DiffParser():
         self._module.children = self._new_children
         # TODO insert endmarker
         print(self._module.get_code())
+        if self._added_newline:
+            self._parser.remove_last_newline()
+        self._parser.source = ''.join(lines_new)
 
     def _insert(self, until_line_new):
         self._insert_count += 1
@@ -128,7 +153,7 @@ class DiffParser():
         nodes = []
         for node in p_children[index:]:
             if until_line_old < node.end_pos[0]:
-                divided_node = self._divide_node(node)
+                divided_node = self._divide_node(node, until_line_new)
                 if divided_node is not None:
                     nodes.append(divided_node)
                 break
@@ -183,7 +208,6 @@ class DiffParser():
             # endmarker.
             pass
 
-        print(last_non_endmarker)
        if last_non_endmarker.type in ('newline', 'dedent'):
             # Newlines end on the next line, which means that they would cover
             # the next line. That line is not fully parsed at this point.
@@ -200,7 +224,7 @@ class DiffParser():
             nodes = nodes[:-1]
         if not nodes:
             return self._module
-        print("X", nodes)
+        print("insert_nodes", nodes)
 
         # Now the preparations are done. We are inserting the nodes.
         if before_node is None:  # Everything is empty.
@@ -344,7 +368,7 @@ class DiffParser():
         return nodes
 
     def _parse_scope_node(self, until_line):
-        print('PARSE', until_line, self._parsed_until_line)
+        print('PARSE', self._parsed_until_line, until_line)
         # TODO speed up, shouldn't copy the whole list all the time.
         # memoryview?
         lines_after = self._lines_new[self._parsed_until_line:]
@@ -354,12 +378,12 @@ class DiffParser():
             until_line,
             line_offset=self._parsed_until_line
         )
-        self._parser = ParserWithRecovery(
+        self._active_parser = ParserWithRecovery(
             self._parser._grammar,
             source='\n',
             start_parsing=False
         )
-        return self._parser.parse(tokenizer=tokenizer)
+        return self._active_parser.parse(tokenizer=tokenizer)
 
     def _post_parse(self):
         # Add the used names from the old parser to the new one.
@@ -373,41 +397,51 @@ class DiffParser():
                 if name.start_pos[0] in copied_line_numbers:
                     new_used_names.setdefault(key, []).add(name)
 
+        # Add an endmarker.
+        last_leaf = self._temp_module.last_leaf()
+        while last_leaf.type == 'dedent':
+            last_leaf = last_leaf.get_previous_leaf()
+        endmarker = EndMarker(self._parser.position_modifier, '', last_leaf.end_pos, self._prefix)
+        endmarker.parent = self._module
+        self._new_children.append(endmarker)
+
     def _diff_tokenize(self, lines, until_line, line_offset=0):
         is_first_token = True
-        omited_first_indent = False
-        indent_count = 0
+        omitted_first_indent = False
+        indents = []
         l = iter(lines)
-        tokens = generate_tokens(lambda: next(l, ''))
+        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
             if typ == tokenize.INDENT:
-                indent_count += 1
+                indents.append(start_pos[1])
                 if is_first_token:
-                    omited_first_indent = True
+                    omitted_first_indent = True
                     # We want to get rid of indents that are only here because
                     # we only parse part of the file. These indents would only
                     # get parsed as error leafs, which doesn't make any sense.
+                    is_first_token = False
                     continue
-            elif typ == tokenize.DEDENT:
-                indent_count -= 1
-                if omited_first_indent and indent_count == 0:
+
+            is_first_token = False
+
+            if typ == tokenize.DEDENT:
+                indents.pop()
+                if omitted_first_indent and not indents:
                     # We are done here, only thing that can come now is an
                     # endmarker or another dedented code block.
                     break
             elif typ == tokenize.NEWLINE and start_pos[0] >= until_line:
                 yield tokenize.TokenInfo(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
-                if 1:
-                    x = self._parser.pgen_parser.stack
-                    # TODO check if the parser is in a flow, and let it pass if
-                    # so.
-                    import pdb; pdb.set_trace()
+                if suite_or_file_input_is_valid(self._active_parser):
+                    while len(indents) > int(omitted_first_indent):
+                        indent_pos = start_pos[0] + 1, indents.pop()
+                        yield tokenize.TokenInfo(tokenize.DEDENT, '', indent_pos, '')
                     break
+                else:
+                    continue
 
-            is_first_token = False
-
-            print('tok', typ, string, start_pos)
+            print('tok', tok_name[typ], repr(string), start_pos)
             yield tokenize.TokenInfo(typ, string, start_pos, prefix)
 
         typ, string, start_pos, prefix = next(tokens)
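
Note on the opcode loop in DiffParser.update(): difflib.SequenceMatcher
yields (operation, i1, i2, j1, j2) opcodes over the old and new line lists.
'equal' ranges can be copied over from the old syntax tree with only a line
offset; everything else has to be reparsed up to the last changed line.
Below is a minimal, stdlib-only sketch of that planning step; plan_update()
and the ('copy', ...)/('parse', ...) tuples are hypothetical stand-ins for
_copy_from_old_parser() and _insert(), not jedi API:

    import difflib

    def plan_update(lines_old, lines_new):
        sm = difflib.SequenceMatcher(None, lines_old, lines_new)
        plan = []
        for operation, i1, i2, j1, j2 in sm.get_opcodes():
            if j2 == len(lines_new):
                # The empty part after the last newline is not relevant.
                j2 -= 1
            if operation == 'equal':
                # Equal ranges are copied from the old tree; their line
                # numbers only need to be shifted by j1 - i1.
                plan.append(('copy', j1 - i1, i2 + 1, j2))
            else:
                # 'replace', 'insert' and 'delete' ranges are reparsed
                # up to the last changed line of the new source.
                plan.append(('parse', j2))
        return plan

    old = ['def f():\n', '    return 1\n', '']
    new = ['def f():\n', '    return 2\n', '']
    print(plan_update(old, new))
    # [('copy', 0, 2, 1), ('parse', 2), ('copy', 0, 4, 2)]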
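
Note on _diff_tokenize(): when only a slice of the file is retokenized, the
leading INDENT of that slice is dropped, and any suites still open at the
stop line are closed with synthetic DEDENT tokens so the recovering parser
sees a well-formed stream. The following runnable sketch shows that
bookkeeping against the stdlib tokenizer; tokenize_until() is a hypothetical
helper, and jedi's own tokenizer differs in details (4-tuple tokens with
prefixes instead of the stdlib 5-tuples):

    import io
    import tokenize

    def tokenize_until(source, until_line):
        indents = []
        tokens = tokenize.generate_tokens(io.StringIO(source).readline)
        for tok in tokens:
            if tok.type == tokenize.INDENT:
                # Remember the width of every open indentation level.
                indents.append(len(tok.string))
            elif tok.type == tokenize.DEDENT:
                indents.pop()
            yield tok
            if tok.type == tokenize.NEWLINE and tok.start[0] >= until_line:
                # Close every suite that is still open at the stop point.
                while indents:
                    pos = (tok.start[0] + 1, indents.pop())
                    yield tokenize.TokenInfo(tokenize.DEDENT, '', pos, pos, '')
                break

    source = "def f():\n    if x:\n        y = 1\n    z = 2\n"
    for tok in tokenize_until(source, until_line=3):
        print(tokenize.tok_name[tok.type], repr(tok.string))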