forked from VimPlug/jedi

Working with dedents in error recovery.

Dave Halter
2014-11-28 21:33:40 +01:00
parent 31600b9552
commit 2c684906e3
3 changed files with 32 additions and 10 deletions

View File

@@ -200,7 +200,8 @@ class Parser(object):
         else:
             return pt.Operator(value, start_pos, prefix)

-    def error_recovery(self, grammar, stack, typ, value, start_pos):
+    def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
+                       add_token_callback):
         """
         This parser is written in a dynamic way, meaning that this parser
         allows using different grammars (even non-Python). However, error
@@ -208,7 +209,7 @@ class Parser(object):
         """
         # For now just discard everything that is not a suite or
         # file_input, if we detect an error.
-        for index, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
+        for index, (dfa, state, (_type, nodes)) in reversed(list(enumerate(stack))):
             # `suite` can sometimes be only simple_stmt, not stmt.
             symbol = grammar.number2symbol[_type]
             if symbol in ('file_input', 'suite'):
@@ -216,7 +217,17 @@ class Parser(object):
             # No success finding a transition
             print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
         self._stack_removal(grammar, stack, index + 1, value, start_pos)
-        return value not in ('def', 'class')
+        # Those can always be new statements.
+        if value in ('import', 'from', 'class', 'def', 'try', 'while', 'return'):
+            pass
+        elif typ == tokenize.DEDENT:
+            if symbol == 'suite':
+                if len(nodes) > 2:
+                    add_token_callback(typ, value, prefix, start_pos)
+                else:
+                    # If a function or anything else contains a suite that is
+                    # "empty" (just NEWLINE/INDENT), we remove it.
+                    self._stack_removal(grammar, stack, len(stack) - 2, value, start_pos)

     def _stack_removal(self, grammar, stack, start_index, value, start_pos):
         def clear_names(children):

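The recovery branches above read like a small decision table. Below is a standalone paraphrase for illustration; recovery_action and its parameter names are hypothetical, only the branch logic mirrors the diff:

# Sketch of the decision added to error_recovery(); names are
# hypothetical, the branches mirror the hunk above.
from tokenize import DEDENT

def recovery_action(typ, value, symbol, node_count):
    # Keywords that can always begin a new statement get no special
    # treatment; the generic stack cleanup already handles them.
    if value in ('import', 'from', 'class', 'def', 'try', 'while', 'return'):
        return 'new-statement'
    if typ == DEDENT and symbol == 'suite':
        if node_count > 2:
            # The suite holds content beyond NEWLINE/INDENT, so the
            # DEDENT is fed back to the parser (add_token_callback).
            return 'readd-dedent'
        # An "empty" suite (only NEWLINE/INDENT) is dropped from the
        # stack (_stack_removal with len(stack) - 2 in the diff).
        return 'drop-empty-suite'
    return 'generic-cleanup'

# A suite holding only NEWLINE and INDENT has two nodes, hence the > 2:
assert recovery_action(DEDENT, '', 'suite', 2) == 'drop-empty-suite'
assert recovery_action(DEDENT, '', 'suite', 3) == 'readd-dedent'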
View File

@@ -104,6 +104,7 @@ class Parser(object):
         the parser stack we are able to do error recovery from wrong indents.
         """
         for type, value, prefix, start_pos in tokenizer:
+            #print(token.tok_name[type], value)
             yield type, value, prefix, start_pos

     def parse(self, tokenizer):
@@ -111,9 +112,12 @@
             if self.addtoken(type, value, prefix, start_pos):
                 break
         else:
-            # We never broke out -- EOF is too soon (how can this happen???)
-            # Hint: It probably doesn't since there's an ENDMARKER.
-            raise ParseError("incomplete input", type, value, start_pos)
+            # We never broke out -- EOF is too soon -- Unfinished statement.
+            self.error_recovery(self.grammar, self.stack, type, value,
+                                start_pos, prefix, self.addtoken)
+            # Add the ENDMARKER again.
+            if not self.addtoken(type, value, prefix, start_pos):
+                raise ParseError("incomplete input", type, value, start_pos)
         return self.rootnode

     def addtoken(self, type, value, prefix, start_pos):
@@ -166,9 +170,9 @@
                 # Done parsing, but another token is input
                 raise ParseError("too much input", type, value, start_pos)
             else:
-                if self.error_recovery(self.grammar, self.stack, type,
-                                       value, start_pos):
-                    break
+                self.error_recovery(self.grammar, self.stack, type,
+                                    value, start_pos, prefix, self.addtoken)
+                break

     def classify(self, type, value, start_pos):
         """Turn a token into a label. (Internal)"""

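The net effect of the parse() change: hitting EOF in the middle of a statement no longer raises immediately; the parser recovers once and then retries the ENDMARKER. A minimal sketch of that loop shape, with placeholder callables rather than jedi's real objects:

def parse(tokens, addtoken, error_recovery):
    # `tokens` yields (type, value, prefix, start_pos) tuples and always
    # ends with an ENDMARKER; addtoken() returns True once the grammar
    # accepts. Both callables are placeholders for this sketch.
    typ = value = prefix = start_pos = None
    for typ, value, prefix, start_pos in tokens:
        if addtoken(typ, value, prefix, start_pos):
            break  # accepting state reached, done
    else:
        # EOF came too soon -- unfinished statement: recover, then
        # retry the final token (the ENDMARKER) exactly once.
        error_recovery(typ, value, prefix, start_pos)
        if not addtoken(typ, value, prefix, start_pos):
            raise SyntaxError('incomplete input')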
View File

@@ -320,9 +320,16 @@ def generate_tokens(readline, line_offset=0):
             else:                                  # ordinary string
                 yield Token(STRING, token, spos, prefix)
         elif initial in namechars:                 # ordinary name
-            yield Token(NAME, token, spos, prefix)
+            if token in ALWAYS_BREAK_TOKEN:
+                paren_level = 0
+                while True:
+                    indent = indents.pop()
+                    if indent > start:
+                        yield Token(DEDENT, '', (lnum, 0), '')
+                    else:
+                        indents.append(indent)
+                        break
+            yield Token(NAME, token, spos, prefix)
         elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
             continue
         else:
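The tokenizer change synthesizes DEDENTs whenever a statement keyword from ALWAYS_BREAK_TOKEN shows up at a shallower column than the current indentation, so the parser sees the blocks above it closed. A standalone paraphrase of that popping loop, under a hypothetical helper name:

def synthesize_dedents(indents, start, lnum):
    # Close every indentation level deeper than the column `start`
    # where the breaking keyword (e.g. `def`) begins, emitting one
    # DEDENT token per level, mirroring the loop in the diff above.
    dedents = []
    while True:
        indent = indents.pop()
        if indent > start:
            dedents.append(('DEDENT', '', (lnum, 0), ''))
        else:
            indents.append(indent)  # this level is still open
            break
    return dedents

# A `def` at column 0 while the indent stack is [0, 4, 8] closes two levels:
assert len(synthesize_dedents([0, 4, 8], 0, 5)) == 2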