Working with dedents in error recovery.

Dave Halter
2014-11-28 21:33:40 +01:00
parent 31600b9552
commit 2c684906e3
3 changed files with 32 additions and 10 deletions

View File

@@ -200,7 +200,8 @@ class Parser(object):
         else:
             return pt.Operator(value, start_pos, prefix)
 
-    def error_recovery(self, grammar, stack, typ, value, start_pos):
+    def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
+                       add_token_callback):
         """
         This parser is written in a dynamic way, meaning that this parser
         allows using different grammars (even non-Python). However, error
@@ -208,7 +209,7 @@ class Parser(object):
         """
         # For now just discard everything that is not a suite or
         # file_input, if we detect an error.
-        for index, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
+        for index, (dfa, state, (_type, nodes)) in reversed(list(enumerate(stack))):
             # `suite` can sometimes be only simple_stmt, not stmt.
             symbol = grammar.number2symbol[_type]
             if symbol in ('file_input', 'suite'):
@@ -216,7 +217,17 @@ class Parser(object):
         # No success finding a transition
         print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
         self._stack_removal(grammar, stack, index + 1, value, start_pos)
-        return value not in ('def', 'class')  # Those can always be new statements.
+
+        if value in ('import', 'from', 'class', 'def', 'try', 'while', 'return'):
+            pass
+        elif typ == tokenize.DEDENT:
+            if symbol == 'suite':
+                if len(nodes) > 2:
+                    add_token_callback(typ, value, prefix, start_pos)
+                else:
+                    # If a function or anything else contains a suite that is
+                    # "empty" (just NEWLINE/INDENT), we remove it.
+                    self._stack_removal(grammar, stack, len(stack) - 2, value, start_pos)
 
     def _stack_removal(self, grammar, stack, start_index, value, start_pos):
         def clear_names(children):
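
Roughly, the new branch treats keywords like def/class/import as safe starts of a new statement, and on a DEDENT it either re-feeds the token (when the enclosing suite already has real children) or drops the suite when it only consists of NEWLINE/INDENT so far. A minimal standalone sketch of that decision; the helper name recover_on_dedent and the two callbacks are illustrative stand-ins, not jedi's actual API:

    import tokenize

    ALWAYS_NEW_STATEMENT = ('import', 'from', 'class', 'def', 'try', 'while', 'return')

    def recover_on_dedent(symbol, nodes, typ, value, prefix, start_pos,
                          add_token_callback, remove_current_suite):
        # Simplified stand-in for the new branch in error_recovery().
        if value in ALWAYS_NEW_STATEMENT:
            pass  # These keywords can always open a new statement.
        elif typ == tokenize.DEDENT and symbol == 'suite':
            if len(nodes) > 2:
                # The suite already has real children: let the normal parser
                # machinery consume the DEDENT again.
                add_token_callback(typ, value, prefix, start_pos)
            else:
                # Only NEWLINE/INDENT so far -> the suite is "empty", drop it.
                remove_current_suite()

    # Hypothetical usage with dummy callbacks:
    recover_on_dedent('suite', ['NEWLINE', 'INDENT', 'stmt'],
                      tokenize.DEDENT, '', '', (3, 0),
                      add_token_callback=lambda *args: print('re-feed DEDENT', args),
                      remove_current_suite=lambda: print('drop empty suite'))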

View File

@@ -104,6 +104,7 @@ class Parser(object):
         the parser stack we are able to do error recovery from wrong indents.
         """
         for type, value, prefix, start_pos in tokenizer:
+            #print(token.tok_name[type], value)
             yield type, value, prefix, start_pos
 
     def parse(self, tokenizer):
@@ -111,9 +112,12 @@ class Parser(object):
             if self.addtoken(type, value, prefix, start_pos):
                 break
         else:
-            # We never broke out -- EOF is too soon (how can this happen???)
-            # Hint: It probably doesn't since there's an ENDMARKER.
-            raise ParseError("incomplete input", type, value, start_pos)
+            # We never broke out -- EOF is too soon -- Unfinished statement.
+            self.error_recovery(self.grammar, self.stack, type, value,
+                                start_pos, prefix, self.addtoken)
+            # Add the ENDMARKER again.
+            if not self.addtoken(type, value, prefix, start_pos):
+                raise ParseError("incomplete input", type, value, start_pos)
         return self.rootnode
 
     def addtoken(self, type, value, prefix, start_pos):
@@ -166,9 +170,9 @@ class Parser(object):
                     # Done parsing, but another token is input
                     raise ParseError("too much input", type, value, start_pos)
                 else:
-                    if self.error_recovery(self.grammar, self.stack, type,
-                                           value, start_pos):
-                        break
+                    self.error_recovery(self.grammar, self.stack, type,
+                                        value, start_pos, prefix, self.addtoken)
+                    break
 
     def classify(self, type, value, start_pos):
         """Turn a token into a label.  (Internal)"""

View File

@@ -320,9 +320,16 @@ def generate_tokens(readline, line_offset=0):
                 else:                                  # ordinary string
                     yield Token(STRING, token, spos, prefix)
             elif initial in namechars:                 # ordinary name
-                yield Token(NAME, token, spos, prefix)
                 if token in ALWAYS_BREAK_TOKEN:
                     paren_level = 0
+                    while True:
+                        indent = indents.pop()
+                        if indent > start:
+                            yield Token(DEDENT, '', (lnum, 0), '')
+                        else:
+                            indents.append(indent)
+                            break
+                yield Token(NAME, token, spos, prefix)
             elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
                 continue
             else:
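
On the tokenizer side, a keyword from ALWAYS_BREAK_TOKEN (e.g. def or class) that appears at a smaller column than the current block now closes the open blocks first: indent levels larger than the token's column are popped and a DEDENT is emitted for each, before the NAME token itself. A small self-contained sketch of that loop; Token here is a simple namedtuple and DEDENT/NAME are string stand-ins for the real token numbers:

    from collections import namedtuple

    Token = namedtuple('Token', ['type', 'value', 'start_pos', 'prefix'])
    DEDENT, NAME = 'DEDENT', 'NAME'   # stand-ins for the real token numbers

    def dedents_before_name(indents, start, lnum, token, spos, prefix):
        # Pop every indent level larger than the token's column and emit a
        # DEDENT for each, then emit the NAME token itself.
        while True:
            indent = indents.pop()
            if indent > start:
                yield Token(DEDENT, '', (lnum, 0), '')
            else:
                indents.append(indent)
                break
        yield Token(NAME, token, spos, prefix)

    # Hypothetical example: 'def' found at column 0 while two blocks are open.
    indents = [0, 4, 8]
    for tok in dedents_before_name(indents, start=0, lnum=7, token='def',
                                   spos=(7, 0), prefix=''):
        print(tok)
    # -> two DEDENT tokens, then the NAME token for 'def'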