diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index 74222628..c2743220 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -200,7 +200,8 @@ class Parser(object):
         else:
             return pt.Operator(value, start_pos, prefix)

-    def error_recovery(self, grammar, stack, typ, value, start_pos):
+    def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
+                       add_token_callback):
         """
         This parser is written in a dynamic way, meaning that this parser
         allows using different grammars (even non-Python). However, error
@@ -208,7 +209,7 @@ class Parser(object):
         """
         # For now just discard everything that is not a suite or
         # file_input, if we detect an error.
-        for index, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
+        for index, (dfa, state, (_type, nodes)) in reversed(list(enumerate(stack))):
             # `suite` can sometimes be only simple_stmt, not stmt.
             symbol = grammar.number2symbol[_type]
             if symbol in ('file_input', 'suite'):
@@ -216,7 +217,17 @@ class Parser(object):
         # No success finding a transition
         print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
         self._stack_removal(grammar, stack, index + 1, value, start_pos)
-        return value not in ('def', 'class')
+        # Those can always be new statements.
+        if value in ('import', 'from', 'class', 'def', 'try', 'while', 'return'):
+            pass
+        elif typ == tokenize.DEDENT:
+            if symbol == 'suite':
+                if len(nodes) > 2:
+                    add_token_callback(typ, value, prefix, start_pos)
+                else:
+                    # If a function or anything else contains a suite that is
+                    # "empty" (just NEWLINE/INDENT), we remove it.
+                    self._stack_removal(grammar, stack, len(stack) - 2, value, start_pos)

     def _stack_removal(self, grammar, stack, start_index, value, start_pos):
         def clear_names(children):
diff --git a/jedi/parser/pgen2/parse.py b/jedi/parser/pgen2/parse.py
index f530d57c..08538937 100644
--- a/jedi/parser/pgen2/parse.py
+++ b/jedi/parser/pgen2/parse.py
@@ -104,6 +104,7 @@ class Parser(object):
         the parser stack we are able to do error recovery from wrong indents.
         """
         for type, value, prefix, start_pos in tokenizer:
+            #print(token.tok_name[type], value)
             yield type, value, prefix, start_pos

     def parse(self, tokenizer):
@@ -111,9 +112,12 @@
             if self.addtoken(type, value, prefix, start_pos):
                 break
         else:
-            # We never broke out -- EOF is too soon (how can this happen???)
-            # Hint: It probably doesn't since there's an ENDMARKER.
-            raise ParseError("incomplete input", type, value, start_pos)
+            # We never broke out -- EOF is too soon -- Unfinished statement.
+            self.error_recovery(self.grammar, self.stack, type, value,
+                                start_pos, prefix, self.addtoken)
+            # Add the ENDMARKER again.
+            if not self.addtoken(type, value, prefix, start_pos):
+                raise ParseError("incomplete input", type, value, start_pos)
         return self.rootnode

     def addtoken(self, type, value, prefix, start_pos):
@@ -166,9 +170,9 @@
                     # Done parsing, but another token is input
                     raise ParseError("too much input", type, value, start_pos)
                 else:
-                    if self.error_recovery(self.grammar, self.stack, type,
-                                           value, start_pos):
-                        break
+                    self.error_recovery(self.grammar, self.stack, type,
+                                        value, start_pos, prefix, self.addtoken)
+                    break

     def classify(self, type, value, start_pos):
         """Turn a token into a label.  (Internal)"""
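
The recovery strategy is easier to see stripped of pgen2 details. Below is a minimal, self-contained sketch of the idea, not jedi's actual code; `recover` and the `(symbol, nodes)` stack pairs are invented stand-ins for pgen2's `(dfa, state, node)` triples. On an error, the stack is unwound to the innermost `suite`/`file_input` so the half-parsed statement is discarded, and keywords that can only begin a new statement (mirroring the `if value in (...)` check above) are flagged as safe re-entry points:

    # Illustrative model only -- names and data shapes are invented here.
    ALWAYS_NEW_STATEMENT = ('import', 'from', 'class', 'def', 'try', 'while',
                            'return')

    def recover(stack, value):
        """Unwind `stack` (a list of (symbol, nodes) pairs, innermost last)
        to the enclosing suite and say whether `value` may restart parsing."""
        for index in reversed(range(len(stack))):
            symbol, _nodes = stack[index]
            if symbol in ('file_input', 'suite'):
                break
        del stack[index + 1:]  # drop the half-parsed statement
        return value in ALWAYS_NEW_STATEMENT

    stack = [('file_input', []), ('suite', []), ('expr_stmt', ['x', '+'])]
    print(recover(stack, 'def'))  # True  -> feed 'def' to the parser again
    print(len(stack))             # 2     -> the broken expr_stmt is gone
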
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
index abbf62a3..010eb890 100644
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -320,9 +320,16 @@ def generate_tokens(readline, line_offset=0):
             else:                                  # ordinary string
                 yield Token(STRING, token, spos, prefix)
         elif initial in namechars:                 # ordinary name
-            yield Token(NAME, token, spos, prefix)
             if token in ALWAYS_BREAK_TOKEN:
                 paren_level = 0
+                while True:
+                    indent = indents.pop()
+                    if indent > start:
+                        yield Token(DEDENT, '', (lnum, 0), '')
+                    else:
+                        indents.append(indent)
+                        break
+            yield Token(NAME, token, spos, prefix)
         elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
             continue
         else:
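
The tokenizer half of the recovery attacks the same problem from the other side: when a keyword from `ALWAYS_BREAK_TOKEN` (e.g. `def` or `class`) starts at a column shallower than the current indentation, the DEDENT tokens the source "forgot" are synthesized first, so the parser sees a consistent block structure. A stand-alone sketch of that loop, with a hypothetical helper name and a counter in place of the real `yield Token(DEDENT, ...)`:

    def synthetic_dedents(indents, start):
        """Pop every level of the indent stack deeper than column `start`
        and report how many DEDENT tokens the tokenizer should emit."""
        count = 0
        while True:
            indent = indents.pop()
            if indent > start:
                count += 1  # real code: yield Token(DEDENT, '', (lnum, 0), '')
            else:
                indents.append(indent)
                break
        return count

    indents = [0, 4, 8]                   # indent stack always starts at 0
    print(synthetic_dedents(indents, 4))  # 1 -- closes the 8-column block
    print(indents)                        # [0, 4]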