Working with dedents in error recovery.

Dave Halter
2014-11-28 21:33:40 +01:00
parent 31600b9552
commit 2c684906e3
3 changed files with 32 additions and 10 deletions

View File

@@ -200,7 +200,8 @@ class Parser(object):
         else:
             return pt.Operator(value, start_pos, prefix)
 
-    def error_recovery(self, grammar, stack, typ, value, start_pos):
+    def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
+                       add_token_callback):
         """
         This parser is written in a dynamic way, meaning that this parser
         allows using different grammars (even non-Python). However, error
@@ -208,7 +209,7 @@ class Parser(object):
         """
         # For now just discard everything that is not a suite or
         # file_input, if we detect an error.
-        for index, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
+        for index, (dfa, state, (_type, nodes)) in reversed(list(enumerate(stack))):
             # `suite` can sometimes be only simple_stmt, not stmt.
             symbol = grammar.number2symbol[_type]
             if symbol in ('file_input', 'suite'):
@@ -216,7 +217,17 @@ class Parser(object):
         # No success finding a transition
         print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
         self._stack_removal(grammar, stack, index + 1, value, start_pos)
-        return value not in ('def', 'class')  # Those can always be new statements.
+
+        if value in ('import', 'from', 'class', 'def', 'try', 'while', 'return'):
+            pass
+        elif typ == tokenize.DEDENT:
+            if symbol == 'suite':
+                if len(nodes) > 2:
+                    add_token_callback(typ, value, prefix, start_pos)
+                else:
+                    # If a function or anything else contains a suite that is
+                    # "empty" (just NEWLINE/INDENT), we remove it.
+                    self._stack_removal(grammar, stack, len(stack) - 2, value, start_pos)
 
     def _stack_removal(self, grammar, stack, start_index, value, start_pos):
         def clear_names(children):
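
Roughly, the new branch treats keywords like def/class/import as safe starts of a new statement, and on a DEDENT it either re-feeds the token (when the enclosing suite already has real children) or drops the suite when it only consists of NEWLINE/INDENT so far. A minimal standalone sketch of that decision; the helper name recover_on_dedent and the two callbacks are illustrative stand-ins, not jedi's actual API:

    import tokenize

    ALWAYS_NEW_STATEMENT = ('import', 'from', 'class', 'def', 'try', 'while', 'return')

    def recover_on_dedent(symbol, nodes, typ, value, prefix, start_pos,
                          add_token_callback, remove_current_suite):
        # Simplified stand-in for the new branch in error_recovery().
        if value in ALWAYS_NEW_STATEMENT:
            pass  # These keywords can always open a new statement.
        elif typ == tokenize.DEDENT and symbol == 'suite':
            if len(nodes) > 2:
                # The suite already has real children: let the normal parser
                # machinery consume the DEDENT again.
                add_token_callback(typ, value, prefix, start_pos)
            else:
                # Only NEWLINE/INDENT so far -> the suite is "empty", drop it.
                remove_current_suite()

    # Hypothetical usage with dummy callbacks:
    recover_on_dedent('suite', ['NEWLINE', 'INDENT', 'stmt'],
                      tokenize.DEDENT, '', '', (3, 0),
                      add_token_callback=lambda *args: print('re-feed DEDENT', args),
                      remove_current_suite=lambda: print('drop empty suite'))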

View File

@@ -104,6 +104,7 @@ class Parser(object):
         the parser stack we are able to do error recovery from wrong indents.
         """
         for type, value, prefix, start_pos in tokenizer:
+            #print(token.tok_name[type], value)
             yield type, value, prefix, start_pos
 
     def parse(self, tokenizer):
@@ -111,9 +112,12 @@ class Parser(object):
             if self.addtoken(type, value, prefix, start_pos):
                 break
         else:
-            # We never broke out -- EOF is too soon (how can this happen???)
-            # Hint: It probably doesn't since there's an ENDMARKER.
-            raise ParseError("incomplete input", type, value, start_pos)
+            # We never broke out -- EOF is too soon -- Unfinished statement.
+            self.error_recovery(self.grammar, self.stack, type, value,
+                                start_pos, prefix, self.addtoken)
+            # Add the ENDMARKER again.
+            if not self.addtoken(type, value, prefix, start_pos):
+                raise ParseError("incomplete input", type, value, start_pos)
         return self.rootnode
 
     def addtoken(self, type, value, prefix, start_pos):
@@ -166,9 +170,9 @@ class Parser(object):
                     # Done parsing, but another token is input
                     raise ParseError("too much input", type, value, start_pos)
                 else:
-                    if self.error_recovery(self.grammar, self.stack, type,
-                                           value, start_pos):
-                        break
+                    self.error_recovery(self.grammar, self.stack, type,
+                                        value, start_pos, prefix, self.addtoken)
+                    break
 
     def classify(self, type, value, start_pos):
         """Turn a token into a label.  (Internal)"""

View File

@@ -320,9 +320,16 @@ def generate_tokens(readline, line_offset=0):
                 else:                                  # ordinary string
                     yield Token(STRING, token, spos, prefix)
             elif initial in namechars:                 # ordinary name
-                yield Token(NAME, token, spos, prefix)
                 if token in ALWAYS_BREAK_TOKEN:
                     paren_level = 0
+                    while True:
+                        indent = indents.pop()
+                        if indent > start:
+                            yield Token(DEDENT, '', (lnum, 0), '')
+                        else:
+                            indents.append(indent)
+                            break
+                yield Token(NAME, token, spos, prefix)
             elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
                 continue
             else:
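
On the tokenizer side, a keyword from ALWAYS_BREAK_TOKEN (e.g. def or class) that appears at a smaller column than the current block now closes the open blocks first: indent levels larger than the token's column are popped and a DEDENT is emitted for each, before the NAME token itself. A small self-contained sketch of that loop; Token here is a simple namedtuple and DEDENT/NAME are string stand-ins for the real token numbers:

    from collections import namedtuple

    Token = namedtuple('Token', ['type', 'value', 'start_pos', 'prefix'])
    DEDENT, NAME = 'DEDENT', 'NAME'   # stand-ins for the real token numbers

    def dedents_before_name(indents, start, lnum, token, spos, prefix):
        # Pop every indent level larger than the token's column and emit a
        # DEDENT for each, then emit the NAME token itself.
        while True:
            indent = indents.pop()
            if indent > start:
                yield Token(DEDENT, '', (lnum, 0), '')
            else:
                indents.append(indent)
                break
        yield Token(NAME, token, spos, prefix)

    # Hypothetical example: 'def' found at column 0 while two blocks are open.
    indents = [0, 4, 8]
    for tok in dedents_before_name(indents, start=0, lnum=7, token='def',
                                   spos=(7, 0), prefix=''):
        print(tok)
    # -> two DEDENT tokens, then the NAME token for 'def'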