Try to completely remove the word symbol and use nonterminal

The ones that we could not remove are in grammar.py, because that's the public documented API.
Dave Halter
2018-06-17 18:30:20 +02:00
parent 640f544af9
commit 73ce57428b
7 changed files with 89 additions and 86 deletions
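
For reference, a minimal sketch (not part of the commit) of what the rename means for the internal pgen grammar mappings used in the hunks below. It assumes a parso checkout at this revision; `_pgen_grammar` is an internal attribute whose name may differ between versions, while the mapping names themselves are taken from the diff.

# Sketch only: the renamed mappings, assuming a parso checkout at this commit.
import parso

pgen_grammar = parso.load_grammar()._pgen_grammar  # internal attribute, assumed name

# Before this commit:  pgen_grammar.symbol2number['file_input']
# After this commit:
number = pgen_grammar.nonterminal2number['file_input']
assert pgen_grammar.number2nonterminal[number] == 'file_input'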


@@ -62,8 +62,8 @@ class Parser(BaseParser):
         FSTRING_END: tree.FStringEnd,
     }
-    def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
+    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
         self.syntax_errors = []
         self._omit_dedent_list = []
@@ -81,19 +81,19 @@ class Parser(BaseParser):
     def parse(self, tokens):
         if self._error_recovery:
-            if self._start_symbol != 'file_input':
+            if self._start_nonterminal != 'file_input':
                 raise NotImplementedError
             tokens = self._recovery_tokenize(tokens)
         node = super(Parser, self).parse(tokens)
-        if self._start_symbol == 'file_input' != node.type:
+        if self._start_nonterminal == 'file_input' != node.type:
             # If there's only one statement, we get back a non-module. That's
             # not what we want, we want a module, so we add it here:
             node = self.convert_node(
                 self._pgen_grammar,
-                self._pgen_grammar.symbol2number['file_input'],
+                self._pgen_grammar.nonterminal2number['file_input'],
                 [node]
             )
@@ -107,24 +107,24 @@ class Parser(BaseParser):
         grammar rule produces a new complete node, so that the tree is build
         strictly bottom-up.
         """
-        # TODO REMOVE symbol, we don't want type here.
-        symbol = pgen_grammar.number2symbol[type]
+        # TODO REMOVE nonterminal, we don't want type here.
+        nonterminal = pgen_grammar.number2nonterminal[type]
         try:
-            return self.node_map[symbol](children)
+            return self.node_map[nonterminal](children)
         except KeyError:
-            if symbol == 'suite':
+            if nonterminal == 'suite':
                 # We don't want the INDENT/DEDENT in our parser tree. Those
                 # leaves are just cancer. They are virtual leaves and not real
                 # ones and therefore have pseudo start/end positions and no
                 # prefixes. Just ignore them.
                 children = [children[0]] + children[2:-1]
-            elif symbol == 'list_if':
+            elif nonterminal == 'list_if':
                 # Make transitioning from 2 to 3 easier.
-                symbol = 'comp_if'
-            elif symbol == 'listmaker':
+                nonterminal = 'comp_if'
+            elif nonterminal == 'listmaker':
                 # Same as list_if above.
-                symbol = 'testlist_comp'
-            return self.default_node(symbol, children)
+                nonterminal = 'testlist_comp'
+            return self.default_node(nonterminal, children)
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
@@ -138,10 +138,10 @@ class Parser(BaseParser):
     def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                        add_token_callback):
-        def get_symbol_and_nodes(stack):
+        def get_nonterminal_and_nodes(stack):
             for dfa, state, (type_, nodes) in stack:
-                symbol = pgen_grammar.number2symbol[type_]
-                yield symbol, nodes
+                nonterminal = pgen_grammar.number2nonterminal[type_]
+                yield nonterminal, nodes
         tos_nodes = stack.get_tos_nodes()
         if tos_nodes:
@@ -149,7 +149,7 @@ class Parser(BaseParser):
         else:
             last_leaf = None
-        if self._start_symbol == 'file_input' and \
+        if self._start_nonterminal == 'file_input' and \
                 (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
             def reduce_stack(states, newstate):
                 # reduce
@@ -168,13 +168,13 @@ class Parser(BaseParser):
             ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
             dfa, state, (type_, nodes) = stack[-1]
-            symbol = pgen_grammar.number2symbol[type_]
+            nonterminal = pgen_grammar.number2nonterminal[type_]
             states, first = dfa
             arcs = states[state]
             # Look for a state with this label
             for i, newstate in arcs:
                 if ilabel == i:
-                    if symbol == 'simple_stmt':
+                    if nonterminal == 'simple_stmt':
                         # This is basically shifting
                         stack[-1] = (dfa, newstate, (type_, nodes))
@@ -182,12 +182,12 @@ class Parser(BaseParser):
                         add_token_callback(typ, value, start_pos, prefix)
                         return
                     # Check if we're at the right point
-                    #for symbol, nodes in get_symbol_and_nodes(stack):
+                    #for nonterminal, nodes in get_nonterminal_and_nodes(stack):
                     #    self.pgen_parser._pop()
                     #break
                     break
-            #symbol = pgen_grammar.number2symbol[type_]
+            #nonterminal = pgen_grammar.number2nonterminal[type_]
         if not self._error_recovery:
             return super(Parser, self).error_recovery(
@@ -198,21 +198,21 @@ class Parser(BaseParser):
             # For now just discard everything that is not a suite or
             # file_input, if we detect an error.
             one_line_suite = False
-            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
+            for index, (nonterminal, nodes) in reversed(list(enumerate(get_nonterminal_and_nodes(stack)))):
                 # `suite` can sometimes be only simple_stmt, not stmt.
                 if one_line_suite:
                     break
-                elif symbol == 'file_input':
+                elif nonterminal == 'file_input':
                     break
-                elif symbol == 'suite':
+                elif nonterminal == 'suite':
                     if len(nodes) > 1:
                         break
                     elif not nodes:
                         one_line_suite = True
                         # `suite` without an indent are error nodes.
-            return index, symbol, nodes
+            return index, nonterminal, nodes
 
-        index, symbol, nodes = current_suite(stack)
+        index, nonterminal, nodes = current_suite(stack)
         # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
         if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
@@ -226,11 +226,11 @@ class Parser(BaseParser):
             error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
             stack[-1][2][1].append(error_leaf)
-        if symbol == 'suite':
+        if nonterminal == 'suite':
             dfa, state, node = stack[-1]
             states, first = dfa
             arcs = states[state]
-            intended_label = pgen_grammar.symbol2label['stmt']
+            intended_label = pgen_grammar.nonterminal2label['stmt']
             # Introduce a proper state transition. We're basically allowing
             # there to be no valid statements inside a suite.
             if [x[0] for x in arcs] == [intended_label]:
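
As a rough usage note (not part of the commit), the error recovery path renamed above is only reached through parso's public API. A small sketch, assuming default settings:

# Sketch only: with error_recovery=True (the default), broken input is kept in
# the tree as error nodes/leaves instead of raising a parser error.
import parso

module = parso.parse('def f(x:\n    pass\n')  # deliberately invalid syntax
for child in module.children:
    print(child.type, repr(child.get_code()))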