Mirror of https://github.com/davidhalter/parso.git (synced 2025-12-09 22:25:53 +08:00)
Try to completely remove the word symbol and use nonterminal
The ones that we could not remove are in grammar.py, because that's the public documented API.
@@ -51,8 +51,8 @@ class Grammar(object):
             it is invalid, it will be returned as an error node. If disabled,
             you will get a ParseError when encountering syntax errors in your
             code.
-        :param str start_symbol: The grammar symbol that you want to parse. Only
-            allowed to be used when error_recovery is False.
+        :param str start_symbol: The grammar rule (nonterminal) that you want
+            to parse. Only allowed to be used when error_recovery is False.
         :param str path: The path to the file you want to open. Only needed for caching.
         :param bool cache: Keeps a copy of the parser tree in RAM and on disk
             if a path is given. Returns the cached trees if the corresponding
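For orientation, a minimal usage sketch of the parse() call this docstring belongs to; the public keyword keeps the name start_symbol, and 'eval_input' is only an illustrative choice of start rule:

    import parso

    grammar = parso.load_grammar()
    # Default behaviour: error recovery is on, invalid code comes back as error nodes.
    module = grammar.parse("if x:\n    1 +\n")
    # A non-default start rule is only allowed with error recovery disabled.
    expr = grammar.parse("1 + 2\n", start_symbol='eval_input', error_recovery=False)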
@@ -88,7 +88,7 @@ class Grammar(object):
             raise TypeError("Please provide either code or a path.")
 
         if start_symbol is None:
-            start_symbol = self._start_symbol
+            start_symbol = self._start_nonterminal
 
         if error_recovery and start_symbol != 'file_input':
             raise NotImplementedError("This is currently not implemented.")
@@ -136,7 +136,7 @@ class Grammar(object):
         p = self._parser(
             self._pgen_grammar,
             error_recovery=error_recovery,
-            start_symbol=start_symbol
+            start_nonterminal=start_symbol
         )
         root_node = p.parse(tokens=tokens)
 
@@ -186,7 +186,7 @@ class Grammar(object):
         return normalizer.issues
 
     def __repr__(self):
-        labels = self._pgen_grammar.number2symbol.values()
+        labels = self._pgen_grammar.number2nonterminal.values()
         txt = ' '.join(list(labels)[:3]) + ' ...'
         return '<%s:%s>' % (self.__class__.__name__, txt)
 
@@ -194,7 +194,7 @@ class Grammar(object):
 class PythonGrammar(Grammar):
     _error_normalizer_config = ErrorFinderConfig()
     _token_namespace = token
-    _start_symbol = 'file_input'
+    _start_nonterminal = 'file_input'
 
     def __init__(self, version_info, bnf_text):
         super(PythonGrammar, self).__init__(
@@ -38,13 +38,13 @@ class BaseParser(object):
     }
     default_leaf = tree.Leaf
 
-    def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
+    def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
         self._pgen_grammar = pgen_grammar
-        self._start_symbol = start_symbol
+        self._start_nonterminal = start_nonterminal
         self._error_recovery = error_recovery
 
     def parse(self, tokens):
-        start_number = self._pgen_grammar.symbol2number[self._start_symbol]
+        start_number = self._pgen_grammar.nonterminal2number[self._start_nonterminal]
         self.pgen_parser = PgenParser(
             self._pgen_grammar, self.convert_node, self.convert_leaf,
             self.error_recovery, start_number
@@ -64,12 +64,12 @@ class BaseParser(object):
         raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
 
     def convert_node(self, pgen_grammar, type_, children):
-        # TODO REMOVE symbol, we don't want type here.
-        symbol = pgen_grammar.number2symbol[type_]
+        # TODO REMOVE nonterminal, we don't want type here.
+        nonterminal = pgen_grammar.number2nonterminal[type_]
         try:
-            return self.node_map[symbol](children)
+            return self.node_map[nonterminal](children)
         except KeyError:
-            return self.default_node(symbol, children)
+            return self.default_node(nonterminal, children)
 
     def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
         try:
@@ -28,12 +28,14 @@ class Grammar(object):
 
     The instance variables are as follows:
 
-    symbol2number -- a dict mapping symbol names to numbers. Symbol
-                     numbers are always 256 or higher, to distinguish
-                     them from token numbers, which are between 0 and
-                     255 (inclusive).
+    nonterminal2number --
+        A dict mapping nonterminal names to numbers.
+        Nonterminal numbers are always 256 or higher, to
+        distinguish them from token numbers, which are between 0
+        and 255 (inclusive).
 
-    number2symbol -- a dict mapping numbers to symbol names;
+    number2nonterminal --
+        A dict mapping numbers to nonterminal names;
         these two are each other's inverse.
 
     states -- a list of DFAs, where each DFA is a list of
@@ -44,20 +46,20 @@ class Grammar(object):
              Final states are represented by a special arc of
              the form (0, j) where j is its own state number.
 
-    dfas -- a dict mapping symbol numbers to (DFA, first)
+    dfas -- a dict mapping nonterminal numbers to (DFA, first)
             pairs, where DFA is an item from the states list
             above, and first is a set of tokens that can
            begin this grammar rule (represented by a dict
            whose values are always 1).
 
     labels -- a list of (x, y) pairs where x is either a token
-              number or a symbol number, and y is either None
+              number or a nonterminal number, and y is either None
              or a string; the strings are keywords. The label
              number is the index in this list; label numbers
              are used to mark state transitions (arcs) in the
              DFAs.
 
-    start -- the number of the grammar's start symbol.
+    start -- the number of the grammar's start nonterminal.
 
     keywords -- a dict mapping keyword strings to arc labels.
 
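An illustrative check (not part of the commit) of the invariants described above: the two renamed tables are inverses, and nonterminal numbers never collide with token numbers. It peeks at the private _pgen_grammar attribute and assumes a parso checkout that already contains this rename:

    import parso

    g = parso.load_grammar()._pgen_grammar  # private attribute, for illustration only
    for name, number in g.nonterminal2number.items():
        assert number >= 256                         # token numbers stay below 256
        assert g.number2nonterminal[number] == name  # the two dicts are inverses
    assert g.start == g.nonterminal2number[g.start_nonterminal]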
@@ -65,29 +67,29 @@ class Grammar(object):
 
     """
 
-    def __init__(self, bnf_text, start_symbol):
-        self.symbol2number = {}
-        self.number2symbol = {}
+    def __init__(self, bnf_text, start_nonterminal):
+        self.nonterminal2number = {}
+        self.number2nonterminal = {}
         self.states = []
         self.dfas = {}
         self.labels = [(0, "EMPTY")]
         self.keywords = {}
         self.tokens = {}
-        self.symbol2label = {}
-        self.label2symbol = {}
-        self.start_symbol = start_symbol
+        self.nonterminal2label = {}
+        self.label2nonterminal = {}
+        self.start_nonterminal = start_nonterminal
 
     @property
     def start(self):
-        return self.symbol2number[self.start_symbol]
+        return self.nonterminal2number[self.start_nonterminal]
 
     def report(self):
         """Dump the grammar tables to standard output, for debugging."""
         from pprint import pprint
         print("s2n")
-        pprint(self.symbol2number)
+        pprint(self.nonterminal2number)
         print("n2s")
-        pprint(self.number2symbol)
+        pprint(self.number2nonterminal)
         print("states")
         pprint(self.states)
         print("dfas")
@@ -118,8 +118,8 @@ class PgenParser(object):
     up.
 
     A concrete syntax tree node is a (type, nodes) tuple, where
-    type is the node type (a token or symbol number) and nodes
-    is a list of children for symbols, and None for tokens.
+    type is the node type (a token or nonterminal number) and nodes
+    is a list of children for nonterminals, and None for tokens.
 
     An abstract syntax tree node may be anything; this is entirely
     up to the converter function.
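A purely illustrative sketch of the (type, nodes) convention this docstring describes, using made-up numbers (a token type below 256, a nonterminal number at or above it):

    NAME_TOKEN = 1     # hypothetical token number (< 256)
    EXPR_RULE = 260    # hypothetical nonterminal number (>= 256)

    leaf = (NAME_TOKEN, None)         # tokens carry None instead of a child list
    node = (EXPR_RULE, [leaf, leaf])  # nonterminals carry a list of child nodes
    assert node[0] >= 256             # >= 256 means nonterminal, < 256 means token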
@@ -184,11 +184,11 @@ class PgenParser(object):
                     # Done with this token
                     return False
                 elif t >= 256:
-                    # See if it's a symbol and if we're in its first set
+                    # See if it's a nonterminal and if we're in its first set
                     itsdfa = _gram.dfas[t]
                     itsstates, itsfirst = itsdfa
                     if ilabel in itsfirst:
-                        # Push a symbol
+                        # Push a nonterminal
                         _push(t, itsdfa, newstate)
                         break # To continue the outer while loop
             else:
@@ -231,7 +231,7 @@ class PgenParser(object):
         try:
             # Equal to:
             # dfa, state, node = self.stack[-1]
-            # symbol, children = node
+            # nonterminal, children = node
             self.stack[-1][2][1].append(newnode)
         except IndexError:
             # Stack is empty, set the rootnode.
@@ -29,7 +29,8 @@ class ParserGenerator(object):
         self._nonterminal_to_dfas = rule_to_dfas
 
     def make_grammar(self, grammar):
-        self._first_terminals = {} # map from symbol name to set of tokens
+        # Map from grammar rule (nonterminal) name to a set of tokens.
+        self._first_terminals = {}
 
         names = list(self._nonterminal_to_dfas.keys())
         names.sort()
@@ -37,9 +38,9 @@ class ParserGenerator(object):
             if name not in self._first_terminals:
                 self._calculate_first_terminals(name)
 
-            i = 256 + len(grammar.symbol2number)
-            grammar.symbol2number[name] = i
-            grammar.number2symbol[i] = name
+            i = 256 + len(grammar.nonterminal2number)
+            grammar.nonterminal2number[name] = i
+            grammar.number2nonterminal[i] = name
 
         # Now that we have calculated the first terminals, we are sure that
         # there is no left recursion or ambiguities.
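The loop above hands out consecutive numbers starting at 256; a standalone sketch of the same scheme, with illustrative rule names:

    nonterminal2number = {}
    number2nonterminal = {}
    for name in ['file_input', 'stmt', 'simple_stmt']:  # illustrative rule names
        i = 256 + len(nonterminal2number)
        nonterminal2number[name] = i
        number2nonterminal[i] = name
    assert nonterminal2number == {'file_input': 256, 'stmt': 257, 'simple_stmt': 258}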
@@ -55,7 +56,7 @@ class ParserGenerator(object):
                 arcs.append((0, dfas.index(state)))
                 states.append(arcs)
             grammar.states.append(states)
-            grammar.dfas[grammar.symbol2number[name]] = (states, self._make_first(grammar, name))
+            grammar.dfas[grammar.nonterminal2number[name]] = (states, self._make_first(grammar, name))
         return grammar
 
     def _make_first(self, grammar, name):
@@ -71,15 +72,15 @@ class ParserGenerator(object):
         # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(grammar.labels)
         if label[0].isalpha():
-            # Either a symbol name or a named token
-            if label in grammar.symbol2number:
-                # A symbol name (a non-terminal)
-                if label in grammar.symbol2label:
-                    return grammar.symbol2label[label]
+            # Either a nonterminal name or a named token
+            if label in grammar.nonterminal2number:
+                # A nonterminal name (a non-terminal)
+                if label in grammar.nonterminal2label:
+                    return grammar.nonterminal2label[label]
                 else:
-                    grammar.labels.append((grammar.symbol2number[label], None))
-                    grammar.symbol2label[label] = ilabel
-                    grammar.label2symbol[ilabel] = label
+                    grammar.labels.append((grammar.nonterminal2number[label], None))
+                    grammar.nonterminal2label[label] = ilabel
+                    grammar.label2nonterminal[ilabel] = label
                     return ilabel
             else:
                 # A named token (NAME, NUMBER, STRING)
@@ -293,7 +294,7 @@ def generate_grammar(bnf_grammar, token_namespace):
     own parser.
     """
     rule_to_dfas = {}
-    start_symbol = None
+    start_nonterminal = None
     for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
         #_dump_nfa(a, z)
         dfas = _make_dfas(nfa_a, nfa_z)
@@ -304,8 +305,8 @@ def generate_grammar(bnf_grammar, token_namespace):
         rule_to_dfas[nfa_a.from_rule] = dfas
         #print(nfa_a.from_rule, oldlen, newlen)
 
-        if start_symbol is None:
-            start_symbol = nfa_a.from_rule
+        if start_nonterminal is None:
+            start_nonterminal = nfa_a.from_rule
 
     p = ParserGenerator(rule_to_dfas, token_namespace)
-    return p.make_grammar(Grammar(bnf_grammar, start_symbol))
+    return p.make_grammar(Grammar(bnf_grammar, start_nonterminal))
@@ -41,9 +41,9 @@ def _flows_finished(pgen_grammar, stack):
     if, while, for and try might not be finished, because another part might
     still be parsed.
     """
-    for dfa, newstate, (symbol_number, nodes) in stack:
-        if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
-                                                         'for_stmt', 'try_stmt'):
+    for dfa, newstate, (nonterminal_number, nodes) in stack:
+        if pgen_grammar.number2nonterminal[nonterminal_number] \
+                in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
             return False
     return True
 
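A self-contained sketch (made-up numbers, not part of the commit) of the stack-entry shape the loop above iterates over, (dfa, state, (nonterminal_number, nodes)):

    number2nonterminal = {270: 'if_stmt', 271: 'simple_stmt'}  # made-up numbers
    stack = [(None, 0, (270, [])), (None, 1, (271, []))]       # dfa left as None here
    flows = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt')
    flows_finished = not any(number2nonterminal[num] in flows
                             for _, _, (num, _) in stack)
    print(flows_finished)  # False: an if_stmt is still open on the stack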
@@ -52,8 +52,8 @@ def suite_or_file_input_is_valid(pgen_grammar, stack):
     if not _flows_finished(pgen_grammar, stack):
         return False
 
-    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
-        if pgen_grammar.number2symbol[symbol_number] == 'suite':
+    for dfa, newstate, (nonterminal_number, nodes) in reversed(stack):
+        if pgen_grammar.number2nonterminal[nonterminal_number] == 'suite':
             # If only newline is in the suite, the suite is not valid, yet.
             return len(nodes) > 1
     # Not reaching a suite means that we're dealing with file_input levels
@@ -62,8 +62,8 @@ class Parser(BaseParser):
         FSTRING_END: tree.FStringEnd,
     }
 
-    def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
+    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
 
         self.syntax_errors = []
         self._omit_dedent_list = []
@@ -81,19 +81,19 @@ class Parser(BaseParser):
 
     def parse(self, tokens):
         if self._error_recovery:
-            if self._start_symbol != 'file_input':
+            if self._start_nonterminal != 'file_input':
                 raise NotImplementedError
 
             tokens = self._recovery_tokenize(tokens)
 
         node = super(Parser, self).parse(tokens)
 
-        if self._start_symbol == 'file_input' != node.type:
+        if self._start_nonterminal == 'file_input' != node.type:
             # If there's only one statement, we get back a non-module. That's
             # not what we want, we want a module, so we add it here:
             node = self.convert_node(
                 self._pgen_grammar,
-                self._pgen_grammar.symbol2number['file_input'],
+                self._pgen_grammar.nonterminal2number['file_input'],
                 [node]
             )
 
@@ -107,24 +107,24 @@ class Parser(BaseParser):
         grammar rule produces a new complete node, so that the tree is build
         strictly bottom-up.
         """
-        # TODO REMOVE symbol, we don't want type here.
-        symbol = pgen_grammar.number2symbol[type]
+        # TODO REMOVE nonterminal, we don't want type here.
+        nonterminal = pgen_grammar.number2nonterminal[type]
         try:
-            return self.node_map[symbol](children)
+            return self.node_map[nonterminal](children)
         except KeyError:
-            if symbol == 'suite':
+            if nonterminal == 'suite':
                 # We don't want the INDENT/DEDENT in our parser tree. Those
                 # leaves are just cancer. They are virtual leaves and not real
                 # ones and therefore have pseudo start/end positions and no
                 # prefixes. Just ignore them.
                 children = [children[0]] + children[2:-1]
-            elif symbol == 'list_if':
+            elif nonterminal == 'list_if':
                 # Make transitioning from 2 to 3 easier.
-                symbol = 'comp_if'
-            elif symbol == 'listmaker':
+                nonterminal = 'comp_if'
+            elif nonterminal == 'listmaker':
                 # Same as list_if above.
-                symbol = 'testlist_comp'
-            return self.default_node(symbol, children)
+                nonterminal = 'testlist_comp'
+            return self.default_node(nonterminal, children)
 
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
@@ -138,10 +138,10 @@ class Parser(BaseParser):
 
     def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                        add_token_callback):
-        def get_symbol_and_nodes(stack):
+        def get_nonterminal_and_nodes(stack):
             for dfa, state, (type_, nodes) in stack:
-                symbol = pgen_grammar.number2symbol[type_]
-                yield symbol, nodes
+                nonterminal = pgen_grammar.number2nonterminal[type_]
+                yield nonterminal, nodes
 
         tos_nodes = stack.get_tos_nodes()
         if tos_nodes:
@@ -149,7 +149,7 @@ class Parser(BaseParser):
         else:
            last_leaf = None
 
-        if self._start_symbol == 'file_input' and \
+        if self._start_nonterminal == 'file_input' and \
                 (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
             def reduce_stack(states, newstate):
                 # reduce
@@ -168,13 +168,13 @@ class Parser(BaseParser):
             ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
 
             dfa, state, (type_, nodes) = stack[-1]
-            symbol = pgen_grammar.number2symbol[type_]
+            nonterminal = pgen_grammar.number2nonterminal[type_]
             states, first = dfa
             arcs = states[state]
             # Look for a state with this label
             for i, newstate in arcs:
                 if ilabel == i:
-                    if symbol == 'simple_stmt':
+                    if nonterminal == 'simple_stmt':
                         # This is basically shifting
                         stack[-1] = (dfa, newstate, (type_, nodes))
 
@@ -182,12 +182,12 @@ class Parser(BaseParser):
                         add_token_callback(typ, value, start_pos, prefix)
                         return
                     # Check if we're at the right point
-                    #for symbol, nodes in get_symbol_and_nodes(stack):
+                    #for nonterminal, nodes in get_nonterminal_and_nodes(stack):
                     # self.pgen_parser._pop()
 
                        #break
                    break
-            #symbol = pgen_grammar.number2symbol[type_]
+            #nonterminal = pgen_grammar.number2nonterminal[type_]
 
         if not self._error_recovery:
             return super(Parser, self).error_recovery(
@@ -198,21 +198,21 @@ class Parser(BaseParser):
             # For now just discard everything that is not a suite or
             # file_input, if we detect an error.
             one_line_suite = False
-            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
+            for index, (nonterminal, nodes) in reversed(list(enumerate(get_nonterminal_and_nodes(stack)))):
                 # `suite` can sometimes be only simple_stmt, not stmt.
                 if one_line_suite:
                     break
-                elif symbol == 'file_input':
+                elif nonterminal == 'file_input':
                     break
-                elif symbol == 'suite':
+                elif nonterminal == 'suite':
                     if len(nodes) > 1:
                         break
                     elif not nodes:
                         one_line_suite = True
                         # `suite` without an indent are error nodes.
-            return index, symbol, nodes
+            return index, nonterminal, nodes
 
-        index, symbol, nodes = current_suite(stack)
+        index, nonterminal, nodes = current_suite(stack)
 
         # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
         if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
@@ -226,11 +226,11 @@ class Parser(BaseParser):
             error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
             stack[-1][2][1].append(error_leaf)
 
-        if symbol == 'suite':
+        if nonterminal == 'suite':
             dfa, state, node = stack[-1]
             states, first = dfa
             arcs = states[state]
-            intended_label = pgen_grammar.symbol2label['stmt']
+            intended_label = pgen_grammar.nonterminal2label['stmt']
             # Introduce a proper state transition. We're basically allowing
             # there to be no valid statements inside a suite.
             if [x[0] for x in arcs] == [intended_label]: