mirror of
https://github.com/davidhalter/parso.git
synced 2026-01-31 07:05:42 +08:00
Move some ParserGenerator stuff into the Grammar class
This commit is contained in:
@@ -16,6 +16,8 @@ fallback token code OP, but the parser needs the actual token code.
|
||||
|
||||
"""
|
||||
|
||||
from parso.python import token
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""Pgen parsing tables conversion class.
|
||||
@@ -67,7 +69,10 @@ class Grammar(object):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, bnf_text, start_nonterminal):
|
||||
def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace):
|
||||
self._token_namespace = token_namespace
|
||||
self._nonterminal_to_dfas = rule_to_dfas
|
||||
|
||||
self.nonterminal2number = {}
|
||||
self.number2nonterminal = {}
|
||||
self.states = []
|
||||
@@ -79,6 +84,130 @@ class Grammar(object):
|
||||
self.label2nonterminal = {}
|
||||
self.start_nonterminal = start_nonterminal
|
||||
|
||||
self._make_grammar()
|
||||
|
||||
def _make_grammar(self):
|
||||
# Map from grammar rule (nonterminal) name to a set of tokens.
|
||||
self._first_terminals = {}
|
||||
|
||||
nonterminals = list(self._nonterminal_to_dfas.keys())
|
||||
nonterminals.sort()
|
||||
for nonterminal in nonterminals:
|
||||
if nonterminal not in self._first_terminals:
|
||||
self._calculate_first_terminals(nonterminal)
|
||||
|
||||
i = 256 + len(self.nonterminal2number)
|
||||
self.nonterminal2number[nonterminal] = i
|
||||
self.number2nonterminal[i] = nonterminal
|
||||
|
||||
# Now that we have calculated the first terminals, we are sure that
|
||||
# there is no left recursion or ambiguities.
|
||||
|
||||
for nonterminal in nonterminals:
|
||||
dfas = self._nonterminal_to_dfas[nonterminal]
|
||||
states = []
|
||||
for state in dfas:
|
||||
arcs = []
|
||||
for label, next_ in state.arcs.items():
|
||||
arcs.append((self._make_label(label), dfas.index(next_)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfas.index(state)))
|
||||
states.append(arcs)
|
||||
self.states.append(states)
|
||||
self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal))
|
||||
|
||||
def _make_first(self, nonterminal):
|
||||
rawfirst = self._first_terminals[nonterminal]
|
||||
first = set()
|
||||
for label in rawfirst:
|
||||
ilabel = self._make_label(label)
|
||||
##assert ilabel not in first, "%s failed on <> ... !=" % label
|
||||
first.add(ilabel)
|
||||
return first
|
||||
|
||||
def _make_label(self, label):
|
||||
# XXX Maybe this should be a method on a subclass of converter?
|
||||
ilabel = len(self.labels)
|
||||
if label[0].isalpha():
|
||||
# Either a nonterminal name or a named token
|
||||
if label in self.nonterminal2number:
|
||||
# A nonterminal name
|
||||
if label in self.nonterminal2label:
|
||||
return self.nonterminal2label[label]
|
||||
else:
|
||||
self.labels.append((self.nonterminal2number[label], None))
|
||||
self.nonterminal2label[label] = ilabel
|
||||
self.label2nonterminal[ilabel] = label
|
||||
return ilabel
|
||||
else:
|
||||
# A named token (NAME, NUMBER, STRING)
|
||||
itoken = getattr(self._token_namespace, label, None)
|
||||
assert isinstance(itoken, int), label
|
||||
if itoken in self.tokens:
|
||||
return self.tokens[itoken]
|
||||
else:
|
||||
self.labels.append((itoken, None))
|
||||
self.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
value = eval(label)
|
||||
if value[0].isalpha():
|
||||
# A keyword
|
||||
if value in self.keywords:
|
||||
return self.keywords[value]
|
||||
else:
|
||||
self.labels.append((token.NAME, value))
|
||||
self.keywords[value] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# An operator (any non-numeric token)
|
||||
itoken = self._token_namespace.generate_token_id(value)
|
||||
if itoken in self.tokens:
|
||||
return self.tokens[itoken]
|
||||
else:
|
||||
self.labels.append((itoken, None))
|
||||
self.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
|
||||
def _calculate_first_terminals(self, nonterminal):
|
||||
dfas = self._nonterminal_to_dfas[nonterminal]
|
||||
self._first_terminals[nonterminal] = None # dummy to detect left recursion
|
||||
# We only need to check the first dfa. All the following ones are not
|
||||
# interesting to find first terminals.
|
||||
state = dfas[0]
|
||||
totalset = set()
|
||||
overlapcheck = {}
|
||||
for nonterminal_or_string, next_ in state.arcs.items():
|
||||
if nonterminal_or_string in self._nonterminal_to_dfas:
|
||||
# It's a nonterminal and we have either a left recursion issue
|
||||
# in the grammare or we have to recurse.
|
||||
try:
|
||||
fset = self._first_terminals[nonterminal_or_string]
|
||||
except KeyError:
|
||||
self._calculate_first_terminals(nonterminal_or_string)
|
||||
fset = self._first_terminals[nonterminal_or_string]
|
||||
else:
|
||||
if fset is None:
|
||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||
totalset.update(fset)
|
||||
overlapcheck[nonterminal_or_string] = fset
|
||||
else:
|
||||
# It's a string. We have finally found a possible first token.
|
||||
totalset.add(nonterminal_or_string)
|
||||
overlapcheck[nonterminal_or_string] = set([nonterminal_or_string])
|
||||
|
||||
inverse = {}
|
||||
for nonterminal_or_string, first_set in overlapcheck.items():
|
||||
for terminal in first_set:
|
||||
if terminal in inverse:
|
||||
raise ValueError("rule %s is ambiguous; %s is in the"
|
||||
" first sets of %s as well as %s" %
|
||||
(nonterminal, terminal, nonterminal_or_string, inverse[terminal]))
|
||||
inverse[terminal] = nonterminal_or_string
|
||||
self._first_terminals[nonterminal] = totalset
|
||||
|
||||
@property
|
||||
def start(self):
|
||||
return self.nonterminal2number[self.start_nonterminal]
|
||||
|
||||
@@ -19,139 +19,9 @@ This grammar is self-referencing.
|
||||
"""
|
||||
|
||||
from parso.pgen2.grammar import Grammar
|
||||
from parso.python import token
|
||||
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
||||
|
||||
|
||||
class ParserGenerator(object):
|
||||
def __init__(self, rule_to_dfas, token_namespace):
|
||||
self._token_namespace = token_namespace
|
||||
self._nonterminal_to_dfas = rule_to_dfas
|
||||
|
||||
def make_grammar(self, grammar):
|
||||
# Map from grammar rule (nonterminal) name to a set of tokens.
|
||||
self._first_terminals = {}
|
||||
|
||||
nonterminals = list(self._nonterminal_to_dfas.keys())
|
||||
nonterminals.sort()
|
||||
for nonterminal in nonterminals:
|
||||
if nonterminal not in self._first_terminals:
|
||||
self._calculate_first_terminals(nonterminal)
|
||||
|
||||
i = 256 + len(grammar.nonterminal2number)
|
||||
grammar.nonterminal2number[nonterminal] = i
|
||||
grammar.number2nonterminal[i] = nonterminal
|
||||
|
||||
# Now that we have calculated the first terminals, we are sure that
|
||||
# there is no left recursion or ambiguities.
|
||||
|
||||
for nonterminal in nonterminals:
|
||||
dfas = self._nonterminal_to_dfas[nonterminal]
|
||||
states = []
|
||||
for state in dfas:
|
||||
arcs = []
|
||||
for label, next_ in state.arcs.items():
|
||||
arcs.append((self._make_label(grammar, label), dfas.index(next_)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfas.index(state)))
|
||||
states.append(arcs)
|
||||
grammar.states.append(states)
|
||||
grammar.dfas[grammar.nonterminal2number[nonterminal]] = (states, self._make_first(grammar, nonterminal))
|
||||
return grammar
|
||||
|
||||
def _make_first(self, grammar, nonterminal):
|
||||
rawfirst = self._first_terminals[nonterminal]
|
||||
first = set()
|
||||
for label in rawfirst:
|
||||
ilabel = self._make_label(grammar, label)
|
||||
##assert ilabel not in first, "%s failed on <> ... !=" % label
|
||||
first.add(ilabel)
|
||||
return first
|
||||
|
||||
def _make_label(self, grammar, label):
|
||||
# XXX Maybe this should be a method on a subclass of converter?
|
||||
ilabel = len(grammar.labels)
|
||||
if label[0].isalpha():
|
||||
# Either a nonterminal name or a named token
|
||||
if label in grammar.nonterminal2number:
|
||||
# A nonterminal name
|
||||
if label in grammar.nonterminal2label:
|
||||
return grammar.nonterminal2label[label]
|
||||
else:
|
||||
grammar.labels.append((grammar.nonterminal2number[label], None))
|
||||
grammar.nonterminal2label[label] = ilabel
|
||||
grammar.label2nonterminal[ilabel] = label
|
||||
return ilabel
|
||||
else:
|
||||
# A named token (NAME, NUMBER, STRING)
|
||||
itoken = getattr(self._token_namespace, label, None)
|
||||
assert isinstance(itoken, int), label
|
||||
if itoken in grammar.tokens:
|
||||
return grammar.tokens[itoken]
|
||||
else:
|
||||
grammar.labels.append((itoken, None))
|
||||
grammar.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
value = eval(label)
|
||||
if value[0].isalpha():
|
||||
# A keyword
|
||||
if value in grammar.keywords:
|
||||
return grammar.keywords[value]
|
||||
else:
|
||||
grammar.labels.append((token.NAME, value))
|
||||
grammar.keywords[value] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# An operator (any non-numeric token)
|
||||
itoken = self._token_namespace.generate_token_id(value)
|
||||
if itoken in grammar.tokens:
|
||||
return grammar.tokens[itoken]
|
||||
else:
|
||||
grammar.labels.append((itoken, None))
|
||||
grammar.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
|
||||
def _calculate_first_terminals(self, nonterminal):
|
||||
dfas = self._nonterminal_to_dfas[nonterminal]
|
||||
self._first_terminals[nonterminal] = None # dummy to detect left recursion
|
||||
# We only need to check the first dfa. All the following ones are not
|
||||
# interesting to find first terminals.
|
||||
state = dfas[0]
|
||||
totalset = set()
|
||||
overlapcheck = {}
|
||||
for nonterminal_or_string, next_ in state.arcs.items():
|
||||
if nonterminal_or_string in self._nonterminal_to_dfas:
|
||||
# It's a nonterminal and we have either a left recursion issue
|
||||
# in the grammare or we have to recurse.
|
||||
try:
|
||||
fset = self._first_terminals[nonterminal_or_string]
|
||||
except KeyError:
|
||||
self._calculate_first_terminals(nonterminal_or_string)
|
||||
fset = self._first_terminals[nonterminal_or_string]
|
||||
else:
|
||||
if fset is None:
|
||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||
totalset.update(fset)
|
||||
overlapcheck[nonterminal_or_string] = fset
|
||||
else:
|
||||
# It's a string. We have finally found a possible first token.
|
||||
totalset.add(nonterminal_or_string)
|
||||
overlapcheck[nonterminal_or_string] = set([nonterminal_or_string])
|
||||
|
||||
inverse = {}
|
||||
for nonterminal_or_string, first_set in overlapcheck.items():
|
||||
for terminal in first_set:
|
||||
if terminal in inverse:
|
||||
raise ValueError("rule %s is ambiguous; %s is in the"
|
||||
" first sets of %s as well as %s" %
|
||||
(nonterminal, terminal, nonterminal_or_string, inverse[terminal]))
|
||||
inverse[terminal] = nonterminal_or_string
|
||||
self._first_terminals[nonterminal] = totalset
|
||||
|
||||
|
||||
class DFAState(object):
|
||||
def __init__(self, from_rule, nfa_set, final):
|
||||
assert isinstance(nfa_set, set)
|
||||
@@ -308,5 +178,4 @@ def generate_grammar(bnf_grammar, token_namespace):
|
||||
if start_nonterminal is None:
|
||||
start_nonterminal = nfa_a.from_rule
|
||||
|
||||
p = ParserGenerator(rule_to_dfas, token_namespace)
|
||||
return p.make_grammar(Grammar(bnf_grammar, start_nonterminal))
|
||||
return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace)
|
||||
|
||||
Reference in New Issue
Block a user