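Refactor pgen: parse the grammar in generate_grammar, pass the DFAs and start symbol into ParserGenerator, and rename startsymbol to start_symbol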

This commit is contained in:
Dave Halter
2018-06-12 22:08:19 +02:00
parent eeb456a6d4
commit 41c38311f7

View File

@@ -12,20 +12,20 @@ from parso.utils import parse_version_string
class ParserGenerator(object): class ParserGenerator(object):
def __init__(self, bnf_grammar, token_namespace): def __init__(self, dfas, start_symbol, token_namespace):
self._bnf_grammar = bnf_grammar
self._token_namespace = token_namespace self._token_namespace = token_namespace
self.dfas, self.startsymbol = _GrammarParser(bnf_grammar)._parse() self.dfas = dfas
self.start_symbol = start_symbol
def make_grammar(self): def make_grammar(self, grammar):
grammar = Grammar(self._bnf_grammar)
self._first = {} # map from symbol name to set of tokens self._first = {} # map from symbol name to set of tokens
self._addfirstsets() self._addfirstsets()
names = list(self.dfas.keys()) names = list(self.dfas.keys())
names.sort() names.sort()
# TODO do we still need this? # TODO do we still need this?
names.remove(self.startsymbol) names.remove(self.start_symbol)
names.insert(0, self.startsymbol) names.insert(0, self.start_symbol)
for name in names: for name in names:
i = 256 + len(grammar.symbol2number) i = 256 + len(grammar.symbol2number)
grammar.symbol2number[name] = i grammar.symbol2number[name] = i
@@ -42,7 +42,7 @@ class ParserGenerator(object):
states.append(arcs) states.append(arcs)
grammar.states.append(states) grammar.states.append(states)
grammar.dfas[grammar.symbol2number[name]] = (states, self._make_first(grammar, name)) grammar.dfas[grammar.symbol2number[name]] = (states, self._make_first(grammar, name))
grammar.start = grammar.symbol2number[self.startsymbol] grammar.start = grammar.symbol2number[self.start_symbol]
return grammar return grammar
def _make_first(self, grammar, name): def _make_first(self, grammar, name):
@@ -154,7 +154,7 @@ class _GrammarParser():
def _parse(self): def _parse(self):
dfas = {} dfas = {}
startsymbol = None start_symbol = None
# MSTART: (NEWLINE | RULE)* ENDMARKER # MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER: while self.type != token.ENDMARKER:
while self.type == token.NEWLINE: while self.type == token.NEWLINE:
@@ -172,9 +172,9 @@ class _GrammarParser():
# newlen = len(dfa) # newlen = len(dfa)
dfas[name] = dfa dfas[name] = dfa
#print name, oldlen, newlen #print name, oldlen, newlen
if startsymbol is None: if start_symbol is None:
startsymbol = name start_symbol = name
return dfas, startsymbol return dfas, start_symbol
def _make_dfa(self, start, finish): def _make_dfa(self, start, finish):
# To turn an NFA into a DFA, we define the states of the DFA # To turn an NFA into a DFA, we define the states of the DFA
@@ -395,7 +395,7 @@ class DFAState(object):
__hash__ = None # For Py3 compatibility. __hash__ = None # For Py3 compatibility.
def generate_grammar(bnf_text, token_namespace): def generate_grammar(bnf_grammar, token_namespace):
""" """
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and () at-least-once repetition, [] for optional parts, | for alternatives and ()
@@ -404,5 +404,6 @@ def generate_grammar(bnf_text, token_namespace):
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser. own parser.
""" """
p = ParserGenerator(bnf_text, token_namespace) dfas, start_symbol = _GrammarParser(bnf_grammar)._parse()
return p.make_grammar() p = ParserGenerator(dfas, start_symbol, token_namespace)
return p.make_grammar(Grammar(bnf_grammar))