Move the Grammar to the pgen module

2026-02-27 12:02:36 +08:00 · 2018-06-26 10:08:44 +02:00
parent c1675da0cb
commit 30cf491b4f
2 changed files with 83 additions and 100 deletions
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -1,99 +0,0 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.
 """This module defines the data structures used to represent a grammar.
 These are a bit arcane because they are derived from the data
 structures used by Python's 'pgen' parser generator.
 There's also a table here mapping operators to their names in the
 token module; the Python tokenize module reports all operators as the
 fallback token code OP, but the parser needs the actual token code.
 """
 class DFAPlan(object):
    """Pairs the DFA to continue with and the list of DFAs to push.

    :param next_dfa: stored unchanged as ``self.next_dfa``.
    :param dfa_pushes: list stored as ``self.dfa_pushes``. When omitted
        (or ``None``), every instance receives its own new empty list.
    """
    def __init__(self, next_dfa, dfa_pushes=None):
        self.next_dfa = next_dfa
        # A literal ``[]`` default would be created once and shared by all
        # instances, so appending to one plan's pushes would leak into every
        # other plan built without explicit pushes.
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes
    def __repr__(self):
        """Return ``ClassName(next_dfa, dfa_pushes)`` for debugging."""
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
 class Grammar(object):
    """Grammar tables derived from a mapping of rule names to DFA lists.

    On construction the instance stores the start nonterminal, the
    rule-to-DFAs mapping (as ``nonterminal_to_dfas``) and the reserved
    syntax strings, then immediately computes the "first plans" of every
    rule and writes a ``DFAPlan`` into ``ilabel_to_plan`` of each DFA state
    for every token that can start one of its nonterminal arcs.

    :raises ValueError: if a rule is left recursive, or if two nonterminal
        arcs of a rule's first state can start with the same token.
    """
    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.start_nonterminal = start_nonterminal
        self.nonterminal_to_dfas = rule_to_dfas
        self.reserved_syntax_strings = reserved_syntax_strings
        self._make_grammar()
    def _make_grammar(self):
        """Compute all first plans, then expand nonterminal arcs into plans."""
        # Rule name -> {token: [DFA state to continue in, *states to push]}.
        self._first_plans = {}
        # Rules are visited in sorted order; rules already resolved through
        # the recursion of an earlier rule are skipped.
        for rule_name in sorted(self.nonterminal_to_dfas.keys()):
            if rule_name in self._first_plans:
                continue
            self._calculate_first_terminals(rule_name)
        # Every first plan exists at this point, i.e. no left recursion or
        # ambiguity error was raised above.
        for dfa_list in self.nonterminal_to_dfas.values():
            for state in dfa_list:
                for arc_rule, target in state.nonterminal_arcs.items():
                    arc_plans = self._first_plans[arc_rule]
                    for token, pushes in arc_plans.items():
                        state.ilabel_to_plan[token] = DFAPlan(target, pushes)
    def _calculate_first_terminals(self, nonterminal):
        """Build, cache and return the first plans of ``nonterminal``.

        The result maps each token that can start the rule to a list of DFA
        states; it is also stored in ``self._first_plans[nonterminal]``.

        :raises ValueError: on left recursion, or when two nonterminal arcs
            share a starting token.
        """
        dfa_list = self.nonterminal_to_dfas[nonterminal]
        # ``None`` marks "currently being computed"; meeting it again while
        # recursing means the rule is left recursive.
        self._first_plans[nonterminal] = None
        plans = {}
        # Only the first DFA state matters for the first tokens of a rule.
        first_state = dfa_list[0]
        for sub_rule, target in first_state.nonterminal_arcs.items():
            try:
                sub_plans = self._first_plans[sub_rule]
            except KeyError:
                # Not computed yet: recurse. The recursion returns a dict.
                sub_plans = self._calculate_first_terminals(sub_rule)
            if sub_plans is None:
                raise ValueError("left recursion for rule %r" % nonterminal)
            for token, pushes in sub_plans.items():
                if token in plans:
                    # The same token already starts another nonterminal arc.
                    other_rule = plans[token][-1].from_rule
                    message = (
                        "Rule %s is ambiguous; %s is the"
                        " start of the rule %s as well as %s."
                    )
                    raise ValueError(
                        message % (nonterminal, token, sub_rule, other_rule)
                    )
                plans[token] = [target] + pushes
        # Direct token transitions of the first state are first tokens too;
        # they are written last and so replace an equal key from above.
        plans.update(
            (token, [plan.next_dfa])
            for token, plan in first_state.ilabel_to_plan.items()
        )
        self._first_plans[nonterminal] = plans
        return plans
--- a/parso/pgen2/pgen.py
+++ b/parso/pgen2/pgen.py
@@ -6,6 +6,8 @@
 # Modifications are dual-licensed: MIT and PSF.
 """
 This module defines the data structures used to represent a grammar.
 Specifying grammars in pgen is possible with this grammar::
    grammar: (NEWLINE | rule)* ENDMARKER
@@ -20,10 +22,90 @@ This grammar is self-referencing.
 from ast import literal_eval
 from parso.pgen2.grammar import Grammar, DFAPlan
 from parso.pgen2.grammar_parser import GrammarParser, NFAState
 class DFAPlan(object):
    """Pairs the DFA to continue with and the list of DFAs to push.

    :param next_dfa: stored unchanged as ``self.next_dfa``.
    :param dfa_pushes: list stored as ``self.dfa_pushes``. When omitted
        (or ``None``), every instance receives its own new empty list.
    """
    def __init__(self, next_dfa, dfa_pushes=None):
        self.next_dfa = next_dfa
        # A literal ``[]`` default would be created once and shared by all
        # instances, so appending to one plan's pushes would leak into every
        # other plan built without explicit pushes.
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes
    def __repr__(self):
        """Return ``ClassName(next_dfa, dfa_pushes)`` for debugging."""
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
 class Grammar(object):
    """Grammar tables derived from a mapping of rule names to DFA lists.

    On construction the instance stores the start nonterminal, the
    rule-to-DFAs mapping (as ``nonterminal_to_dfas``) and the reserved
    syntax strings, then immediately computes the "first plans" of every
    rule and writes a ``DFAPlan`` into ``ilabel_to_plan`` of each DFA state
    for every token that can start one of its nonterminal arcs.

    :raises ValueError: if a rule is left recursive, or if two nonterminal
        arcs of a rule's first state can start with the same token.
    """
    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.start_nonterminal = start_nonterminal
        self.nonterminal_to_dfas = rule_to_dfas  # Dict[str, List[DFAState]]
        self.reserved_syntax_strings = reserved_syntax_strings
        self._make_grammar()
    def _make_grammar(self):
        """Compute all first plans, then expand nonterminal arcs into plans."""
        # Rule name -> {token: [DFA state to continue in, *states to push]}.
        self._first_plans = {}
        # Rules are visited in sorted order; rules already resolved through
        # the recursion of an earlier rule are skipped.
        for rule_name in sorted(self.nonterminal_to_dfas.keys()):
            if rule_name in self._first_plans:
                continue
            self._calculate_first_terminals(rule_name)
        # Every first plan exists at this point, i.e. no left recursion or
        # ambiguity error was raised above.
        for dfa_list in self.nonterminal_to_dfas.values():
            for state in dfa_list:
                for arc_rule, target in state.nonterminal_arcs.items():
                    arc_plans = self._first_plans[arc_rule]
                    for token, pushes in arc_plans.items():
                        state.ilabel_to_plan[token] = DFAPlan(target, pushes)
    def _calculate_first_terminals(self, nonterminal):
        """Build, cache and return the first plans of ``nonterminal``.

        The result maps each token that can start the rule to a list of DFA
        states; it is also stored in ``self._first_plans[nonterminal]``.

        :raises ValueError: on left recursion, or when two nonterminal arcs
            share a starting token.
        """
        dfa_list = self.nonterminal_to_dfas[nonterminal]
        # ``None`` marks "currently being computed"; meeting it again while
        # recursing means the rule is left recursive.
        self._first_plans[nonterminal] = None
        plans = {}
        # Only the first DFA state matters for the first tokens of a rule.
        first_state = dfa_list[0]
        for sub_rule, target in first_state.nonterminal_arcs.items():
            try:
                sub_plans = self._first_plans[sub_rule]
            except KeyError:
                # Not computed yet: recurse. The recursion returns a dict.
                sub_plans = self._calculate_first_terminals(sub_rule)
            if sub_plans is None:
                raise ValueError("left recursion for rule %r" % nonterminal)
            for token, pushes in sub_plans.items():
                if token in plans:
                    # The same token already starts another nonterminal arc.
                    other_rule = plans[token][-1].from_rule
                    message = (
                        "Rule %s is ambiguous; %s is the"
                        " start of the rule %s as well as %s."
                    )
                    raise ValueError(
                        message % (nonterminal, token, sub_rule, other_rule)
                    )
                plans[token] = [target] + pushes
        # Direct token transitions of the first state are first tokens too;
        # they are written last and so replace an equal key from above.
        plans.update(
            (token, [plan.next_dfa])
            for token, plan in first_state.ilabel_to_plan.items()
        )
        self._first_plans[nonterminal] = plans
        return plans
 class DFAState(object):
    def __init__(self, from_rule, nfa_set, final):
        assert isinstance(nfa_set, set)