Way better documentation for the DFA generator

Dave Halter
2018-06-28 10:08:09 +02:00
parent 375ebf2181
commit ecdb90d9bc

@@ -35,9 +35,10 @@ class Grammar(object):
""" """
Once initialized, this class supplies the grammar tables for the Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not accesses the instance variables directly.
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules). The only important part in this parsers are dfas and transitions between
dfas.
""" """
def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
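
A rough sketch of what a constructor with this signature presumably stores; only
the parameter names come from the diff, while the attribute names, comments and
example values below are assumptions:

    class GrammarSketch(object):
        def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
            # The rule the parser starts with, e.g. "file_input" (assumed name).
            self.start_nonterminal = start_nonterminal
            # Maps each nonterminal (rule name) to its list of DFA states; these
            # are the "dfas" the docstring calls the important part.
            self.nonterminal_to_dfas = rule_to_dfas
            # Maps literal strings such as "if" or "*" to ReservedString objects.
            self.reserved_syntax_strings = reserved_syntax_strings
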
@@ -47,6 +48,10 @@ class Grammar(object):
 class DFAPlan(object):
+    """
+    Plans are used for the parser to create stack nodes and do the proper
+    DFA state transitions.
+    """
     def __init__(self, next_dfa, dfa_pushes=[]):
         self.next_dfa = next_dfa
         self.dfa_pushes = dfa_pushes
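
To make "create stack nodes and do the proper DFA state transitions" concrete,
here is a hypothetical illustration of how a parser could apply such a plan once
a terminal matches; StackNode and apply_plan are invented for the example and
are not part of this commit:

    class StackNode(object):
        def __init__(self, dfa):
            self.dfa = dfa     # current DFA state within this node's rule
            self.nodes = []    # children (tokens/child nodes) collected so far

    def apply_plan(stack, plan):
        # The node whose DFA produced the plan advances to the next DFA state...
        stack[-1].dfa = plan.next_dfa
        # ...and every DFA in dfa_pushes then opens a new stack node for the
        # nonterminal that is about to be parsed.
        for pushed_dfa in plan.dfa_pushes:
            stack.append(StackNode(pushed_dfa))
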
@@ -56,6 +61,15 @@ class DFAPlan(object):
 class DFAState(object):
+    """
+    The DFAState object is the core class for pretty much anything. DFAStates
+    are the vertices of an ordered graph while arcs and transitions are the
+    edges.
+
+    Arcs are the initial edges, where most DFAStates are not connected, and
+    transitions are then calculated to connect the DFA state machines that have
+    different nonterminals.
+    """
     def __init__(self, from_rule, nfa_set, final):
         assert isinstance(nfa_set, set)
         assert isinstance(next(iter(nfa_set)), NFAState)
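
The arc/transition distinction can be pictured with a stripped-down stand-in;
the attribute names below follow the wording of the docstring and are not
guaranteed to match the real class:

    class DFAStateSketch(object):
        def __init__(self, from_rule, is_final):
            self.from_rule = from_rule   # name of the nonterminal this DFA belongs to
            self.is_final = is_final
            # Arcs: the initial edges inside one rule's state machine, keyed by
            # terminal or nonterminal labels and pointing at other states of
            # the same rule.
            self.arcs = {}
            # Transitions: calculated later, keyed by terminals only, pointing
            # at DFAPlan-like objects that connect the state machines of
            # different nonterminals.
            self.transitions = {}
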
@@ -106,6 +120,12 @@ class DFAState(object):
 class ReservedString(object):
+    """
+    Most grammars will have certain keywords and operators that are mentioned
+    in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
+    This class basically represents the former.
+    """
+
     def __init__(self, value):
         self.value = value
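
For a hypothetical rule like if_stmt: 'if' test ':' suite, the quoted literals
become ReservedString objects while names such as test (another rule) or NUMBER
(a token type) do not; a minimal illustration using the class above:

    reserved_syntax_strings = {}
    for value in ('if', ':'):
        # Quoted grammar strings each get one shared ReservedString instance.
        reserved_syntax_strings[value] = ReservedString(value)
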
@@ -114,12 +134,14 @@ class ReservedString(object):
 def _simplify_dfas(dfas):
-    # This is not theoretically optimal, but works well enough.
-    # Algorithm: repeatedly look for two states that have the same
-    # set of arcs (same labels pointing to the same nodes) and
-    # unify them, until things stop changing.
+    """
+    This is not theoretically optimal, but works well enough.
+    Algorithm: repeatedly look for two states that have the same
+    set of arcs (same labels pointing to the same nodes) and
+    unify them, until things stop changing.

-    # dfas is a list of DFAState instances
+    dfas is a list of DFAState instances
+    """
     changes = True
     while changes:
         changes = False
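
The loop described in the new docstring can be written generically for a plain
dict-based DFA (state -> {label: next_state}); the sketch below is not the
DFAState-based implementation in this file. Note that two states should only be
unified if they also agree on being final, otherwise the accepted language
changes:

    def _find_equal_pair(arcs, finals):
        states = list(arcs)
        for i, state_a in enumerate(states):
            for state_b in states[i + 1:]:
                if arcs[state_a] == arcs[state_b] \
                        and (state_a in finals) == (state_b in finals):
                    return state_a, state_b
        return None

    def simplify_dfa(arcs, finals):
        # arcs: dict mapping state -> {label: next_state}
        # finals: set of accepting states
        # Repeatedly unify a pair of equal states until nothing changes.
        pair = _find_equal_pair(arcs, finals)
        while pair is not None:
            keep, drop = pair
            del arcs[drop]
            finals.discard(drop)
            for state_arcs in arcs.values():
                for label, next_state in state_arcs.items():
                    if next_state == drop:
                        state_arcs[label] = keep
            pair = _find_equal_pair(arcs, finals)
        return arcs
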
@@ -137,7 +159,10 @@ def _simplify_dfas(dfas):
 def _make_dfas(start, finish):
     """
-    This is basically doing what the powerset construction algorithm is doing.
+    Uses the powerset construction algorithm to create DFA states from sets of
+    NFA states.
+
+    Also does state reduction if some states are not needed.
     """
     # To turn an NFA into a DFA, we define the states of the DFA
     # to correspond to *sets* of states of the NFA. Then do some
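
For readers unfamiliar with powerset (subset) construction, here is a
self-contained sketch on a generic NFA representation, independent of the
NFAState/DFAState classes in this module: each DFA state corresponds to a
frozenset of NFA states, glued together by epsilon closures.

    def nfa_to_dfa(start, epsilon_arcs, labelled_arcs):
        # epsilon_arcs:  dict state -> iterable of states reachable without input
        # labelled_arcs: dict state -> {label: set of next states}
        def closure(states):
            todo = list(states)
            result = set(states)
            while todo:
                state = todo.pop()
                for nxt in epsilon_arcs.get(state, ()):
                    if nxt not in result:
                        result.add(nxt)
                        todo.append(nxt)
            return frozenset(result)

        start_set = closure({start})
        dfa = {}              # frozenset of NFA states -> {label: frozenset}
        unprocessed = [start_set]
        while unprocessed:
            current = unprocessed.pop()
            if current in dfa:
                continue
            grouped = {}
            for state in current:
                for label, nexts in labelled_arcs.get(state, {}).items():
                    grouped.setdefault(label, set()).update(nexts)
            dfa[current] = {label: closure(nexts)
                            for label, nexts in grouped.items()}
            unprocessed.extend(dfa[current].values())
        return start_set, dfa
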
@@ -250,6 +275,10 @@ def generate_grammar(bnf_grammar, token_namespace):
 def _make_transition(token_namespace, reserved_syntax_strings, label):
+    """
+    Creates a reserved string ("if", "for", "*", ...) or returns the token type
+    (NUMBER, STRING, ...) for a given grammar terminal.
+    """
     if label[0].isalpha():
         # A named token (e.g. NAME, NUMBER, STRING)
         return getattr(token_namespace, label)
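
Only the isalpha() branch is visible above; a plausible shape for the rest of
the dispatch (the else path below is an assumption based on the docstring, not
a quote of the actual code, and it reuses the ReservedString class shown
earlier):

    from ast import literal_eval

    def make_transition_sketch(token_namespace, reserved_syntax_strings, label):
        if label[0].isalpha():
            # A named token type (e.g. NAME, NUMBER, STRING).
            return getattr(token_namespace, label)
        # Otherwise the label is a quoted literal such as "'if'" or "'*'":
        # unquote it and hand out one shared ReservedString per value.
        value = literal_eval(label)
        try:
            return reserved_syntax_strings[value]
        except KeyError:
            r = reserved_syntax_strings[value] = ReservedString(value)
            return r
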
@@ -267,6 +296,10 @@ def _make_transition(token_namespace, reserved_syntax_strings, label):
 def _calculate_tree_traversal(nonterminal_to_dfas):
+    """
+    By this point we know how dfas can move around within a stack node, but we
+    don't know how we can add a new stack node (nonterminal transitions).
+    """
     # Map from grammar rule (nonterminal) name to a set of tokens.
     first_plans = {}
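
The "nonterminal transitions" mentioned here boil down to plans: for a DFA arc
labelled with a nonterminal, every terminal that can start that nonterminal
ends up keyed to a DFAPlan that pushes the nonterminal's DFA(s). A hypothetical
helper showing just that shape, reusing the DFAPlan class from above:

    def plans_for_nonterminal_arc(next_dfa, terminal_to_pushes):
        # terminal_to_pushes: dict terminal -> list of DFAs that have to be
        # pushed onto the stack before that terminal can be consumed.
        return {terminal: DFAPlan(next_dfa, pushes)
                for terminal, pushes in terminal_to_pushes.items()}
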
@@ -287,6 +320,10 @@ def _calculate_tree_traversal(nonterminal_to_dfas):
 def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
+    """
+    Calculates the first plan in the first_plans dictionary for every given
+    nonterminal. This is going to be used to know when to create stack nodes.
+    """
     dfas = nonterminal_to_dfas[nonterminal]
     new_first_plans = {}
     first_plans[nonterminal] = None  # dummy to detect left recursion
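
The None sentinel on the last line is what makes left recursion detectable. A
simplified, rule-list based version of a "first terminals" calculation using
the same trick (purely illustrative; the real function additionally records
which DFAs have to be pushed):

    def first_terminals(rules, rule_name, cache):
        # rules: dict rule name -> list of alternatives, each a list of symbols;
        # a symbol is a terminal unless it is itself a rule name.
        cache[rule_name] = None              # dummy to detect left recursion
        result = set()
        for alternative in rules[rule_name]:
            symbol = alternative[0]
            if symbol in rules:
                if symbol in cache:
                    if cache[symbol] is None:
                        raise ValueError('left recursion for rule %r' % rule_name)
                    result |= cache[symbol]
                else:
                    result |= first_terminals(rules, symbol, cache)
            else:
                result.add(symbol)
        cache[rule_name] = result
        return result

    # Example: first_terminals({'expr': [['NAME'], ['(', 'expr', ')']]}, 'expr', {})
    # returns {'NAME', '('}.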