Way better documentation for the DFA generator

2025-12-23 04:31:33 +08:00 · 2018-06-28 10:08:09 +02:00
parent 375ebf2181
commit ecdb90d9bc
1 changed files with 46 additions and 9 deletions
--- a/parso/pgen2/generator.py
+++ b/parso/pgen2/generator.py
@@ -35,9 +35,10 @@ class Grammar(object):
    """
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
-    accesses the instance variables directly.  The class here does not
-    provide initialization of the tables; several subclasses exist to
-    do this (see the conv and pgen modules).
+    accesses the instance variables directly.
+
+    The only important parts of this parser are the DFAs and the transitions
+    between DFAs.
    """

    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
@@ -47,6 +48,10 @@ class Grammar(object):


 class DFAPlan(object):
+    """
+    Plans are used for the parser to create stack nodes and do the proper
+    DFA state transitions.
+    """
    def __init__(self, next_dfa, dfa_pushes=[]):
        self.next_dfa = next_dfa
        self.dfa_pushes = dfa_pushes
@@ -56,6 +61,15 @@ class DFAPlan(object):


 class DFAState(object):
+    """
+    The DFAState object is the core class for pretty much anything. DFAStates
+    are the vertices of an ordered graph while arcs and transitions are the
+    edges.
+
+    Arcs are the initial edges, where most DFAStates are not connected.
+    Transitions are then calculated to connect the DFA state machines that
+    belong to different nonterminals.
+    """
    def __init__(self, from_rule, nfa_set, final):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
@@ -106,6 +120,12 @@ class DFAState(object):


 class ReservedString(object):
+    """
+    Most grammars will have certain keywords and operators that are mentioned
+    in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
+    This class represents the former.
+    """
+
    def __init__(self, value):
        self.value = value

@@ -114,12 +134,14 @@ class ReservedString(object):


 def _simplify_dfas(dfas):
-    # This is not theoretically optimal, but works well enough.
-    # Algorithm: repeatedly look for two states that have the same
-    # set of arcs (same labels pointing to the same nodes) and
-    # unify them, until things stop changing.
+    """
+    This is not theoretically optimal, but works well enough.
+    Algorithm: repeatedly look for two states that have the same
+    set of arcs (same labels pointing to the same nodes) and
+    unify them, until things stop changing.

-    # dfas is a list of DFAState instances
+    dfas is a list of DFAState instances
+    """
    changes = True
    while changes:
        changes = False
@@ -137,7 +159,10 @@ def _simplify_dfas(dfas):

 def _make_dfas(start, finish):
    """
-    This is basically doing what the powerset construction algorithm is doing.
+    Uses the powerset construction algorithm to create DFA states from sets of
+    NFA states.
+
+    Also does state reduction if some states are not needed.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA.  Then do some
@@ -250,6 +275,10 @@ def generate_grammar(bnf_grammar, token_namespace):


 def _make_transition(token_namespace, reserved_syntax_strings, label):
+    """
+    Creates a reserved string ("if", "for", "*", ...) or returns the token type
+    (NUMBER, STRING, ...) for a given grammar terminal.
+    """
    if label[0].isalpha():
        # A named token (e.g. NAME, NUMBER, STRING)
        return getattr(token_namespace, label)
@@ -267,6 +296,10 @@ def _make_transition(token_namespace, reserved_syntax_strings, label):


 def _calculate_tree_traversal(nonterminal_to_dfas):
+    """
+    By this point we know how dfas can move around within a stack node, but we
+    don't know how we can add a new stack node (nonterminal transitions).
+    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

@@ -287,6 +320,10 @@ def _calculate_tree_traversal(nonterminal_to_dfas):


 def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
+    """
+    Calculates the first plan in the first_plans dictionary for every given
+    nonterminal. This is going to be used to know when to create stack nodes.
+    """
    dfas = nonterminal_to_dfas[nonterminal]
    new_first_plans = {}
    first_plans[nonterminal] = None  # dummy to detect left recursion