A lot of new code that should hopefully allow a transition to a better parsing mechanism in the future

This commit is contained in:
Dave Halter
2018-06-21 18:17:32 +02:00
parent d691bf0fd1
commit d8554d86d1
3 changed files with 66 additions and 0 deletions

View File

@@ -19,6 +19,12 @@ fallback token code OP, but the parser needs the actual token code.
from parso.python import token
class DFAPlan(object):
    """A precomputed transition plan for a single token label.

    `next_dfa` is the DFA state the top-of-stack node advances to, and
    `pushes` lists the DFA states that must be pushed onto the parser
    stack as part of taking this transition.
    """
    def __init__(self, next_dfa, pushes=None):
        self.next_dfa = next_dfa
        # A mutable default argument (``pushes=[]``) would be shared by
        # every instance created without an explicit argument; build a
        # fresh list per instance instead.
        self.pushes = [] if pushes is None else pushes

    def __repr__(self):
        # Helpful when debugging the generated parse tables.
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa,
                               self.pushes)
class Grammar(object):
"""Pgen parsing tables conversion class.
@@ -117,6 +123,17 @@ class Grammar(object):
self.states.append(states)
self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal))
for dfas in self._nonterminal_to_dfas.values():
for dfa_state in dfas:
dfa_state.ilabel_to_plan = plans = {}
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
if terminal_or_nonterminal in self.nonterminal2number:
for first in self._make_first(terminal_or_nonterminal):
plans[first] = None
else:
ilabel = self._make_label(terminal_or_nonterminal)
plans[ilabel] = DFAPlan(next_dfa)
def _make_first(self, nonterminal):
rawfirst = self._first_terminals[nonterminal]
first = set()

View File

@@ -54,6 +54,16 @@ class Stack(list):
return sorted(check())
class StackNode(object):
    """One entry of the parser stack.

    Wraps the ``dfa`` currently being driven and accumulates the child
    ``nodes`` (leaves and reduced nodes) parsed for it so far.
    """
    def __init__(self, dfa):
        self.dfa = dfa
        # Children collected for this entry; folded into a tree node
        # once the DFA reaches a final state.
        self.nodes = []

    @property
    def nonterminal(self):
        # The rule name this DFA was generated from (``dfa.from_rule``).
        return self.dfa.from_rule

    def __repr__(self):
        # Added for debuggability of the parser stack.
        return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
def token_to_ilabel(grammar, type_, value):
# Map from token to label
# TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
@@ -152,6 +162,44 @@ class PgenParser(object):
raise InternalParseError("incomplete input", type_, value, start_pos)
return self.rootnode
def add_token(self, type_, value, start_pos, prefix):
    """Add a token; return True if this is the end of the program."""
    # NOTE(review): a second ``add_token`` is defined later in this class
    # body and rebinds the name, so this WIP version is dead code until
    # the transition to the new parsing mechanism is finished.
    ilabel = token_to_ilabel(self.grammar, type_, value)

    stack = self.stack
    while True:
        # NOTE(review): bare ``ilabel`` is a no-op expression statement --
        # presumably leftover debugging; confirm before removing.
        ilabel
        try:
            # Look up the transition plan for this token label in the DFA
            # on top of the stack.
            plan = stack[-1].current_dfa.ilabel_to_plan[ilabel]
        except KeyError:
            # No transition for this token: hand off to error recovery.
            self.error_recovery(self.grammar, stack, type_,
                                value, start_pos, prefix, self.add_token)
            break

        # Advance the top DFA and push whatever stack nodes the plan
        # requires for nested rules.
        stack[-1].current_dfa = plan.next_dfa
        for push in plan.pushes:
            stack.append(StackNode(push.dfa))

        # Turn the token into a leaf and attach it to the current node.
        leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
        stack[-1].nodes.append(leaf)

        # Reduce: while the DFA on top is in a final state, pop it and
        # fold its collected children into the entry below.
        while stack[-1].current_dfa.is_final:
            tos = self.stack.pop()
            # If there's exactly one child, return that child instead of
            # creating a new node. We still create expr_stmt and
            # file_input though, because a lot of Jedi depends on its
            # logic.
            if len(tos.nodes) == 1:
                new_node = tos.nodes[0]
            else:
                new_node = self.convert_node(self.grammar, type_, tos.nodes)
            try:
                stack[-1].nodes.append(new_node)
            except IndexError:
                # Stack is empty, set the rootnode.
                self.rootnode = new_node
def add_token(self, type_, value, start_pos, prefix):
"""Add a token; return True if this is the end of the program."""
ilabel = token_to_ilabel(self.grammar, type_, value)

View File

@@ -31,6 +31,7 @@ class DFAState(object):
self.nfa_set = nfa_set
self.isfinal = final in nfa_set
self.arcs = {} # map from terminals/nonterminals to DFAState
self.ilabel_to_plan = {}
def add_arc(self, next_, label):
assert isinstance(label, str)