From d8554d86d1d44f442a07459594560038d8800749 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 18:17:32 +0200 Subject: [PATCH] A lot of new code to hopefully transition to a better parsing mechanism in the future --- parso/pgen2/grammar.py | 17 +++++++++++++++ parso/pgen2/parse.py | 48 ++++++++++++++++++++++++++++++++++++++++++ parso/pgen2/pgen.py | 1 + 3 files changed, 66 insertions(+) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 8247b26..f8292b7 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -19,6 +19,12 @@ fallback token code OP, but the parser needs the actual token code. from parso.python import token +class DFAPlan(object): + def __init__(self, next_dfa, pushes=[]): + self.next_dfa = next_dfa + self.pushes = pushes + + class Grammar(object): """Pgen parsing tables conversion class. @@ -117,6 +123,17 @@ class Grammar(object): self.states.append(states) self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal)) + for dfas in self._nonterminal_to_dfas.values(): + for dfa_state in dfas: + dfa_state.ilabel_to_plan = plans = {} + for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): + if terminal_or_nonterminal in self.nonterminal2number: + for first in self._make_first(terminal_or_nonterminal): + plans[first] = None + else: + ilabel = self._make_label(terminal_or_nonterminal) + plans[ilabel] = DFAPlan(next_dfa) + def _make_first(self, nonterminal): rawfirst = self._first_terminals[nonterminal] first = set() diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 42d2ad5..8c3cf33 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -54,6 +54,16 @@ class Stack(list): return sorted(check()) +class StackNode(object): + def __init__(self, dfa): + self.dfa = dfa + self.nodes = [] + + @property + def nonterminal(self): + return self.dfa.from_rule + + def token_to_ilabel(grammar, type_, value): # Map from token to label # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the @@ -152,6 +162,44 @@ class PgenParser(object): raise InternalParseError("incomplete input", type_, value, start_pos) return self.rootnode + def add_token(self, type_, value, start_pos, prefix): + """Add a token; return True if this is the end of the program.""" + ilabel = token_to_ilabel(self.grammar, type_, value) + stack = self.stack + + while True: + ilabel + try: + plan = stack[-1].current_dfa.ilabel_to_plan[ilabel] + except KeyError: + self.error_recovery(self.grammar, stack, type_, + value, start_pos, prefix, self.add_token) + break + + stack[-1].current_dfa = plan.next_dfa + for push in plan.pushes: + stack.append(StackNode(push.dfa)) + + leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) + stack[-1].nodes.append(leaf) + + while stack[-1].current_dfa.is_final: + tos = self.stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(self.grammar, type_, tos.nodes) + + try: + stack[-1].nodes.append(new_node) + except IndexError: + # Stack is empty, set the rootnode. + self.rootnode = new_node + def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" ilabel = token_to_ilabel(self.grammar, type_, value) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 8caff85..cf37e12 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -31,6 +31,7 @@ class DFAState(object): self.nfa_set = nfa_set self.isfinal = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState + self.ilabel_to_plan = {} def add_arc(self, next_, label): assert isinstance(label, str)