A lot of new code that should hopefully allow a transition to a better parsing mechanism in the future

This commit is contained in:
Dave Halter
2018-06-21 18:17:32 +02:00
parent d691bf0fd1
commit d8554d86d1
3 changed files with 66 additions and 0 deletions

View File

@@ -19,6 +19,12 @@ fallback token code OP, but the parser needs the actual token code.
from parso.python import token
class DFAPlan(object):
    """A precomputed transition plan for a single token label.

    `next_dfa` is the DFA state the top-of-stack node advances to, and
    `pushes` lists the DFA states that must be pushed onto the parser
    stack as part of taking this transition.
    """
    def __init__(self, next_dfa, pushes=None):
        self.next_dfa = next_dfa
        # A mutable default argument (``pushes=[]``) would be shared by
        # every instance created without an explicit argument; build a
        # fresh list per instance instead.
        self.pushes = [] if pushes is None else pushes

    def __repr__(self):
        # Helpful when debugging the generated parse tables.
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa,
                               self.pushes)
class Grammar(object):
"""Pgen parsing tables conversion class.
@@ -117,6 +123,17 @@ class Grammar(object):
self.states.append(states)
self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal))
for dfas in self._nonterminal_to_dfas.values():
for dfa_state in dfas:
dfa_state.ilabel_to_plan = plans = {}
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
if terminal_or_nonterminal in self.nonterminal2number:
for first in self._make_first(terminal_or_nonterminal):
plans[first] = None
else:
ilabel = self._make_label(terminal_or_nonterminal)
plans[ilabel] = DFAPlan(next_dfa)
def _make_first(self, nonterminal):
rawfirst = self._first_terminals[nonterminal]
first = set()

View File

@@ -54,6 +54,16 @@ class Stack(list):
return sorted(check())
class StackNode(object):
    """One entry of the parser stack.

    Wraps the ``dfa`` currently being driven and accumulates the child
    ``nodes`` (leaves and reduced nodes) parsed for it so far.
    """
    def __init__(self, dfa):
        self.dfa = dfa
        # Children collected for this entry; folded into a tree node
        # once the DFA reaches a final state.
        self.nodes = []

    @property
    def nonterminal(self):
        # The rule name this DFA was generated from (``dfa.from_rule``).
        return self.dfa.from_rule

    def __repr__(self):
        # Added for debuggability of the parser stack.
        return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
def token_to_ilabel(grammar, type_, value):
# Map from token to label
# TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
@@ -152,6 +162,44 @@ class PgenParser(object):
raise InternalParseError("incomplete input", type_, value, start_pos)
return self.rootnode
def add_token(self, type_, value, start_pos, prefix):
    """Add a token; return True if this is the end of the program."""
    # NOTE(review): a second ``add_token`` is defined later in this class
    # body and rebinds the name, so this WIP version is dead code until
    # the transition to the new parsing mechanism is finished.
    ilabel = token_to_ilabel(self.grammar, type_, value)

    stack = self.stack
    while True:
        # NOTE(review): bare ``ilabel`` is a no-op expression statement --
        # presumably leftover debugging; confirm before removing.
        ilabel
        try:
            # Look up the transition plan for this token label in the DFA
            # on top of the stack.
            plan = stack[-1].current_dfa.ilabel_to_plan[ilabel]
        except KeyError:
            # No transition for this token: hand off to error recovery.
            self.error_recovery(self.grammar, stack, type_,
                                value, start_pos, prefix, self.add_token)
            break

        # Advance the top DFA and push whatever stack nodes the plan
        # requires for nested rules.
        stack[-1].current_dfa = plan.next_dfa
        for push in plan.pushes:
            stack.append(StackNode(push.dfa))

        # Turn the token into a leaf and attach it to the current node.
        leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
        stack[-1].nodes.append(leaf)

        # Reduce: while the DFA on top is in a final state, pop it and
        # fold its collected children into the entry below.
        while stack[-1].current_dfa.is_final:
            tos = self.stack.pop()
            # If there's exactly one child, return that child instead of
            # creating a new node. We still create expr_stmt and
            # file_input though, because a lot of Jedi depends on its
            # logic.
            if len(tos.nodes) == 1:
                new_node = tos.nodes[0]
            else:
                new_node = self.convert_node(self.grammar, type_, tos.nodes)
            try:
                stack[-1].nodes.append(new_node)
            except IndexError:
                # Stack is empty, set the rootnode.
                self.rootnode = new_node
def add_token(self, type_, value, start_pos, prefix):
"""Add a token; return True if this is the end of the program."""
ilabel = token_to_ilabel(self.grammar, type_, value)

View File

@@ -31,6 +31,7 @@ class DFAState(object):
self.nfa_set = nfa_set
self.isfinal = final in nfa_set
self.arcs = {} # map from terminals/nonterminals to DFAState
self.ilabel_to_plan = {}
def add_arc(self, next_, label):
assert isinstance(label, str)