mirror of
https://github.com/davidhalter/parso.git
synced 2026-01-01 00:53:39 +08:00
Move the Grammar to the pgen module
This commit is contained in:
@@ -1,99 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright David Halter and Contributors
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
|
||||
"""This module defines the data structures used to represent a grammar.
|
||||
|
||||
These are a bit arcane because they are derived from the data
|
||||
structures used by Python's 'pgen' parser generator.
|
||||
|
||||
There's also a table here mapping operators to their names in the
|
||||
token module; the Python tokenize module reports all operators as the
|
||||
fallback token code OP, but the parser needs the actual token code.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class DFAPlan(object):
|
||||
def __init__(self, next_dfa, dfa_pushes=[]):
|
||||
self.next_dfa = next_dfa
|
||||
self.dfa_pushes = dfa_pushes
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""Pgen parsing tables conversion class.
|
||||
|
||||
Once initialized, this class supplies the grammar tables for the
|
||||
parsing engine implemented by parse.py. The parsing engine
|
||||
accesses the instance variables directly. The class here does not
|
||||
provide initialization of the tables; several subclasses exist to
|
||||
do this (see the conv and pgen modules).
|
||||
"""
|
||||
|
||||
def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
|
||||
self.nonterminal_to_dfas = rule_to_dfas
|
||||
|
||||
self.reserved_syntax_strings = reserved_syntax_strings
|
||||
self.start_nonterminal = start_nonterminal
|
||||
|
||||
self._make_grammar()
|
||||
|
||||
def _make_grammar(self):
|
||||
# Map from grammar rule (nonterminal) name to a set of tokens.
|
||||
self._first_plans = {}
|
||||
|
||||
nonterminals = list(self.nonterminal_to_dfas.keys())
|
||||
nonterminals.sort()
|
||||
for nonterminal in nonterminals:
|
||||
if nonterminal not in self._first_plans:
|
||||
self._calculate_first_terminals(nonterminal)
|
||||
|
||||
# Now that we have calculated the first terminals, we are sure that
|
||||
# there is no left recursion or ambiguities.
|
||||
|
||||
for dfas in self.nonterminal_to_dfas.values():
|
||||
for dfa_state in dfas:
|
||||
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
|
||||
for transition, pushes in self._first_plans[nonterminal].items():
|
||||
dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes)
|
||||
|
||||
def _calculate_first_terminals(self, nonterminal):
|
||||
dfas = self.nonterminal_to_dfas[nonterminal]
|
||||
new_first_plans = {}
|
||||
self._first_plans[nonterminal] = None # dummy to detect left recursion
|
||||
# We only need to check the first dfa. All the following ones are not
|
||||
# interesting to find first terminals.
|
||||
state = dfas[0]
|
||||
for nonterminal2, next_ in state.nonterminal_arcs.items():
|
||||
# It's a nonterminal and we have either a left recursion issue
|
||||
# in the grammar or we have to recurse.
|
||||
try:
|
||||
first_plans2 = self._first_plans[nonterminal2]
|
||||
except KeyError:
|
||||
first_plans2 = self._calculate_first_terminals(nonterminal2)
|
||||
else:
|
||||
if first_plans2 is None:
|
||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||
|
||||
for t, pushes in first_plans2.items():
|
||||
check = new_first_plans.get(t)
|
||||
if check is not None:
|
||||
raise ValueError(
|
||||
"Rule %s is ambiguous; %s is the"
|
||||
" start of the rule %s as well as %s."
|
||||
% (nonterminal, t, nonterminal2, check[-1].from_rule)
|
||||
)
|
||||
new_first_plans[t] = [next_] + pushes
|
||||
|
||||
for transition, next_ in state.ilabel_to_plan.items():
|
||||
# It's a string. We have finally found a possible first token.
|
||||
new_first_plans[transition] = [next_.next_dfa]
|
||||
|
||||
self._first_plans[nonterminal] = new_first_plans
|
||||
return new_first_plans
|
||||
@@ -6,6 +6,8 @@
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
|
||||
"""
|
||||
This module defines the data structures used to represent a grammar.
|
||||
|
||||
Specifying grammars in pgen is possible with this grammar::
|
||||
|
||||
grammar: (NEWLINE | rule)* ENDMARKER
|
||||
@@ -20,10 +22,90 @@ This grammar is self-referencing.
|
||||
|
||||
from ast import literal_eval
|
||||
|
||||
from parso.pgen2.grammar import Grammar, DFAPlan
|
||||
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
||||
|
||||
|
||||
class DFAPlan(object):
|
||||
def __init__(self, next_dfa, dfa_pushes=[]):
|
||||
self.next_dfa = next_dfa
|
||||
self.dfa_pushes = dfa_pushes
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""Pgen parsing tables conversion class.
|
||||
|
||||
Once initialized, this class supplies the grammar tables for the
|
||||
parsing engine implemented by parse.py. The parsing engine
|
||||
accesses the instance variables directly. The class here does not
|
||||
provide initialization of the tables; several subclasses exist to
|
||||
do this (see the conv and pgen modules).
|
||||
"""
|
||||
|
||||
def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
|
||||
self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]]
|
||||
self.reserved_syntax_strings = reserved_syntax_strings
|
||||
self.start_nonterminal = start_nonterminal
|
||||
|
||||
self._make_grammar()
|
||||
|
||||
def _make_grammar(self):
|
||||
# Map from grammar rule (nonterminal) name to a set of tokens.
|
||||
self._first_plans = {}
|
||||
|
||||
nonterminals = list(self.nonterminal_to_dfas.keys())
|
||||
nonterminals.sort()
|
||||
for nonterminal in nonterminals:
|
||||
if nonterminal not in self._first_plans:
|
||||
self._calculate_first_terminals(nonterminal)
|
||||
|
||||
# Now that we have calculated the first terminals, we are sure that
|
||||
# there is no left recursion or ambiguities.
|
||||
|
||||
for dfas in self.nonterminal_to_dfas.values():
|
||||
for dfa_state in dfas:
|
||||
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
|
||||
for transition, pushes in self._first_plans[nonterminal].items():
|
||||
dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes)
|
||||
|
||||
def _calculate_first_terminals(self, nonterminal):
|
||||
dfas = self.nonterminal_to_dfas[nonterminal]
|
||||
new_first_plans = {}
|
||||
self._first_plans[nonterminal] = None # dummy to detect left recursion
|
||||
# We only need to check the first dfa. All the following ones are not
|
||||
# interesting to find first terminals.
|
||||
state = dfas[0]
|
||||
for nonterminal2, next_ in state.nonterminal_arcs.items():
|
||||
# It's a nonterminal and we have either a left recursion issue
|
||||
# in the grammar or we have to recurse.
|
||||
try:
|
||||
first_plans2 = self._first_plans[nonterminal2]
|
||||
except KeyError:
|
||||
first_plans2 = self._calculate_first_terminals(nonterminal2)
|
||||
else:
|
||||
if first_plans2 is None:
|
||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||
|
||||
for t, pushes in first_plans2.items():
|
||||
check = new_first_plans.get(t)
|
||||
if check is not None:
|
||||
raise ValueError(
|
||||
"Rule %s is ambiguous; %s is the"
|
||||
" start of the rule %s as well as %s."
|
||||
% (nonterminal, t, nonterminal2, check[-1].from_rule)
|
||||
)
|
||||
new_first_plans[t] = [next_] + pushes
|
||||
|
||||
for transition, next_ in state.ilabel_to_plan.items():
|
||||
# It's a string. We have finally found a possible first token.
|
||||
new_first_plans[transition] = [next_.next_dfa]
|
||||
|
||||
self._first_plans[nonterminal] = new_first_plans
|
||||
return new_first_plans
|
||||
|
||||
|
||||
class DFAState(object):
|
||||
def __init__(self, from_rule, nfa_set, final):
|
||||
assert isinstance(nfa_set, set)
|
||||
|
||||
Reference in New Issue
Block a user