mirror of
https://github.com/davidhalter/parso.git
synced 2026-02-27 12:02:36 +08:00
Move the Grammar to the pgen module
This commit is contained in:
@@ -1,99 +0,0 @@
|
|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
|
||||||
# Licensed to PSF under a Contributor Agreement.
|
|
||||||
|
|
||||||
# Modifications:
|
|
||||||
# Copyright David Halter and Contributors
|
|
||||||
# Modifications are dual-licensed: MIT and PSF.
|
|
||||||
|
|
||||||
"""This module defines the data structures used to represent a grammar.
|
|
||||||
|
|
||||||
These are a bit arcane because they are derived from the data
|
|
||||||
structures used by Python's 'pgen' parser generator.
|
|
||||||
|
|
||||||
There's also a table here mapping operators to their names in the
|
|
||||||
token module; the Python tokenize module reports all operators as the
|
|
||||||
fallback token code OP, but the parser needs the actual token code.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class DFAPlan(object):
    """Pair a target DFA with an ordered list of additional DFAs.

    ``next_dfa`` is stored exactly as given.  ``dfa_pushes`` is the list
    that accompanies it (presumably the DFA states a parser pushes before
    continuing at ``next_dfa`` -- confirm against the parser code).

    :param next_dfa: the DFA state this plan leads to.
    :param dfa_pushes: optional list of DFA states; a new empty list is
        used when omitted.
    """

    def __init__(self, next_dfa, dfa_pushes=None):
        self.next_dfa = next_dfa
        # A literal ``[]`` default is created once and shared by every plan
        # constructed without explicit pushes, so mutating one plan's list
        # would leak into all the others.  Allocate a fresh list instead.
        # An explicitly passed list is stored as-is (not copied).
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
|
||||||
|
|
||||||
|
|
||||||
class Grammar(object):
    """Grammar tables derived from per-rule DFAs.

    The constructor stores the start nonterminal, the mapping from
    nonterminal names to their lists of DFA states and the reserved syntax
    strings, and then immediately calls ``_make_grammar()``.  That step
    computes the "first plans" of every nonterminal and writes ``DFAPlan``
    objects into the ``ilabel_to_plan`` mapping of each DFA state.

    :param start_nonterminal: name of the start rule (stored unchanged).
    :param rule_to_dfas: dict mapping a nonterminal name to its list of
        DFA states; the first element is treated as the initial state.
    :param reserved_syntax_strings: stored unchanged.
    :raises ValueError: if a rule is left recursive or two nonterminal
        arcs of a rule's initial state share a first transition.
    """

    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.nonterminal_to_dfas = rule_to_dfas

        self.reserved_syntax_strings = reserved_syntax_strings
        self.start_nonterminal = start_nonterminal

        self._make_grammar()

    def _make_grammar(self):
        """Compute all first plans and attach ``DFAPlan`` objects to the DFAs.

        Mutates the ``ilabel_to_plan`` dict of every DFA state in place.
        """
        # Maps a nonterminal name to a dict of
        # ``{first transition: [DFA states leading to it]}``.  While a rule
        # is still being computed its entry is ``None``.
        self._first_plans = {}

        # Sorted order keeps the traversal -- and therefore which error is
        # reported first -- independent of dict ordering.
        for nonterminal in sorted(self.nonterminal_to_dfas):
            if nonterminal not in self._first_plans:
                self._calculate_first_terminals(nonterminal)

        # Now that we have calculated the first terminals, we are sure that
        # there is no left recursion or ambiguities.

        for dfas in self.nonterminal_to_dfas.values():
            for dfa_state in dfas:
                for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                    for transition, pushes in self._first_plans[nonterminal].items():
                        # NOTE(review): an existing entry for ``transition``
                        # is replaced here without any check.
                        dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes)

    def _calculate_first_terminals(self, nonterminal):
        """Return (and cache) the first plans of ``nonterminal``.

        The result maps every transition that can start the rule to the
        list of DFA states collected on the way to it.  Rules reachable
        through nonterminal arcs of the initial state are computed
        recursively.

        :raises ValueError: on left recursion, or when two nonterminal
            arcs of the initial state share a first transition.
        """
        dfas = self.nonterminal_to_dfas[nonterminal]
        new_first_plans = {}
        self._first_plans[nonterminal] = None  # dummy to detect left recursion
        # We only need to check the first dfa. All the following ones are not
        # interesting to find first terminals.
        state = dfas[0]
        for nonterminal2, next_ in state.nonterminal_arcs.items():
            # It's a nonterminal and we have either a left recursion issue
            # in the grammar or we have to recurse.
            try:
                first_plans2 = self._first_plans[nonterminal2]
            except KeyError:
                first_plans2 = self._calculate_first_terminals(nonterminal2)
            else:
                # ``None`` is the in-progress marker set above: the rule is
                # (directly or indirectly) its own first element.
                if first_plans2 is None:
                    raise ValueError("left recursion for rule %r" % nonterminal)

            for t, pushes in first_plans2.items():
                check = new_first_plans.get(t)
                if check is not None:
                    raise ValueError(
                        "Rule %s is ambiguous; %s is the"
                        " start of the rule %s as well as %s."
                        % (nonterminal, t, nonterminal2, check[-1].from_rule)
                    )
                new_first_plans[t] = [next_] + pushes

        for transition, next_ in state.ilabel_to_plan.items():
            # It's a string. We have finally found a possible first token.
            # NOTE(review): this assignment replaces any plan stored for the
            # same transition by the loop above without raising.
            new_first_plans[transition] = [next_.next_dfa]

        self._first_plans[nonterminal] = new_first_plans
        return new_first_plans
|
|
||||||
@@ -6,6 +6,8 @@
|
|||||||
# Modifications are dual-licensed: MIT and PSF.
|
# Modifications are dual-licensed: MIT and PSF.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
This module defines the data structures used to represent a grammar.
|
||||||
|
|
||||||
Specifying grammars in pgen is possible with this grammar::
|
Specifying grammars in pgen is possible with this grammar::
|
||||||
|
|
||||||
grammar: (NEWLINE | rule)* ENDMARKER
|
grammar: (NEWLINE | rule)* ENDMARKER
|
||||||
@@ -20,10 +22,90 @@ This grammar is self-referencing.
|
|||||||
|
|
||||||
from ast import literal_eval
|
from ast import literal_eval
|
||||||
|
|
||||||
from parso.pgen2.grammar import Grammar, DFAPlan
|
|
||||||
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
||||||
|
|
||||||
|
|
||||||
|
class DFAPlan(object):
    """Pair a target DFA with an ordered list of additional DFAs.

    ``next_dfa`` is stored exactly as given.  ``dfa_pushes`` is the list
    that accompanies it (presumably the DFA states a parser pushes before
    continuing at ``next_dfa`` -- confirm against the parser code).

    :param next_dfa: the DFA state this plan leads to.
    :param dfa_pushes: optional list of DFA states; a new empty list is
        used when omitted.
    """

    def __init__(self, next_dfa, dfa_pushes=None):
        self.next_dfa = next_dfa
        # A literal ``[]`` default is created once and shared by every plan
        # constructed without explicit pushes, so mutating one plan's list
        # would leak into all the others.  Allocate a fresh list instead.
        # An explicitly passed list is stored as-is (not copied).
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
||||||
|
|
||||||
|
|
||||||
|
class Grammar(object):
    """Grammar tables derived from per-rule DFAs.

    The constructor stores the start nonterminal, the mapping from
    nonterminal names to their lists of DFA states and the reserved syntax
    strings, and then immediately calls ``_make_grammar()``.  That step
    computes the "first plans" of every nonterminal and writes ``DFAPlan``
    objects into the ``ilabel_to_plan`` mapping of each DFA state.

    :param start_nonterminal: name of the start rule (stored unchanged).
    :param rule_to_dfas: dict mapping a nonterminal name to its list of
        DFA states; the first element is treated as the initial state.
    :param reserved_syntax_strings: stored unchanged.
    :raises ValueError: if a rule is left recursive or two nonterminal
        arcs of a rule's initial state share a first transition.
    """

    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.nonterminal_to_dfas = rule_to_dfas  # Dict[str, List[DFAState]]
        self.reserved_syntax_strings = reserved_syntax_strings
        self.start_nonterminal = start_nonterminal

        self._make_grammar()

    def _make_grammar(self):
        """Compute all first plans and attach ``DFAPlan`` objects to the DFAs.

        Mutates the ``ilabel_to_plan`` dict of every DFA state in place.
        """
        # Maps a nonterminal name to a dict of
        # ``{first transition: [DFA states leading to it]}``.  While a rule
        # is still being computed its entry is ``None``.
        self._first_plans = {}

        # Sorted order keeps the traversal -- and therefore which error is
        # reported first -- independent of dict ordering.
        for nonterminal in sorted(self.nonterminal_to_dfas):
            if nonterminal not in self._first_plans:
                self._calculate_first_terminals(nonterminal)

        # Now that we have calculated the first terminals, we are sure that
        # there is no left recursion or ambiguities.

        for dfas in self.nonterminal_to_dfas.values():
            for dfa_state in dfas:
                for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                    for transition, pushes in self._first_plans[nonterminal].items():
                        # NOTE(review): an existing entry for ``transition``
                        # is replaced here without any check.
                        dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes)

    def _calculate_first_terminals(self, nonterminal):
        """Return (and cache) the first plans of ``nonterminal``.

        The result maps every transition that can start the rule to the
        list of DFA states collected on the way to it.  Rules reachable
        through nonterminal arcs of the initial state are computed
        recursively.

        :raises ValueError: on left recursion, or when two nonterminal
            arcs of the initial state share a first transition.
        """
        dfas = self.nonterminal_to_dfas[nonterminal]
        new_first_plans = {}
        self._first_plans[nonterminal] = None  # dummy to detect left recursion
        # We only need to check the first dfa. All the following ones are not
        # interesting to find first terminals.
        state = dfas[0]
        for nonterminal2, next_ in state.nonterminal_arcs.items():
            # It's a nonterminal and we have either a left recursion issue
            # in the grammar or we have to recurse.
            try:
                first_plans2 = self._first_plans[nonterminal2]
            except KeyError:
                first_plans2 = self._calculate_first_terminals(nonterminal2)
            else:
                # ``None`` is the in-progress marker set above: the rule is
                # (directly or indirectly) its own first element.
                if first_plans2 is None:
                    raise ValueError("left recursion for rule %r" % nonterminal)

            for t, pushes in first_plans2.items():
                check = new_first_plans.get(t)
                if check is not None:
                    raise ValueError(
                        "Rule %s is ambiguous; %s is the"
                        " start of the rule %s as well as %s."
                        % (nonterminal, t, nonterminal2, check[-1].from_rule)
                    )
                new_first_plans[t] = [next_] + pushes

        for transition, next_ in state.ilabel_to_plan.items():
            # It's a string. We have finally found a possible first token.
            # NOTE(review): this assignment replaces any plan stored for the
            # same transition by the loop above without raising.
            new_first_plans[transition] = [next_.next_dfa]

        self._first_plans[nonterminal] = new_first_plans
        return new_first_plans
|
||||||
|
|
||||||
|
|
||||||
class DFAState(object):
|
class DFAState(object):
|
||||||
def __init__(self, from_rule, nfa_set, final):
|
def __init__(self, from_rule, nfa_set, final):
|
||||||
assert isinstance(nfa_set, set)
|
assert isinstance(nfa_set, set)
|
||||||
|
|||||||
Reference in New Issue
Block a user