mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-07 13:24:39 +08:00
Fix an ambiguity issue
Unfortunately had to refactor most of the transition generation
This commit is contained in:
@@ -16,8 +16,6 @@ fallback token code OP, but the parser needs the actual token code.
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from ast import literal_eval
|
|
||||||
|
|
||||||
|
|
||||||
class DFAPlan(object):
|
class DFAPlan(object):
|
||||||
def __init__(self, next_dfa, dfa_pushes=[]):
|
def __init__(self, next_dfa, dfa_pushes=[]):
|
||||||
@@ -28,11 +26,6 @@ class DFAPlan(object):
|
|||||||
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
||||||
|
|
||||||
|
|
||||||
class ReservedString(object):
|
|
||||||
def __init__(self, value):
|
|
||||||
self.value = value
|
|
||||||
|
|
||||||
|
|
||||||
class Grammar(object):
|
class Grammar(object):
|
||||||
"""Pgen parsing tables conversion class.
|
"""Pgen parsing tables conversion class.
|
||||||
|
|
||||||
@@ -43,11 +36,10 @@ class Grammar(object):
|
|||||||
do this (see the conv and pgen modules).
|
do this (see the conv and pgen modules).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace):
|
def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
|
||||||
self._token_namespace = token_namespace
|
|
||||||
self._nonterminal_to_dfas = rule_to_dfas
|
self._nonterminal_to_dfas = rule_to_dfas
|
||||||
|
|
||||||
self.reserved_syntax_strings = {}
|
self.reserved_syntax_strings = reserved_syntax_strings
|
||||||
self.start_nonterminal = start_nonterminal
|
self.start_nonterminal = start_nonterminal
|
||||||
|
|
||||||
self._make_grammar()
|
self._make_grammar()
|
||||||
@@ -68,33 +60,10 @@ class Grammar(object):
|
|||||||
|
|
||||||
for dfas in self._nonterminal_to_dfas.values():
|
for dfas in self._nonterminal_to_dfas.values():
|
||||||
for dfa_state in dfas:
|
for dfa_state in dfas:
|
||||||
dfa_state.ilabel_to_plan = plans = {}
|
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
|
||||||
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
|
for transition, pushes in self._first_plans[nonterminal].items():
|
||||||
if terminal_or_nonterminal in self._nonterminal_to_dfas:
|
dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes)
|
||||||
for t, pushes in self._first_plans[terminal_or_nonterminal].items():
|
#print(dfa_state.from_rule, dfa_state.ilabel_to_plan)
|
||||||
plans[self._make_label(t)] = DFAPlan(next_dfa, pushes)
|
|
||||||
else:
|
|
||||||
ilabel = self._make_label(terminal_or_nonterminal)
|
|
||||||
plans[ilabel] = DFAPlan(next_dfa)
|
|
||||||
|
|
||||||
def _make_label(self, label):
|
|
||||||
if label[0].isalpha():
|
|
||||||
# Either a nonterminal name or a named token
|
|
||||||
assert label not in self._nonterminal_to_dfas
|
|
||||||
|
|
||||||
# A named token (e.g. NAME, NUMBER, STRING)
|
|
||||||
token_type = getattr(self._token_namespace, label, None)
|
|
||||||
return token_type
|
|
||||||
else:
|
|
||||||
# Either a keyword or an operator
|
|
||||||
assert label[0] in ('"', "'"), label
|
|
||||||
# TODO use literal_eval instead of a simple eval.
|
|
||||||
value = literal_eval(label)
|
|
||||||
try:
|
|
||||||
return self.reserved_syntax_strings[value]
|
|
||||||
except KeyError:
|
|
||||||
r = self.reserved_syntax_strings[value] = ReservedString(value)
|
|
||||||
return r
|
|
||||||
|
|
||||||
def _calculate_first_terminals(self, nonterminal):
|
def _calculate_first_terminals(self, nonterminal):
|
||||||
dfas = self._nonterminal_to_dfas[nonterminal]
|
dfas = self._nonterminal_to_dfas[nonterminal]
|
||||||
@@ -105,35 +74,35 @@ class Grammar(object):
|
|||||||
state = dfas[0]
|
state = dfas[0]
|
||||||
totalset = set()
|
totalset = set()
|
||||||
overlapcheck = {}
|
overlapcheck = {}
|
||||||
for nonterminal_or_string, next_ in state.arcs.items():
|
for transition, next_ in state.ilabel_to_plan.items():
|
||||||
if nonterminal_or_string in self._nonterminal_to_dfas:
|
# It's a string. We have finally found a possible first token.
|
||||||
|
totalset.add(transition)
|
||||||
|
#overlapcheck[nonterminal] = set([transition])
|
||||||
|
first_plans[transition] = [next_.next_dfa]
|
||||||
|
|
||||||
|
for nonterminal2, next_ in state.nonterminal_arcs.items():
|
||||||
# It's a nonterminal and we have either a left recursion issue
|
# It's a nonterminal and we have either a left recursion issue
|
||||||
# in the grammar or we have to recurse.
|
# in the grammar or we have to recurse.
|
||||||
try:
|
try:
|
||||||
fset = self._first_terminals[nonterminal_or_string]
|
fset = self._first_terminals[nonterminal2]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
self._calculate_first_terminals(nonterminal_or_string)
|
self._calculate_first_terminals(nonterminal2)
|
||||||
fset = self._first_terminals[nonterminal_or_string]
|
fset = self._first_terminals[nonterminal2]
|
||||||
else:
|
else:
|
||||||
if fset is None:
|
if fset is None:
|
||||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||||
totalset.update(fset)
|
totalset.update(fset)
|
||||||
overlapcheck[nonterminal_or_string] = fset
|
overlapcheck[nonterminal2] = fset
|
||||||
|
|
||||||
for t, pushes in self._first_plans[nonterminal_or_string].items():
|
for t, pushes in self._first_plans[nonterminal2].items():
|
||||||
check = first_plans.get(t)
|
check = first_plans.get(t)
|
||||||
if check is not None:
|
if check is not None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Rule %s is ambiguous; %s is the"
|
"Rule %s is ambiguous; %s is the"
|
||||||
" start of the rule %s as well as %s."
|
" start of the rule %s as well as %s."
|
||||||
% (nonterminal, t, nonterminal_or_string, check[-1].from_rule)
|
% (nonterminal, t, nonterminal2, check[-1].from_rule)
|
||||||
)
|
)
|
||||||
first_plans[t] = [next_] + pushes
|
first_plans[t] = [next_] + pushes
|
||||||
else:
|
|
||||||
# It's a string. We have finally found a possible first token.
|
|
||||||
totalset.add(nonterminal_or_string)
|
|
||||||
overlapcheck[nonterminal_or_string] = set([nonterminal_or_string])
|
|
||||||
first_plans[nonterminal_or_string] = [next_]
|
|
||||||
|
|
||||||
inverse = {}
|
inverse = {}
|
||||||
for nonterminal_or_string, first_set in overlapcheck.items():
|
for nonterminal_or_string, first_set in overlapcheck.items():
|
||||||
|
|||||||
@@ -18,7 +18,9 @@ Specifying grammars in pgen is possible with this grammar::
|
|||||||
This grammar is self-referencing.
|
This grammar is self-referencing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from parso.pgen2.grammar import Grammar
|
from ast import literal_eval
|
||||||
|
|
||||||
|
from parso.pgen2.grammar import Grammar, DFAPlan
|
||||||
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
||||||
|
|
||||||
|
|
||||||
@@ -32,6 +34,7 @@ class DFAState(object):
|
|||||||
self.is_final = final in nfa_set
|
self.is_final = final in nfa_set
|
||||||
self.arcs = {} # map from terminals/nonterminals to DFAState
|
self.arcs = {} # map from terminals/nonterminals to DFAState
|
||||||
self.ilabel_to_plan = {}
|
self.ilabel_to_plan = {}
|
||||||
|
self.nonterminal_arcs = {}
|
||||||
|
|
||||||
def add_arc(self, next_, label):
|
def add_arc(self, next_, label):
|
||||||
assert isinstance(label, str)
|
assert isinstance(label, str)
|
||||||
@@ -66,6 +69,14 @@ class DFAState(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ReservedString(object):
|
||||||
|
def __init__(self, value):
|
||||||
|
self.value = value
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, self.value)
|
||||||
|
|
||||||
|
|
||||||
def _simplify_dfas(dfas):
|
def _simplify_dfas(dfas):
|
||||||
# This is not theoretically optimal, but works well enough.
|
# This is not theoretically optimal, but works well enough.
|
||||||
# Algorithm: repeatedly look for two states that have the same
|
# Algorithm: repeatedly look for two states that have the same
|
||||||
@@ -184,4 +195,35 @@ def generate_grammar(bnf_grammar, token_namespace):
|
|||||||
if start_nonterminal is None:
|
if start_nonterminal is None:
|
||||||
start_nonterminal = nfa_a.from_rule
|
start_nonterminal = nfa_a.from_rule
|
||||||
|
|
||||||
return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace)
|
reserved_strings = {}
|
||||||
|
for nonterminal, dfas in rule_to_dfas.items():
|
||||||
|
for dfa_state in dfas:
|
||||||
|
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
|
||||||
|
if terminal_or_nonterminal in rule_to_dfas:
|
||||||
|
dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
|
||||||
|
else:
|
||||||
|
transition = _make_transition(
|
||||||
|
token_namespace,
|
||||||
|
reserved_strings,
|
||||||
|
terminal_or_nonterminal
|
||||||
|
)
|
||||||
|
dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa)
|
||||||
|
|
||||||
|
return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, reserved_strings)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_transition(token_namespace, reserved_syntax_strings, label):
|
||||||
|
if label[0].isalpha():
|
||||||
|
# A named token (e.g. NAME, NUMBER, STRING)
|
||||||
|
return getattr(token_namespace, label)
|
||||||
|
else:
|
||||||
|
# Either a keyword or an operator
|
||||||
|
assert label[0] in ('"', "'"), label
|
||||||
|
assert not label.startswith('"""') and not label.startswith("'''")
|
||||||
|
# TODO use literal_eval instead of a simple eval.
|
||||||
|
value = literal_eval(label)
|
||||||
|
try:
|
||||||
|
return reserved_syntax_strings[value]
|
||||||
|
except KeyError:
|
||||||
|
r = reserved_syntax_strings[value] = ReservedString(value)
|
||||||
|
return r
|
||||||
|
|||||||
@@ -282,3 +282,6 @@ def test_left_recursion():
|
|||||||
def test_ambiguities():
|
def test_ambiguities():
|
||||||
with pytest.raises(ValueError, match='ambiguous'):
|
with pytest.raises(ValueError, match='ambiguous'):
|
||||||
generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)
|
generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match='ambiguous'):
|
||||||
|
generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes)
|
||||||
|
|||||||
Reference in New Issue
Block a user