mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-06 21:04:29 +08:00
Make pgen2's grammar ambiguity detection handle more cases
Under the old implementation, ``` outer: A [inner] B C inner: B C [inner] ``` wouldn't get detected as the ambiguous grammar that it is, whereas ``` outer: A rest rest: [inner] B C inner: B C [inner] ``` would. This would manifest itself as non-determinism in the DFA state generation. See the discussion #62 on for a full explanation. This modifies the ambiguity detection to work on a broader class of issues, so it should now hopefully detect all cases where the given grammar is ambiguous. At some point, we could extend this logic to allow developers to optionally set precedence of grammar productions, which could resolve ambiguities, but that's not a strict requirement for parsing python.
This commit is contained in:
committed by
Dave Halter
parent
c0ace63a69
commit
0032bae041
@@ -309,13 +309,39 @@ def _calculate_tree_traversal(nonterminal_to_dfas):
|
||||
_calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
|
||||
|
||||
# Now that we have calculated the first terminals, we are sure that
|
||||
# there is no left recursion or ambiguities.
|
||||
# there is no left recursion.
|
||||
|
||||
for dfas in nonterminal_to_dfas.values():
|
||||
for dfa_state in dfas:
|
||||
transitions = dfa_state.transitions
|
||||
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
|
||||
for transition, pushes in first_plans[nonterminal].items():
|
||||
dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)
|
||||
if transition in transitions:
|
||||
prev_plan = transitions[transition]
|
||||
# Make sure these are sorted so that error messages are
|
||||
# at least deterministic
|
||||
choices = sorted([
|
||||
(
|
||||
prev_plan.dfa_pushes[0].from_rule
|
||||
if prev_plan.dfa_pushes
|
||||
else prev_plan.next_dfa.from_rule
|
||||
),
|
||||
(
|
||||
pushes[0].from_rule
|
||||
if pushes else next_dfa.from_rule
|
||||
),
|
||||
])
|
||||
raise ValueError(
|
||||
"Rule %s is ambiguous; given a %s token, we "
|
||||
"can't determine if we should evaluate %s or %s."
|
||||
% (
|
||||
(
|
||||
dfa_state.from_rule,
|
||||
transition,
|
||||
) + tuple(choices)
|
||||
)
|
||||
)
|
||||
transitions[transition] = DFAPlan(next_dfa, pushes)
|
||||
|
||||
|
||||
def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
|
||||
@@ -345,13 +371,6 @@ def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
|
||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||
|
||||
for t, pushes in first_plans2.items():
|
||||
check = new_first_plans.get(t)
|
||||
if check is not None:
|
||||
raise ValueError(
|
||||
"Rule %s is ambiguous; %s is the"
|
||||
" start of the rule %s as well as %s."
|
||||
% (nonterminal, t, nonterminal2, check[-1].from_rule)
|
||||
)
|
||||
new_first_plans[t] = [next_] + pushes
|
||||
|
||||
first_plans[nonterminal] = new_first_plans
|
||||
|
||||
@@ -293,11 +293,40 @@ def test_left_recursion():
|
||||
|
||||
|
||||
def test_ambiguities():
|
||||
with pytest.raises(ValueError, match='ambiguous'):
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r"foo is ambiguous.*given a TokenType\(NAME\).*bar or baz"
|
||||
):
|
||||
generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)
|
||||
|
||||
with pytest.raises(ValueError, match='ambiguous'):
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r"foo is ambiguous.*given a ReservedString\(x\).*bar or baz"
|
||||
):
|
||||
generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes)
|
||||
|
||||
with pytest.raises(ValueError, match='ambiguous'):
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r"foo is ambiguous.*given a ReservedString\(x\).*bar or foo"
|
||||
):
|
||||
generate_grammar('''foo: bar | 'x'\nbar: 'x'\n''', tokenize.PythonTokenTypes)
|
||||
|
||||
# an ambiguity with the second (not the first) child of a production
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r"outer is ambiguous.*given a ReservedString\(b\).*inner or outer"
|
||||
):
|
||||
generate_grammar(
|
||||
'outer: "a" [inner] "b" "c"\ninner: "b" "c" [inner]\n',
|
||||
tokenize.PythonTokenTypes,
|
||||
)
|
||||
|
||||
# an ambiguity hidden by a level of indirection (middle)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r"outer is ambiguous.*given a ReservedString\(b\).*middle or outer"
|
||||
):
|
||||
generate_grammar(
|
||||
'outer: "a" [middle] "b" "c"\nmiddle: inner\ninner: "b" "c" [inner]\n',
|
||||
tokenize.PythonTokenTypes,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user