Better detection of ambiguities

This commit is contained in:
Dave Halter
2018-06-25 01:56:02 +02:00
parent 43d4a8a834
commit da5aa8a2ab
2 changed files with 22 additions and 4 deletions

View File

@@ -99,7 +99,7 @@ class Grammar(object):
def _calculate_first_terminals(self, nonterminal):
dfas = self._nonterminal_to_dfas[nonterminal]
self._first_terminals[nonterminal] = None # dummy to detect left recursion
self._first_plans[nonterminal] = {}
first_plans = self._first_plans[nonterminal] = {}
# We only need to check the first dfa. All the following ones are not
# interesting to find first terminals.
state = dfas[0]
@@ -121,13 +121,19 @@ class Grammar(object):
overlapcheck[nonterminal_or_string] = fset
for t, pushes in self._first_plans[nonterminal_or_string].items():
assert not self._first_plans[nonterminal].get(t)
self._first_plans[nonterminal][t] = [next_] + pushes
check = first_plans.get(t)
if check is not None:
raise ValueError(
"Rule %s is ambiguous; %s is the"
" start of the rule %s as well as %s."
% (nonterminal, t, nonterminal_or_string, check[-1].from_rule)
)
first_plans[t] = [next_] + pushes
else:
# It's a string. We have finally found a possible first token.
totalset.add(nonterminal_or_string)
overlapcheck[nonterminal_or_string] = set([nonterminal_or_string])
self._first_plans[nonterminal][nonterminal_or_string] = [next_]
first_plans[nonterminal_or_string] = [next_]
inverse = {}
for nonterminal_or_string, first_set in overlapcheck.items():

View File

@@ -12,6 +12,8 @@ import pytest
from parso import load_grammar
from parso import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso.python import tokenize
def _parse(code, version=None):
@@ -270,3 +272,13 @@ def py_br(each_version):
def test_py3_rb(works_ge_py3):
works_ge_py3.parse("rb'1'")
works_ge_py3.parse("RB'1'")
def test_left_recursion():
    """A directly left-recursive rule must abort grammar generation."""
    bnf = 'foo: foo NAME\n'
    with pytest.raises(ValueError, match='left recursion'):
        generate_grammar(bnf, tokenize.PythonTokenTypes)
def test_ambiguities():
    """Two rules that can start with the same token must be reported as ambiguous."""
    bnf = 'foo: bar | baz\nbar: NAME\nbaz: NAME\n'
    with pytest.raises(ValueError, match='ambiguous'):
        generate_grammar(bnf, tokenize.PythonTokenTypes)