diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py
index ae5e7de..c8aaaad 100644
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -99,7 +99,7 @@ class Grammar(object):
     def _calculate_first_terminals(self, nonterminal):
         dfas = self._nonterminal_to_dfas[nonterminal]
         self._first_terminals[nonterminal] = None  # dummy to detect left recursion
-        self._first_plans[nonterminal] = {}
+        first_plans = self._first_plans[nonterminal] = {}
         # We only need to check the first dfa. All the following ones are not
         # interesting to find first terminals.
         state = dfas[0]
@@ -121,13 +121,19 @@ class Grammar(object):
                 overlapcheck[nonterminal_or_string] = fset
 
                 for t, pushes in self._first_plans[nonterminal_or_string].items():
-                    assert not self._first_plans[nonterminal].get(t)
-                    self._first_plans[nonterminal][t] = [next_] + pushes
+                    check = first_plans.get(t)
+                    if check is not None:
+                        raise ValueError(
+                            "Rule %s is ambiguous; %s is the"
+                            " start of the rule %s as well as %s."
+                            % (nonterminal, t, nonterminal_or_string, check[-1].from_rule)
+                        )
+                    first_plans[t] = [next_] + pushes
             else:
                 # It's a string. We have finally found a possible first token.
                 totalset.add(nonterminal_or_string)
                 overlapcheck[nonterminal_or_string] = set([nonterminal_or_string])
-                self._first_plans[nonterminal][nonterminal_or_string] = [next_]
+                first_plans[nonterminal_or_string] = [next_]
 
         inverse = {}
         for nonterminal_or_string, first_set in overlapcheck.items():
diff --git a/test/test_pgen2.py b/test/test_pgen2.py
index 8c6e90f..88f6591 100644
--- a/test/test_pgen2.py
+++ b/test/test_pgen2.py
@@ -12,6 +12,8 @@ import pytest
 
 from parso import load_grammar
 from parso import ParserSyntaxError
+from parso.pgen2.pgen import generate_grammar
+from parso.python import tokenize
 
 
 def _parse(code, version=None):
@@ -270,3 +272,13 @@ def py_br(each_version):
 def test_py3_rb(works_ge_py3):
     works_ge_py3.parse("rb'1'")
     works_ge_py3.parse("RB'1'")
+
+
+def test_left_recursion():
+    with pytest.raises(ValueError, match='left recursion'):
+        generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)
+
+
+def test_ambiguities():
+    with pytest.raises(ValueError, match='ambiguous'):
+        generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)