Better detection of ambiguities

This commit is contained in:
Dave Halter
2018-06-25 01:56:02 +02:00
parent 43d4a8a834
commit da5aa8a2ab
2 changed files with 22 additions and 4 deletions

View File

@@ -99,7 +99,7 @@ class Grammar(object):
def _calculate_first_terminals(self, nonterminal):
dfas = self._nonterminal_to_dfas[nonterminal]
self._first_terminals[nonterminal] = None # dummy to detect left recursion
self._first_plans[nonterminal] = {}
first_plans = self._first_plans[nonterminal] = {}
# We only need to check the first dfa. All the following ones are not
# interesting to find first terminals.
state = dfas[0]
@@ -121,13 +121,19 @@ class Grammar(object):
overlapcheck[nonterminal_or_string] = fset
for t, pushes in self._first_plans[nonterminal_or_string].items():
assert not self._first_plans[nonterminal].get(t)
self._first_plans[nonterminal][t] = [next_] + pushes
check = first_plans.get(t)
if check is not None:
raise ValueError(
"Rule %s is ambiguous; %s is the"
" start of the rule %s as well as %s."
% (nonterminal, t, nonterminal_or_string, check[-1].from_rule)
)
first_plans[t] = [next_] + pushes
else:
# It's a string. We have finally found a possible first token.
totalset.add(nonterminal_or_string)
overlapcheck[nonterminal_or_string] = set([nonterminal_or_string])
self._first_plans[nonterminal][nonterminal_or_string] = [next_]
first_plans[nonterminal_or_string] = [next_]
inverse = {}
for nonterminal_or_string, first_set in overlapcheck.items():

View File

@@ -12,6 +12,8 @@ import pytest
from parso import load_grammar
from parso import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso.python import tokenize
def _parse(code, version=None):
@@ -270,3 +272,13 @@ def py_br(each_version):
def test_py3_rb(works_ge_py3):
works_ge_py3.parse("rb'1'")
works_ge_py3.parse("RB'1'")
def test_left_recursion():
    """A directly left-recursive rule must abort grammar generation."""
    bnf = 'foo: foo NAME\n'
    with pytest.raises(ValueError, match='left recursion'):
        generate_grammar(bnf, tokenize.PythonTokenTypes)
def test_ambiguities():
    """Two rules that can start with the same token must be reported as ambiguous."""
    bnf = 'foo: bar | baz\nbar: NAME\nbaz: NAME\n'
    with pytest.raises(ValueError, match='ambiguous'):
        generate_grammar(bnf, tokenize.PythonTokenTypes)