Merge branch 'pgen'

This commit is contained in:
Dave Halter
2018-06-29 18:14:03 +02:00
20 changed files with 774 additions and 1009 deletions

View File

@@ -3,6 +3,11 @@
Changelog
---------
0.3.0 (2018-07-30)
+++++++++++++++++++
- Rewrote the pgen2 parser generator.
0.2.1 (2018-05-21)
+++++++++++++++++++

View File

@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
from parso.utils import split_lines, python_bytes_to_unicode
__version__ = '0.2.1'
__version__ = '0.3.0'
def parse(code=None, **kwargs):

View File

@@ -2,11 +2,11 @@ import hashlib
import os
from parso._compatibility import FileNotFoundError, is_pypy
from parso.pgen2.pgen import generate_grammar
from parso.pgen2 import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python import token
from parso.python.token import PythonTokenTypes
from parso.cache import parser_cache, load_module, save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
@@ -51,8 +51,8 @@ class Grammar(object):
it is invalid, it will be returned as an error node. If disabled,
you will get a ParserSyntaxError when encountering syntax errors in your
code.
:param str start_symbol: The grammar symbol that you want to parse. Only
allowed to be used when error_recovery is False.
:param str start_symbol: The grammar rule (nonterminal) that you want
to parse. Only allowed to be used when error_recovery is False.
:param str path: The path to the file you want to open. Only needed for caching.
:param bool cache: Keeps a copy of the parser tree in RAM and on disk
if a path is given. Returns the cached trees if the corresponding
@@ -88,7 +88,7 @@ class Grammar(object):
raise TypeError("Please provide either code or a path.")
if start_symbol is None:
start_symbol = self._start_symbol
start_symbol = self._start_nonterminal
if error_recovery and start_symbol != 'file_input':
raise NotImplementedError("This is currently not implemented.")
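A minimal sketch of how these two parameters interact at the public API level (the version string and inputs below are illustrative, not part of this diff):

    import parso

    # Default: error recovery enabled, start symbol 'file_input'.
    module = parso.parse("x = 1\n")

    # Any other start nonterminal requires error_recovery=False.
    grammar = parso.load_grammar(version='3.6')
    expr = grammar.parse("1 + 2\n", start_symbol='eval_input',
                         error_recovery=False)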
@@ -136,7 +136,7 @@ class Grammar(object):
p = self._parser(
self._pgen_grammar,
error_recovery=error_recovery,
start_symbol=start_symbol
start_nonterminal=start_symbol
)
root_node = p.parse(tokens=tokens)
@@ -186,15 +186,15 @@ class Grammar(object):
return normalizer.issues
def __repr__(self):
labels = self._pgen_grammar.number2symbol.values()
txt = ' '.join(list(labels)[:3]) + ' ...'
nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
txt = ' '.join(list(nonterminals)[:3]) + ' ...'
return '<%s:%s>' % (self.__class__.__name__, txt)
class PythonGrammar(Grammar):
_error_normalizer_config = ErrorFinderConfig()
_token_namespace = token
_start_symbol = 'file_input'
_token_namespace = PythonTokenTypes
_start_nonterminal = 'file_input'
def __init__(self, version_info, bnf_text):
super(PythonGrammar, self).__init__(

View File

@@ -1,3 +1,11 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is now different from the original pgen2.
"""
The ``Parser`` tries to convert the available Python code into an easy to read
format, something like an abstract syntax tree. The classes that represent this
@@ -16,7 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from parso import tree
from parso.pgen2.parse import PgenParser
from parso.pgen2.generator import ReservedString
class ParserSyntaxError(Exception):
@@ -30,7 +38,76 @@ class ParserSyntaxError(Exception):
self.error_leaf = error_leaf
class InternalParseError(Exception):
"""
Exception to signal the parser is stuck and error recovery didn't help.
Basically this shouldn't happen. It's a sign that something is really
wrong.
"""
def __init__(self, msg, type_, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
(msg, type_.name, value, start_pos))
self.msg = msg
self.type = type_
self.value = value
self.start_pos = start_pos
class Stack(list):
def _allowed_transition_names_and_token_types(self):
def iterate():
# An API just for Jedi.
for stack_node in reversed(self):
for transition in stack_node.dfa.transitions:
if isinstance(transition, ReservedString):
yield transition.value
else:
yield transition # A token type
if not stack_node.dfa.is_final:
break
return list(iterate())
class StackNode(object):
def __init__(self, dfa):
self.dfa = dfa
self.nodes = []
@property
def nonterminal(self):
return self.dfa.from_rule
def __repr__(self):
return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
def _token_to_transition(grammar, type_, value):
# Map from token to label
if type_.contains_syntax:
# Check for reserved words (keywords)
try:
return grammar.reserved_syntax_strings[value]
except KeyError:
pass
return type_
class BaseParser(object):
"""Parser engine.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See python/tokenize.py for how to get input tokens from a string.
When a syntax error occurs, error_recovery() is called.
"""
node_map = {}
default_node = tree.Node
@@ -38,41 +115,94 @@ class BaseParser(object):
}
default_leaf = tree.Leaf
def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
self._pgen_grammar = pgen_grammar
self._start_symbol = start_symbol
self._start_nonterminal = start_nonterminal
self._error_recovery = error_recovery
def parse(self, tokens):
start_number = self._pgen_grammar.symbol2number[self._start_symbol]
self.pgen_parser = PgenParser(
self._pgen_grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number
)
first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
self.stack = Stack([StackNode(first_dfa)])
node = self.pgen_parser.parse(tokens)
# The stack is empty now, we don't need it anymore.
del self.pgen_parser
return node
for token in tokens:
self._add_token(token)
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
while True:
tos = self.stack[-1]
if not tos.dfa.is_final:
# We never broke out -- EOF is too soon -- unfinished statement.
# However, the error recovery might have added the token again; if
# the stack is empty, we're fine.
raise InternalParseError(
"incomplete input", token.type, token.value, token.start_pos
)
if len(self.stack) > 1:
self._pop()
else:
return self.convert_node(tos.nonterminal, tos.nodes)
def error_recovery(self, token):
if self._error_recovery:
raise NotImplementedError("Error Recovery is not implemented")
else:
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
type_, value, start_pos, prefix = token
error_leaf = tree.ErrorLeaf('TODO %s' % type_, value, start_pos, prefix)
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
def convert_node(self, pgen_grammar, type_, children):
# TODO REMOVE symbol, we don't want type here.
symbol = pgen_grammar.number2symbol[type_]
def convert_node(self, nonterminal, children):
try:
return self.node_map[symbol](children)
return self.node_map[nonterminal](children)
except KeyError:
return self.default_node(symbol, children)
return self.default_node(nonterminal, children)
def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
def convert_leaf(self, type_, value, prefix, start_pos):
try:
return self.leaf_map[type_](value, start_pos, prefix)
except KeyError:
return self.default_leaf(value, start_pos, prefix)
def _add_token(self, token):
"""
This is the only core function for parsing; basically everything happens
here. Everything is well prepared by the parser generator, so we only
apply the necessary steps here.
"""
grammar = self._pgen_grammar
stack = self.stack
type_, value, start_pos, prefix = token
transition = _token_to_transition(grammar, type_, value)
while True:
try:
plan = stack[-1].dfa.transitions[transition]
break
except KeyError:
if stack[-1].dfa.is_final:
self._pop()
else:
self.error_recovery(token)
return
except IndexError:
raise InternalParseError("too much input", type_, value, start_pos)
stack[-1].dfa = plan.next_dfa
for push in plan.dfa_pushes:
stack.append(StackNode(push))
leaf = self.convert_leaf(type_, value, prefix, start_pos)
stack[-1].nodes.append(leaf)
def _pop(self):
tos = self.stack.pop()
# If there's exactly one child, return that child instead of
# creating a new node. We still create expr_stmt and
# file_input though, because a lot of Jedi depends on their
# logic.
if len(tos.nodes) == 1:
new_node = tos.nodes[0]
else:
new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)
self.stack[-1].nodes.append(new_node)
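As a rough sketch of how the new plan-based machinery fits together, one can feed a hand-written grammar and hand-made tokens straight into BaseParser (the one-rule grammar below is hypothetical and not part of this commit):

    from parso.parser import BaseParser
    from parso.pgen2 import generate_grammar
    from parso.python.token import PythonTokenTypes
    from parso.python.tokenize import PythonToken

    # NAME/NEWLINE/ENDMARKER refer to members of the token namespace.
    grammar = generate_grammar("file_input: NAME NEWLINE ENDMARKER\n",
                               token_namespace=PythonTokenTypes)
    tokens = [
        PythonToken(PythonTokenTypes.NAME, 'hello', (1, 0), ''),
        PythonToken(PythonTokenTypes.NEWLINE, '\n', (1, 5), ''),
        PythonToken(PythonTokenTypes.ENDMARKER, '', (2, 0), ''),
    ]
    parser = BaseParser(grammar, start_nonterminal='file_input')
    tree = parser.parse(tokens)  # a tree.Node with nonterminal 'file_input'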

View File

@@ -6,3 +6,5 @@
# Licensed to PSF under a Contributor Agreement.
# Copyright 2014 David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
from parso.pgen2.generator import generate_grammar

359
parso/pgen2/generator.py Normal file
View File

@@ -0,0 +1,359 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
"""
This module defines the data structures used to represent a grammar.
Specifying grammars in pgen is possible with this grammar::
grammar: (NEWLINE | rule)* ENDMARKER
rule: NAME ':' rhs NEWLINE
rhs: items ('|' items)*
items: item+
item: '[' rhs ']' | atom ['+' | '*']
atom: '(' rhs ')' | NAME | STRING
This grammar is self-referencing.
This parser generator (pgen2) was created by Guido van Rossum and used for lib2to3.
Most of the code has been refactored to make it more Pythonic. Since this was a
"copy" of the CPython parser generator "pgen", some work was needed to make
it more readable. It should also be slightly faster than the original pgen2,
because we made some optimizations.
"""
from ast import literal_eval
from parso.pgen2.grammar_parser import GrammarParser, NFAState
class Grammar(object):
"""
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parso.parser. The parsing engine
accesses the instance variables directly.
The only important parts of this parser are the DFAs and the transitions
between them.
"""
def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]]
self.reserved_syntax_strings = reserved_syntax_strings
self.start_nonterminal = start_nonterminal
class DFAPlan(object):
"""
Plans are used for the parser to create stack nodes and do the proper
DFA state transitions.
"""
def __init__(self, next_dfa, dfa_pushes=[]):
self.next_dfa = next_dfa
self.dfa_pushes = dfa_pushes
def __repr__(self):
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
class DFAState(object):
"""
The DFAState object is the core class for pretty much everything. DFAStates
are the vertices of an ordered graph, while arcs and transitions are the
edges.
Arcs are the initial edges, where most DFAStates are not connected, and
transitions are then calculated to connect the DFA state machines that have
different nonterminals.
"""
def __init__(self, from_rule, nfa_set, final):
assert isinstance(nfa_set, set)
assert isinstance(next(iter(nfa_set)), NFAState)
assert isinstance(final, NFAState)
self.from_rule = from_rule
self.nfa_set = nfa_set
self.arcs = {} # map from terminals/nonterminals to DFAState
# In an intermediary step we set these nonterminal arcs (which have the
# same structure as arcs). These don't contain terminals anymore.
self.nonterminal_arcs = {}
# Transitions and is_final are basically the only things that the
# parser uses. Everything else is purely here to create a parser.
self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan]
self.is_final = final in nfa_set
def add_arc(self, next_, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next_, DFAState)
self.arcs[label] = next_
def unifystate(self, old, new):
for label, next_ in self.arcs.items():
if next_ is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfa_set instance variable
assert isinstance(other, DFAState)
if self.is_final != other.is_final:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next_ in self.arcs.items():
if next_ is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def __repr__(self):
return '<%s: %s is_final=%s>' % (
self.__class__.__name__, self.from_rule, self.is_final
)
class ReservedString(object):
"""
Most grammars will have certain keywords and operators that are mentioned
in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
This class basically is the former.
"""
def __init__(self, value):
self.value = value
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.value)
def _simplify_dfas(dfas):
"""
This is not theoretically optimal, but works well enough.
Algorithm: repeatedly look for two states that have the same
set of arcs (same labels pointing to the same nodes) and
unify them, until things stop changing.
dfas is a list of DFAState instances
"""
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfas):
for j in range(i + 1, len(dfas)):
state_j = dfas[j]
if state_i == state_j:
#print " unify", i, j
del dfas[j]
for state in dfas:
state.unifystate(state_j, state_i)
changes = True
break
def _make_dfas(start, finish):
"""
Uses the powerset construction algorithm to create DFA states from sets of
NFA states.
Also does state reduction if some states are not needed.
"""
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def addclosure(nfa_state, base_nfa_set):
assert isinstance(nfa_state, NFAState)
if nfa_state in base_nfa_set:
return
base_nfa_set.add(nfa_state)
for nfa_arc in nfa_state.arcs:
if nfa_arc.nonterminal_or_string is None:
addclosure(nfa_arc.next, base_nfa_set)
base_nfa_set = set()
addclosure(start, base_nfa_set)
states = [DFAState(start.from_rule, base_nfa_set, finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
# Find state transitions and store them in arcs.
for nfa_state in state.nfa_set:
for nfa_arc in nfa_state.arcs:
if nfa_arc.nonterminal_or_string is not None:
nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
addclosure(nfa_arc.next, nfa_set)
# Now create the dfa's with no None's in arcs anymore. All Nones have
# been eliminated and state transitions (arcs) are properly defined, we
# just need to create the dfa's.
for nonterminal_or_string, nfa_set in arcs.items():
for nested_state in states:
if nested_state.nfa_set == nfa_set:
# The DFA state already exists for this rule.
break
else:
nested_state = DFAState(start.from_rule, nfa_set, finish)
states.append(nested_state)
state.add_arc(nested_state, nonterminal_or_string)
return states # List of DFAState instances; first one is start
def _dump_nfa(start, finish):
print("Dump of NFA for", start.from_rule)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for label, next_ in state.arcs:
if next_ in todo:
j = todo.index(next_)
else:
j = len(todo)
todo.append(next_)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def _dump_dfas(dfas):
print("Dump of DFA for", dfas[0].from_rule)
for i, state in enumerate(dfas):
print(" State", i, state.is_final and "(final)" or "")
for nonterminal, next_ in state.arcs.items():
print(" %s -> %d" % (nonterminal, dfas.index(next_)))
def generate_grammar(bnf_grammar, token_namespace):
"""
``bnf_grammar`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
for grouping).
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
rule_to_dfas = {}
start_nonterminal = None
for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
#_dump_nfa(a, z)
dfas = _make_dfas(nfa_a, nfa_z)
#_dump_dfas(dfas)
# oldlen = len(dfas)
_simplify_dfas(dfas)
# newlen = len(dfas)
rule_to_dfas[nfa_a.from_rule] = dfas
#print(nfa_a.from_rule, oldlen, newlen)
if start_nonterminal is None:
start_nonterminal = nfa_a.from_rule
reserved_strings = {}
for nonterminal, dfas in rule_to_dfas.items():
for dfa_state in dfas:
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
if terminal_or_nonterminal in rule_to_dfas:
dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
else:
transition = _make_transition(
token_namespace,
reserved_strings,
terminal_or_nonterminal
)
dfa_state.transitions[transition] = DFAPlan(next_dfa)
_calculate_tree_traversal(rule_to_dfas)
return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)
def _make_transition(token_namespace, reserved_syntax_strings, label):
"""
Creates a reserved string ("if", "for", "*", ...) or returns the token type
(NUMBER, STRING, ...) for a given grammar terminal.
"""
if label[0].isalpha():
# A named token (e.g. NAME, NUMBER, STRING)
return getattr(token_namespace, label)
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
assert not label.startswith('"""') and not label.startswith("'''")
value = literal_eval(label)
try:
return reserved_syntax_strings[value]
except KeyError:
r = reserved_syntax_strings[value] = ReservedString(value)
return r
def _calculate_tree_traversal(nonterminal_to_dfas):
"""
By this point we know how dfas can move around within a stack node, but we
don't know how we can add a new stack node (nonterminal transitions).
"""
# Map from grammar rule (nonterminal) name to a set of tokens.
first_plans = {}
nonterminals = list(nonterminal_to_dfas.keys())
nonterminals.sort()
for nonterminal in nonterminals:
if nonterminal not in first_plans:
_calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
# Now that we have calculated the first terminals, we are sure that
# there is no left recursion and no ambiguity.
for dfas in nonterminal_to_dfas.values():
for dfa_state in dfas:
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
for transition, pushes in first_plans[nonterminal].items():
dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)
def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
"""
Calculates the first plans for the given nonterminal and stores them in
the first_plans dictionary. This is then used to know when to create stack nodes.
"""
dfas = nonterminal_to_dfas[nonterminal]
new_first_plans = {}
first_plans[nonterminal] = None # dummy to detect left recursion
# We only need to check the first dfa. All the following ones are not
# interesting for finding first terminals.
state = dfas[0]
for transition, next_ in state.transitions.items():
# It's a terminal (a token type or reserved string): a possible first token.
new_first_plans[transition] = [next_.next_dfa]
for nonterminal2, next_ in state.nonterminal_arcs.items():
# It's a nonterminal and we have either a left recursion issue
# in the grammar or we have to recurse.
try:
first_plans2 = first_plans[nonterminal2]
except KeyError:
first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
else:
if first_plans2 is None:
raise ValueError("left recursion for rule %r" % nonterminal)
for t, pushes in first_plans2.items():
check = new_first_plans.get(t)
if check is not None:
raise ValueError(
"Rule %s is ambiguous; %s is the"
" start of the rule %s as well as %s."
% (nonterminal, t, nonterminal2, check[-1].from_rule)
)
new_first_plans[t] = [next_] + pushes
first_plans[nonterminal] = new_first_plans
return new_first_plans
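A hedged sketch of what the generator produces for a small grammar (the two-rule grammar is illustrative; attribute names are the ones defined above):

    from parso.pgen2 import generate_grammar
    from parso.python.token import PythonTokenTypes

    bnf = (
        "file_input: stmt* ENDMARKER\n"
        "stmt: NAME '=' NAME NEWLINE\n"
    )
    grammar = generate_grammar(bnf, token_namespace=PythonTokenTypes)

    print(grammar.start_nonterminal)              # 'file_input'
    print(list(grammar.reserved_syntax_strings))  # ['=']
    for nonterminal, dfas in grammar.nonterminal_to_dfas.items():
        for dfa in dfas:
            # Each transition maps a token type or ReservedString to a DFAPlan.
            print(nonterminal, dfa.is_final, list(dfa.transitions))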

View File

@@ -1,97 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
"""This module defines the data structures used to represent a grammar.
These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.
There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.
"""
class Grammar(object):
"""Pgen parsing tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self, bnf_text, start_symbol):
self.symbol2number = {}
self.number2symbol = {}
self.states = []
self.dfas = {}
self.labels = [(0, "EMPTY")]
self.keywords = {}
self.tokens = {}
self.symbol2label = {}
self.label2symbol = {}
self.start_symbol = start_symbol
@property
def start(self):
return self.symbol2number[self.start_symbol]
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")
pprint(self.number2symbol)
print("states")
pprint(self.states)
print("dfas")
pprint(self.dfas)
print("labels")
pprint(self.labels)
print("start", self.start)

View File

@@ -5,9 +5,9 @@
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
from parso.python import tokenize
from parso.python.tokenize import tokenize
from parso.utils import parse_version_string
from parso.python import token
from parso.python.token import PythonTokenTypes
class GrammarParser():
@@ -16,7 +16,7 @@ class GrammarParser():
"""
def __init__(self, bnf_grammar):
self._bnf_grammar = bnf_grammar
self.generator = tokenize.tokenize(
self.generator = tokenize(
bnf_grammar,
version_info=parse_version_string('3.6')
)
@@ -24,16 +24,16 @@ class GrammarParser():
def parse(self):
# grammar: (NEWLINE | rule)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
while self.type != PythonTokenTypes.ENDMARKER:
while self.type == PythonTokenTypes.NEWLINE:
self._gettoken()
# rule: NAME ':' rhs NEWLINE
self._current_rule_name = self._expect(token.NAME)
self._expect(token.COLON)
self._current_rule_name = self._expect(PythonTokenTypes.NAME)
self._expect(PythonTokenTypes.OP, ':')
a, z = self._parse_rhs()
self._expect(token.NEWLINE)
self._expect(PythonTokenTypes.NEWLINE)
yield a, z
@@ -60,7 +60,8 @@ class GrammarParser():
def _parse_items(self):
# items: item+
a, b = self._parse_item()
while self.type in (token.NAME, token.STRING, token.LPAR, token.LSQB):
while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
or self.value in ('(', '['):
c, d = self._parse_item()
# Need to end on the next item.
b.add_arc(c)
@@ -72,7 +73,7 @@ class GrammarParser():
if self.value == "[":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.RSQB)
self._expect(PythonTokenTypes.OP, ']')
# Also make it possible that there is no token and just change the
# state.
a.add_arc(z)
@@ -97,9 +98,9 @@ class GrammarParser():
if self.value == "(":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.RPAR)
self._expect(PythonTokenTypes.OP, ')')
return a, z
elif self.type in (token.NAME, token.STRING):
elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
a = NFAState(self._current_rule_name)
z = NFAState(self._current_rule_name)
# Make it clear that the state transition requires that value.
@@ -110,10 +111,12 @@ class GrammarParser():
self._raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def _expect(self, type):
if self.type != type:
self._raise_error("expected %s(%s), got %s(%s)",
type, token.tok_name[type], self.type, self.value)
def _expect(self, type_, value=None):
if self.type != type_:
self._raise_error("expected %s, got %s [%s]",
type_, self.type, self.value)
if value is not None and self.value != value:
self._raise_error("expected %s, got %s", value, self.value)
value = self.value
self._gettoken()
return value
@@ -134,20 +137,20 @@ class GrammarParser():
class NFAArc(object):
def __init__(self, next_, label_or_string):
def __init__(self, next_, nonterminal_or_string):
self.next = next_
self.label_or_string = label_or_string
self.nonterminal_or_string = nonterminal_or_string
class NFAState(object):
def __init__(self, from_rule):
self.from_rule = from_rule
self.arcs = [] # list of (label, NFAState) pairs
self.arcs = [] # List[NFAArc]
def add_arc(self, next_, label=None):
assert label is None or isinstance(label, str)
def add_arc(self, next_, nonterminal_or_string=None):
assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str)
assert isinstance(next_, NFAState)
self.arcs.append(NFAArc(next_, label))
self.arcs.append(NFAArc(next_, nonterminal_or_string))
def __repr__(self):
return '<%s: from %s>' % (self.__class__.__name__, self.from_rule)
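For illustration, this is roughly what generate_grammar does with the classes above (GrammarParser and NFAState are private to pgen2; the rule text is made up):

    from parso.pgen2.grammar_parser import GrammarParser

    for start, finish in GrammarParser("stmt: NAME NEWLINE\n").parse():
        print(start.from_rule)  # 'stmt'
        for arc in start.arcs:
            # Each NFAArc carries a nonterminal/string label (or None for
            # an epsilon transition) and the next NFAState.
            print(arc.nonterminal_or_string, arc.next)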

View File

@@ -1,238 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
"""
Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from parso.python import tokenize
class InternalParseError(Exception):
"""
Exception to signal the parser is stuck and error recovery didn't help.
Basically this shouldn't happen. It's a sign that something is really
wrong.
"""
def __init__(self, msg, type, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
(msg, tokenize.tok_name[type], value, start_pos))
self.msg = msg
self.type = type
self.value = value
self.start_pos = start_pos
class Stack(list):
def get_tos_nodes(self):
tos = self[-1]
return tos[2][1]
def get_tos_first_tokens(self, grammar):
tos = self[-1]
inv_tokens = dict((v, k) for k, v in grammar.tokens.items())
inv_keywords = dict((v, k) for k, v in grammar.keywords.items())
dfa, state, nodes = tos
def check():
for first in dfa[1]:
try:
yield inv_keywords[first]
except KeyError:
yield tokenize.tok_name[inv_tokens[first]]
return sorted(check())
def token_to_ilabel(grammar, type_, value):
# Map from token to label
if type_ == tokenize.NAME:
# Check for reserved words (keywords)
try:
return grammar.keywords[value]
except KeyError:
pass
try:
return grammar.tokens[type_]
except KeyError:
return None
class PgenParser(object):
"""Parser engine.
The proper usage sequence is:
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.add_token(...): # parse a token
break
root = p.rootnode # root of abstract syntax tree
A Parser instance may be reused by calling setup() repeatedly.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when add_token() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, error_recovery()
is called. There is no error recovery; the parser cannot be used
after a syntax error was reported (but it can be reinitialized by
calling setup()).
"""
def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
"""Constructor.
The grammar argument is a grammar.Grammar instance; see the
grammar module for more information.
The parser is not ready yet for parsing; you must call the
setup() method to get it started.
The optional convert argument is a function mapping concrete
syntax tree nodes to abstract syntax tree nodes. If not
given, no conversion is done and the syntax tree produced is
the concrete syntax tree. If given, it must be a function of
two arguments, the first being the grammar (a grammar.Grammar
instance), and the second being the concrete syntax tree node
to be converted. The syntax tree is converted from the bottom
up.
A concrete syntax tree node is a (type, nodes) tuple, where
type is the node type (a token or symbol number) and nodes
is a list of children for symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
up to the converter function.
"""
self.grammar = grammar
self.convert_node = convert_node
self.convert_leaf = convert_leaf
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, children),
# where children is a list of nodes or None
newnode = (start, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = Stack([stackentry])
self.rootnode = None
self.error_recovery = error_recovery
def parse(self, tokens):
for type_, value, start_pos, prefix in tokens:
if self.add_token(type_, value, start_pos, prefix):
break
else:
# We never broke out -- EOF is too soon -- Unfinished statement.
# However, the error recovery might have added the token again, if
# the stack is empty, we're fine.
if self.stack:
raise InternalParseError("incomplete input", type_, value, start_pos)
return self.rootnode
def add_token(self, type_, value, start_pos, prefix):
"""Add a token; return True if this is the end of the program."""
ilabel = token_to_ilabel(self.grammar, type_, value)
# Loop until the token is shifted; may raise exceptions
_gram = self.grammar
_labels = _gram.labels
_push = self._push
_pop = self._pop
_shift = self._shift
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = _labels[i]
if ilabel == i:
# Look it up in the list of labels
assert t < 256
# Shift a token; we're done with it
_shift(type_, value, newstate, prefix, start_pos)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
_pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = _gram.dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
_push(t, itsdfa, newstate)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
_pop()
if not self.stack:
# Done parsing, but another token is input
raise InternalParseError("too much input", type_, value, start_pos)
else:
self.error_recovery(self.grammar, self.stack, arcs, type_,
value, start_pos, prefix, self.add_token)
break
def _shift(self, type_, value, newstate, prefix, start_pos):
"""Shift a token. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
def _push(self, type_, newdfa, newstate):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type_, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
def _pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, (type_, children) = self.stack.pop()
# If there's exactly one child, return that child instead of creating a
# new node. We still create expr_stmt and file_input though, because a
# lot of Jedi depends on its logic.
if len(children) == 1:
newnode = children[0]
else:
newnode = self.convert_node(self.grammar, type_, children)
try:
# Equal to:
# dfa, state, node = self.stack[-1]
# symbol, children = node
self.stack[-1][2][1].append(newnode)
except IndexError:
# Stack is empty, set the rootnode.
self.rootnode = newnode

View File

@@ -1,303 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
"""
Specifying grammars in pgen is possible with this grammar::
grammar: (NEWLINE | rule)* ENDMARKER
rule: NAME ':' rhs NEWLINE
rhs: items ('|' items)*
items: item+
item: '[' rhs ']' | atom ['+' | '*']
atom: '(' rhs ')' | NAME | STRING
This grammar is self-referencing.
"""
from parso.pgen2.grammar import Grammar
from parso.python import token
from parso.pgen2.grammar_parser import GrammarParser, NFAState
class ParserGenerator(object):
def __init__(self, rule_to_dfas, token_namespace):
self._token_namespace = token_namespace
self._rule_to_dfas = rule_to_dfas
def make_grammar(self, grammar):
self._first = {} # map from symbol name to set of tokens
names = list(self._rule_to_dfas.keys())
names.sort()
for name in names:
if name not in self._first:
self._calcfirst(name)
#print name, self._first[name].keys()
i = 256 + len(grammar.symbol2number)
grammar.symbol2number[name] = i
grammar.number2symbol[i] = name
for name in names:
dfas = self._rule_to_dfas[name]
states = []
for state in dfas:
arcs = []
for label, next in state.arcs.items():
arcs.append((self._make_label(grammar, label), dfas.index(next)))
if state.isfinal:
arcs.append((0, dfas.index(state)))
states.append(arcs)
grammar.states.append(states)
grammar.dfas[grammar.symbol2number[name]] = (states, self._make_first(grammar, name))
return grammar
def _make_first(self, grammar, name):
rawfirst = self._first[name]
first = {}
for label in rawfirst:
ilabel = self._make_label(grammar, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
return first
def _make_label(self, grammar, label):
# XXX Maybe this should be a method on a subclass of converter?
ilabel = len(grammar.labels)
if label[0].isalpha():
# Either a symbol name or a named token
if label in grammar.symbol2number:
# A symbol name (a non-terminal)
if label in grammar.symbol2label:
return grammar.symbol2label[label]
else:
grammar.labels.append((grammar.symbol2number[label], None))
grammar.symbol2label[label] = ilabel
grammar.label2symbol[ilabel] = label
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(self._token_namespace, label, None)
assert isinstance(itoken, int), label
if itoken in grammar.tokens:
return grammar.tokens[itoken]
else:
grammar.labels.append((itoken, None))
grammar.tokens[itoken] = ilabel
return ilabel
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
value = eval(label)
if value[0].isalpha():
# A keyword
if value in grammar.keywords:
return grammar.keywords[value]
else:
# TODO this might be an issue?! Using token.NAME here?
grammar.labels.append((token.NAME, value))
grammar.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = self._token_namespace.generate_token_id(value)
if itoken in grammar.tokens:
return grammar.tokens[itoken]
else:
grammar.labels.append((itoken, None))
grammar.tokens[itoken] = ilabel
return ilabel
def _calcfirst(self, name):
dfa = self._rule_to_dfas[name]
self._first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset = {}
overlapcheck = {}
for label, next in state.arcs.items():
if label in self._rule_to_dfas:
if label in self._first:
fset = self._first[label]
if fset is None:
raise ValueError("left recursion for rule %r" % name)
else:
self._calcfirst(label)
fset = self._first[label]
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset[label] = 1
overlapcheck[label] = {label: 1}
inverse = {}
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
inverse[symbol] = label
self._first[name] = totalset
class DFAState(object):
def __init__(self, from_rule, nfa_set, final):
assert isinstance(nfa_set, set)
assert isinstance(next(iter(nfa_set)), NFAState)
assert isinstance(final, NFAState)
self.from_rule = from_rule
self.nfa_set = nfa_set
self.isfinal = final in nfa_set
self.arcs = {} # map from label to DFAState
def add_arc(self, next_, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next_, DFAState)
self.arcs[label] = next_
def unifystate(self, old, new):
for label, next in self.arcs.items():
if next is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfa_set instance variable
assert isinstance(other, DFAState)
if self.isfinal != other.isfinal:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next in self.arcs.items():
if next is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def _simplify_dfas(dfas):
# This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and
# unify them, until things stop changing.
# dfas is a list of DFAState instances
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfas):
for j in range(i + 1, len(dfas)):
state_j = dfas[j]
if state_i == state_j:
#print " unify", i, j
del dfas[j]
for state in dfas:
state.unifystate(state_j, state_i)
changes = True
break
def _make_dfas(start, finish):
"""
This is basically doing what the powerset construction algorithm is doing.
"""
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def addclosure(nfa_state, base_nfa_set):
assert isinstance(nfa_state, NFAState)
if nfa_state in base_nfa_set:
return
base_nfa_set.add(nfa_state)
for nfa_arc in nfa_state.arcs:
if nfa_arc.label_or_string is None:
addclosure(nfa_arc.next, base_nfa_set)
base_nfa_set = set()
addclosure(start, base_nfa_set)
states = [DFAState(start.from_rule, base_nfa_set, finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
# Find state transitions and store them in arcs.
for nfa_state in state.nfa_set:
for nfa_arc in nfa_state.arcs:
if nfa_arc.label_or_string is not None:
nfa_set = arcs.setdefault(nfa_arc.label_or_string, set())
addclosure(nfa_arc.next, nfa_set)
# Now create the dfa's with no None's in arcs anymore. All Nones have
# been eliminated and state transitions (arcs) are properly defined, we
# just need to create the dfa's.
for label_or_string, nfa_set in arcs.items():
for nested_state in states:
if nested_state.nfa_set == nfa_set:
# The DFA state already exists for this rule.
break
else:
nested_state = DFAState(start.from_rule, nfa_set, finish)
states.append(nested_state)
state.add_arc(nested_state, label_or_string)
return states # List of DFAState instances; first one is start
def _dump_nfa(start, finish):
print("Dump of NFA for", start.from_rule)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for label, next in state.arcs:
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def _dump_dfas(dfas):
print("Dump of DFA for", dfas[0].from_rule)
for i, state in enumerate(dfas):
print(" State", i, state.isfinal and "(final)" or "")
for label, next in state.arcs.items():
print(" %s -> %d" % (label, dfas.index(next)))
def generate_grammar(bnf_grammar, token_namespace):
"""
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
for grouping).
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
rule_to_dfas = {}
start_symbol = None
for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
#_dump_nfa(a, z)
dfas = _make_dfas(nfa_a, nfa_z)
#_dump_dfas(dfas)
# oldlen = len(dfas)
_simplify_dfas(dfas)
# newlen = len(dfas)
rule_to_dfas[nfa_a.from_rule] = dfas
#print(nfa_a.from_rule, oldlen, newlen)
if start_symbol is None:
start_symbol = nfa_a.from_rule
p = ParserGenerator(rule_to_dfas, token_namespace)
return p.make_grammar(Grammar(bnf_grammar, start_symbol))

View File

@@ -13,8 +13,8 @@ import logging
from parso.utils import split_lines
from parso.python.parser import Parser
from parso.python.tree import EndMarker
from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
ENDMARKER, INDENT, DEDENT)
from parso.python.tokenize import PythonToken
from parso.python.token import PythonTokenTypes
LOG = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf):
def _ends_with_newline(leaf, suffix=''):
if leaf.type == 'error_leaf':
typ = leaf.original_type
typ = leaf.token_type.lower()
else:
typ = leaf.type
@@ -41,9 +41,8 @@ def _flows_finished(pgen_grammar, stack):
if, while, for and try might not be finished, because another part might
still be parsed.
"""
for dfa, newstate, (symbol_number, nodes) in stack:
if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
'for_stmt', 'try_stmt'):
for stack_node in stack:
if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
return False
return True
@@ -52,10 +51,10 @@ def suite_or_file_input_is_valid(pgen_grammar, stack):
if not _flows_finished(pgen_grammar, stack):
return False
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
if pgen_grammar.number2symbol[symbol_number] == 'suite':
for stack_node in reversed(stack):
if stack_node.nonterminal == 'suite':
# If only newline is in the suite, the suite is not valid, yet.
return len(nodes) > 1
return len(stack_node.nodes) > 1
# Not reaching a suite means that we're dealing with file_input levels
# where there's no need for a valid statement in it. It can also be empty.
return True
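These stack checks back parso's incremental parsing; a hedged sketch of the public entry point that exercises the DiffParser (the path is illustrative):

    import parso

    grammar = parso.load_grammar()
    # A second parse of a changed file reuses the cached tree via DiffParser.
    module = grammar.parse("x = 1\n", path='example.py', diff_cache=True)
    module = grammar.parse("x = 2\n", path='example.py', diff_cache=True)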
@@ -168,8 +167,7 @@ class DiffParser(object):
def _enabled_debugging(self, old_lines, lines_new):
if self._module.get_code() != ''.join(lines_new):
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
''.join(lines_new))
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
copied_nodes = [None]
@@ -273,7 +271,6 @@ class DiffParser(object):
# memoryview?
parsed_until_line = self._nodes_stack.parsed_until_line
lines_after = self._parser_lines_new[parsed_until_line:]
#print('parse_content', parsed_until_line, lines_after, until_line)
tokens = self._diff_tokenize(
lines_after,
until_line,
@@ -290,10 +287,10 @@ class DiffParser(object):
omitted_first_indent = False
indents = []
tokens = self._tokenizer(lines, (1, 0))
stack = self._active_parser.pgen_parser.stack
stack = self._active_parser.stack
for typ, string, start_pos, prefix in tokens:
start_pos = start_pos[0] + line_offset, start_pos[1]
if typ == INDENT:
if typ == PythonTokenTypes.INDENT:
indents.append(start_pos[1])
if is_first_token:
omitted_first_indent = True
@@ -306,8 +303,9 @@ class DiffParser(object):
# In case of omitted_first_indent, it might not be dedented fully.
# However this is a sign for us that a dedent happened.
if typ == DEDENT \
or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
if typ == PythonTokenTypes.DEDENT \
or typ == PythonTokenTypes.ERROR_DEDENT \
and omitted_first_indent and len(indents) == 1:
indents.pop()
if omitted_first_indent and not indents:
# We are done here, only thing that can come now is an
@@ -317,18 +315,22 @@ class DiffParser(object):
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
else:
prefix = ''
yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
yield PythonToken(
PythonTokenTypes.ENDMARKER, '',
(start_pos[0] + line_offset, 0),
prefix
)
break
elif typ == NEWLINE and start_pos[0] >= until_line:
elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
yield PythonToken(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
yield PythonToken(DEDENT, '', start_pos, '')
yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
yield PythonToken(ENDMARKER, '', start_pos, '')
yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
break
else:
continue
@@ -537,7 +539,7 @@ class _NodesStack(object):
line_offset_index = -2
elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
_is_flow_node(new_nodes[-1])):
_is_flow_node(new_nodes[-1])):
# Error leafs/nodes don't have a defined start/end. Error
# nodes might not end with a newline (e.g. if there's an
# open `(`). Therefore ignore all of them unless they are

View File

@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):
def visit_leaf(self, leaf):
if leaf.type == 'error_leaf':
if leaf.original_type in ('indent', 'error_dedent'):
if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
# Indents/Dedents itself never have a prefix. They are just
# "pseudo" tokens that get removed by the syntax tree later.
# Therefore in case of an error we also have to check for this.
spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
if leaf.original_type == 'indent':
if leaf.token_type == 'INDENT':
message = 'unexpected indent'
else:
message = 'unindent does not match any outer indentation level'

View File

@@ -1,9 +1,11 @@
from parso.python import tree
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name, NAME, FSTRING_STRING,
FSTRING_START, FSTRING_END)
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser
from parso.pgen2.parse import token_to_ilabel
NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
class Parser(BaseParser):
@@ -53,53 +55,33 @@ class Parser(BaseParser):
# Names/Keywords are handled separately
_leaf_map = {
STRING: tree.String,
NUMBER: tree.Number,
NEWLINE: tree.Newline,
ENDMARKER: tree.EndMarker,
FSTRING_STRING: tree.FStringString,
FSTRING_START: tree.FStringStart,
FSTRING_END: tree.FStringEnd,
PythonTokenTypes.STRING: tree.String,
PythonTokenTypes.NUMBER: tree.Number,
PythonTokenTypes.NEWLINE: tree.Newline,
PythonTokenTypes.ENDMARKER: tree.EndMarker,
PythonTokenTypes.FSTRING_STRING: tree.FStringString,
PythonTokenTypes.FSTRING_START: tree.FStringStart,
PythonTokenTypes.FSTRING_END: tree.FStringEnd,
}
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
super(Parser, self).__init__(pgen_grammar, start_nonterminal,
error_recovery=error_recovery)
self.syntax_errors = []
self._omit_dedent_list = []
self._indent_counter = 0
# TODO do print absolute import detection here.
# try:
# del python_grammar_no_print_statement.keywords["print"]
# except KeyError:
# pass # Doesn't exist in the Python 3 grammar.
# if self.options["print_function"]:
# python_grammar = pygram.python_grammar_no_print_statement
# else:
def parse(self, tokens):
if self._error_recovery:
if self._start_symbol != 'file_input':
if self._start_nonterminal != 'file_input':
raise NotImplementedError
tokens = self._recovery_tokenize(tokens)
node = super(Parser, self).parse(tokens)
return super(Parser, self).parse(tokens)
if self._start_symbol == 'file_input' != node.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
node = self.convert_node(
self._pgen_grammar,
self._pgen_grammar.symbol2number['file_input'],
[node]
)
return node
def convert_node(self, pgen_grammar, type, children):
def convert_node(self, nonterminal, children):
"""
Convert raw node information to a PythonBaseNode instance.
@@ -107,154 +89,114 @@ class Parser(BaseParser):
grammar rule produces a new complete node, so that the tree is built
strictly bottom-up.
"""
# TODO REMOVE symbol, we don't want type here.
symbol = pgen_grammar.number2symbol[type]
try:
return self.node_map[symbol](children)
return self.node_map[nonterminal](children)
except KeyError:
if symbol == 'suite':
if nonterminal == 'suite':
# We don't want the INDENT/DEDENT in our parser tree. Those
# leaves are just cancer. They are virtual leaves and not real
# ones and therefore have pseudo start/end positions and no
# prefixes. Just ignore them.
children = [children[0]] + children[2:-1]
elif symbol == 'list_if':
elif nonterminal == 'list_if':
# Make transitioning from 2 to 3 easier.
symbol = 'comp_if'
elif symbol == 'listmaker':
nonterminal = 'comp_if'
elif nonterminal == 'listmaker':
# Same as list_if above.
symbol = 'testlist_comp'
return self.default_node(symbol, children)
nonterminal = 'testlist_comp'
return self.default_node(nonterminal, children)
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
def convert_leaf(self, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == NAME:
if value in pgen_grammar.keywords:
if value in self._pgen_grammar.reserved_syntax_strings:
return tree.Keyword(value, start_pos, prefix)
else:
return tree.Name(value, start_pos, prefix)
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
def get_symbol_and_nodes(stack):
for dfa, state, (type_, nodes) in stack:
symbol = pgen_grammar.number2symbol[type_]
yield symbol, nodes
tos_nodes = stack.get_tos_nodes()
def error_recovery(self, token):
tos_nodes = self.stack[-1].nodes
if tos_nodes:
last_leaf = tos_nodes[-1].get_last_leaf()
else:
last_leaf = None
if self._start_symbol == 'file_input' and \
(typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
def reduce_stack(states, newstate):
# reduce
state = newstate
while states[state] == [(0, state)]:
self.pgen_parser._pop()
dfa, state, (type_, nodes) = stack[-1]
states, first = dfa
if self._start_nonterminal == 'file_input' and \
(token.type == PythonTokenTypes.ENDMARKER or
token.type == DEDENT and '\n' not in last_leaf.value):
# In Python, statements need to end with a newline. But since it's
# possible (and valid Python) that there's no newline at the
# end of a file, we have to recover even if the user doesn't want
# error recovery.
#print('x', pprint.pprint(stack))
ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
dfa, state, (type_, nodes) = stack[-1]
symbol = pgen_grammar.number2symbol[type_]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
if ilabel == i:
if symbol == 'simple_stmt':
# This is basically shifting
stack[-1] = (dfa, newstate, (type_, nodes))
reduce_stack(states, newstate)
add_token_callback(typ, value, start_pos, prefix)
if self.stack[-1].dfa.from_rule == 'simple_stmt':
try:
plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
except KeyError:
pass
else:
if plan.next_dfa.is_final and not plan.dfa_pushes:
# We are ignoring here that the newline would be
# required for a simple_stmt.
self.stack[-1].dfa = plan.next_dfa
self._add_token(token)
return
# Check if we're at the right point
#for symbol, nodes in get_symbol_and_nodes(stack):
# self.pgen_parser._pop()
#break
break
#symbol = pgen_grammar.number2symbol[type_]
if not self._error_recovery:
return super(Parser, self).error_recovery(
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback)
return super(Parser, self).error_recovery(token)
def current_suite(stack):
# For now just discard everything that is not a suite or
# file_input, if we detect an error.
one_line_suite = False
for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
for until_index, stack_node in reversed(list(enumerate(stack))):
# `suite` can sometimes be only simple_stmt, not stmt.
if one_line_suite:
if stack_node.nonterminal == 'file_input':
break
elif symbol == 'file_input':
break
elif symbol == 'suite':
if len(nodes) > 1:
elif stack_node.nonterminal == 'suite':
# If the suite only contains a newline, we don't want to do
# error recovery here; in all other cases, we do.
if len(stack_node.nodes) != 1:
break
elif not nodes:
one_line_suite = True
# `suite` without an indent are error nodes.
return index, symbol, nodes
return until_index
index, symbol, nodes = current_suite(stack)
until_index = current_suite(self.stack)
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
add_token_callback(typ, value, start_pos, prefix)
if self._stack_removal(until_index + 1):
self._add_token(token)
else:
typ, value, start_pos, prefix = token
if typ == INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
self._omit_dedent_list.append(self._indent_counter)
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
stack[-1][2][1].append(error_leaf)
error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
self.stack[-1].nodes.append(error_leaf)
if symbol == 'suite':
dfa, state, node = stack[-1]
states, first = dfa
arcs = states[state]
intended_label = pgen_grammar.symbol2label['stmt']
# Introduce a proper state transition. We're basically allowing
# there to be no valid statements inside a suite.
if [x[0] for x in arcs] == [intended_label]:
new_state = arcs[0][1]
stack[-1] = dfa, new_state, node
tos = self.stack[-1]
if tos.nonterminal == 'suite':
# Need at least one statement in the suite. This happened with the
# error recovery above.
try:
tos.dfa = tos.dfa.arcs['stmt']
except KeyError:
# We're already in a final state.
pass
def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
failed_stack = False
found = False
all_nodes = []
for dfa, state, (type_, nodes) in stack[start_index:]:
if nodes:
found = True
if found:
failed_stack = True
all_nodes += nodes
if failed_stack:
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
def _stack_removal(self, start_index):
all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
stack[start_index:] = []
return failed_stack
if all_nodes:
self.stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes))
self.stack[start_index:] = []
return bool(all_nodes)
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
for token in tokens:
typ = token[0]
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
@@ -267,4 +209,4 @@ class Parser(BaseParser):
                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1

            yield token
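The INDENT/DEDENT bookkeeping is easy to get wrong, so here is a toy model of it, with made-up token name strings and without parso's real classes: error recovery records the indentation level of every INDENT it throws away, and this filter later swallows exactly one matching DEDENT so the levels stay balanced.

def recovery_filter(tokens, omit_dedent_list):
    indent_counter = 0
    for typ in tokens:
        if typ == 'INDENT':
            indent_counter += 1
        elif typ == 'DEDENT':
            if omit_dedent_list and omit_dedent_list[-1] == indent_counter:
                omit_dedent_list.pop()
                continue  # Skip the DEDENT that matches a dropped INDENT.
            indent_counter -= 1
        yield typ

# Pretend error recovery already dropped the INDENT at level 1:
print(list(recovery_filter(['INDENT', 'NAME', 'DEDENT'], [1])))
# -> ['INDENT', 'NAME']: the matching DEDENT was omitted.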

View File

@@ -1,115 +1,27 @@
from __future__ import absolute_import


class TokenType(object):
    def __init__(self, name, contains_syntax=False):
        self.name = name
        self.contains_syntax = contains_syntax

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.name)


class TokenTypes(object):
    """
    Basically an enum, but Python 2 doesn't have enums in the standard
    library.
    """
    def __init__(self, names, contains_syntax):
        for name in names:
            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


PythonTokenTypes = TokenTypes((
    'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
    'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
    'ENDMARKER'),
    contains_syntax=('NAME', 'OP'),
)
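Usage-wise, the attributes of `PythonTokenTypes` are `TokenType` instances rather than integers, so a token type carries its name and whether its string matters to the grammar. A quick sketch:

from parso.python.token import PythonTokenTypes

print(PythonTokenTypes.NAME)                    # TokenType(NAME)
print(PythonTokenTypes.NAME.contains_syntax)    # True: keywords arrive as NAME
print(PythonTokenTypes.NUMBER.contains_syntax)  # False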

View File

@@ -18,14 +18,25 @@ from collections import namedtuple
import itertools as _itertools
from codecs import BOM_UTF8
from parso.python.token import PythonTokenTypes
from parso._compatibility import py_version
from parso.utils import split_lines
STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END
TokenCollection = namedtuple(
    'TokenCollection',
    'pseudo_token single_quoted triple_quoted endpats whitespace '
@@ -242,12 +253,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
class PythonToken(Token):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                self._replace(type=self.type.name))
class FStringNode(object):
@@ -396,7 +404,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
                yield PythonToken(
                    STRING, contstr + line[:pos],
                    contstr_start, prefix)
                contstr = ''
                contline = None
            else:
@@ -571,13 +581,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                        and fstring_stack[-1].parentheses_count == 1:
                    fstring_stack[-1].format_spec_count += 1

                yield PythonToken(OP, token, spos, prefix)

    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
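The net effect on the tokenizer's output: all operators now share the single `OP` token type, and the concrete operator is only visible in the token's string. A small sketch, using `parse_version_string` to build the version argument:

from parso.python.tokenize import tokenize
from parso.utils import parse_version_string

for token in tokenize('1 + 2\n', parse_version_string('3.6')):
    print(token.type, repr(token.string))
# e.g. TokenType(NUMBER) '1', TokenType(OP) '+', TokenType(NUMBER) '2', ...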

View File

@@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf):
        # indent error leafs somehow? No idea how, though.
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
                and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
            previous_leaf = previous_leaf.get_previous_leaf()

        if previous_leaf is None:

View File

@@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf):
class TypedLeaf(Leaf):
    __slots__ = ('type',)

    def __init__(self, type, value, start_pos, prefix=''):
        super(TypedLeaf, self).__init__(value, start_pos, prefix)
        self.type = type
@@ -351,13 +352,13 @@ class ErrorLeaf(Leaf):
    """
    A leaf that is either completely invalid in a language (like `$` in
    Python) or is invalid at that position. Like the star in `1 +* 1`.
    """
    __slots__ = ('token_type',)
    type = 'error_leaf'

    def __init__(self, token_type, value, start_pos, prefix=''):
        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
        self.token_type = token_type

    def __repr__(self):
        return "<%s: %s:%s, %s>" % \
            (type(self).__name__, self.token_type, repr(self.value), self.start_pos)
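For downstream code, this rename means error leaves now expose the upper-case token type name instead of the old lowercase `original_type`. A short sketch, assuming the usual error-recovery tree shape for a lone `$` (the exact child index may differ):

import parso

module = parso.parse('$\n')
error_leaf = module.children[0]
print(error_leaf.type)        # 'error_leaf'
print(error_leaf.token_type)  # 'ERRORTOKEN' (was 'errortoken' before)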

View File

@@ -11,25 +11,51 @@ def test_with_stmt():
    assert module.children[2].type == 'name'


def test_one_line_function(each_version):
    module = parse('def x(): f.', version=each_version)
    assert module.children[0].type == 'funcdef'
    def_, name, parameters, colon, f = module.children[0].children
    assert f.type == 'error_node'

    module = parse('def x(a:', version=each_version)
    func = module.children[0]
    assert func.type == 'error_node'
    if each_version.startswith('2'):
        assert func.children[-1].value == 'a'
    else:
        assert func.children[-1] == ':'


def test_if_else():
    module = parse('if x:\n f.\nelse:\n g(')
    if_stmt = module.children[0]
    if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children
    f = suite1.children[1]
    assert f.type == 'error_node'
    assert f.children[0].value == 'f'
    assert f.children[1].value == '.'
    g = suite2.children[1]
    assert g.children[0].value == 'g'
    assert g.children[1].value == '('


def test_if_stmt():
    module = parse('if x: f.\nelse: g(')
    if_stmt = module.children[0]
    assert if_stmt.type == 'if_stmt'
    if_, test, colon, f = if_stmt.children
    assert f.type == 'error_node'
    assert f.children[0].value == 'f'
    assert f.children[1].value == '.'

    assert module.children[1].type == 'newline'
    assert module.children[1].value == '\n'
    assert module.children[2].type == 'error_leaf'
    assert module.children[2].value == 'else'
    assert module.children[3].type == 'error_leaf'
    assert module.children[3].value == ':'

    in_else_stmt = module.children[4]
    assert in_else_stmt.type == 'error_node'
    assert in_else_stmt.children[0].value == 'g'
    assert in_else_stmt.children[1].value == '('

View File

@@ -12,6 +12,8 @@ import pytest
from parso import load_grammar
from parso import ParserSyntaxError
from parso.pgen2 import generate_grammar
from parso.python import tokenize
def _parse(code, version=None):
@@ -270,3 +272,19 @@ def py_br(each_version):
def test_py3_rb(works_ge_py3):
    works_ge_py3.parse("rb'1'")
    works_ge_py3.parse("RB'1'")
def test_left_recursion():
    with pytest.raises(ValueError, match='left recursion'):
        generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)


def test_ambiguities():
    with pytest.raises(ValueError, match='ambiguous'):
        generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)

    with pytest.raises(ValueError, match='ambiguous'):
        generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes)

    with pytest.raises(ValueError, match='ambiguous'):
        generate_grammar('''foo: bar | 'x'\nbar: 'x'\n''', tokenize.PythonTokenTypes)
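For contrast with the failing cases above, a grammar whose alternatives begin with distinct token types passes the check, since one token of lookahead is enough to pick a branch. A small sketch; `_nonterminal_to_dfas` is a private attribute, inspected here only for illustration:

from parso.pgen2 import generate_grammar
from parso.python.token import PythonTokenTypes

# NAME and NUMBER are distinct terminals, so this grammar generates fine.
grammar = generate_grammar('foo: NAME | NUMBER\n', PythonTokenTypes)
print(sorted(grammar._nonterminal_to_dfas))  # ['foo']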

View File

@@ -1,20 +1,30 @@
# -*- coding: utf-8 # This file contains Unicode characters.
from textwrap import dedent

import pytest

from parso._compatibility import py_version
from parso.utils import split_lines, parse_version_string
from parso.python.token import PythonTokenTypes
from parso.python import tokenize
from parso import parse
from parso.python.tokenize import PythonToken


# To make it easier to access some of the token types, just put them here.
NAME = PythonTokenTypes.NAME
NEWLINE = PythonTokenTypes.NEWLINE
STRING = PythonTokenTypes.STRING
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
OP = PythonTokenTypes.OP
ENDMARKER = PythonTokenTypes.ENDMARKER
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START


def _get_token_list(string):
    # Load the current version.
    version_info = parse_version_string()
@@ -128,7 +138,7 @@ def test_identifier_contains_unicode():
    else:
        # Unicode tokens in Python 2 seem to be identified as operators.
        # They will be ignored in the parser, that's ok.
        assert unicode_token[0] == OP
def test_quoted_strings():
@@ -188,17 +198,17 @@ def test_ur_literals():
def test_error_literal():
    error_token, endmarker = _get_token_list('"\n')
    assert error_token.type == ERRORTOKEN
    assert endmarker.prefix == ''
    assert error_token.string == '"\n'
    assert endmarker.type == ENDMARKER
    assert endmarker.prefix == ''

    bracket, error_token, endmarker = _get_token_list('( """')
    assert error_token.type == ERRORTOKEN
    assert error_token.prefix == ' '
    assert error_token.string == '"""'
    assert endmarker.type == ENDMARKER
    assert endmarker.prefix == ''
@@ -236,14 +246,3 @@ def test_error_string():
    assert t1.prefix == ' '
    assert t1.string == '"\n'
    assert endmarker.string == ''