mirror of https://github.com/davidhalter/parso.git
synced 2025-12-08 21:54:54 +08:00
Compare commits
163 Commits
ff67de248f, 1af5d9d46b, fce3ead829, 55d5d39c53, c8bf23b787, 98c9a1ec7f, ecdb90d9bc, 375ebf2181,
badb2fe010, 8e118c913c, 52fc8fc569, 97cdb448d4, 603b67ee6d, 7686273287, 692436ba12, f7d3d4e82f,
edce279dee, a9e40eb578, b14f518306, 8407894b25, e4efebc9f3, f66e47c540, 706a92ee0d, 91d864b23d,
e20f2069ba, 4cf198285a, 30cf491b4f, c1675da0cb, 7b7b66eb3c, 5d46c3e18b, e9fde82512, a46ecbb499,
da5aa8a2ab, 43d4a8a834, 309033ae2d, 2a9d8632fe, 530a324643, 71003bc20e, c5d141bf60, e958b241c7,
34ab35558f, 03de9cebb8, 6098d89150, ff4358cd97, b5378e4602, 33e321a539, a890ddd6cc, 1362d4f05d,
532aef2342, 878b4b2d3b, 87299335c4, 4f0e9c0fd7, 67ca091631, 4e5ba02dbb, a85f544901, 9e8066c6fd,
68eab72229, d9264609f2, 79c7e0b59d, f03a87b876, 2a082d69df, e6fc739670, 12e11b3d16, cc8038966b,
31aecf2d35, d8554d86d1, d691bf0fd1, 5712ffb5ca, 55d6a69aad, 453471eeb6, a06c3a3129, 73ce57428b,
640f544af9, b6cbf306d7, 95e4ecf592, fbed1ecfe0, 1f27fa9320, 23362ec2d3, 6b391af071, c43cb21a0e,
24346a0d32, 9d452ec66a, 567e0d7aed, 1f02327cff, 8c348aee6f, a277ccf288, a5ce2caab6, da4df9c0f1,
bd444df417, 275dbca1b9, 9a0b6f4928, fc5560874b, 6e5a520e7b, dcabf3d415, 3bc82d112d, ec186a78f8,
3818fb2b22, 95ddeb4012, f638abb08e, f8558df27a, bae56e72e1, 41c38311f7, eeb456a6d4, 1c0956d9e0,
c17156bd36, 8865aa452c, e0c79a9fcc, 3c08b1b058, 0f32673092, 1e18163402, cef9f1bdbd, 23db71a5f7,
34154d05a0, 6f385bdba1, 4fc31c58b3, 689decc66c, c2eacdb81c, ac0bf4fcdd, 948f9ccecc, f20106d88e,
f4912f6c17, bf5a4b7c2c, 579146b501, deb4dbce1c, 8eda8decea, f6935935c0, d3fa7e1cad, 83d9abd036,
222e9117b4, eda2207e6c, a91e5f2775, cba4f2ccc1, 8f1a436ba1, 9941348ec6, afb71dc762, 0d96b12566,
9d2ce4bcd4, a3e280c2b9, 7c7f4f4e54, 56b3e2cdc8, 97f042c6ba, b1aa7c6a79, 235fda3fbb, d8d2e596a5,
e05ce5ae31, 25e4ea9c24, 9f88fe16a3, ba0e7a2e9d, dc80152ff8, 9e3154d167, 065da34272, f89809de9a,
332c57ebcb, acb173b703, 47e78b37fe, fc44af6165, 73439d5863, 085aad3038, 7db500bfbc, e689f3dce6,
b076cdc12a, 0dea94c801, 6cf487aee2
1  .gitignore (vendored)

@@ -9,3 +9,4 @@
/dist/
parso.egg-info/
/.cache/
/.pytest_cache

.travis.yml
@@ -3,11 +3,10 @@ sudo: false
python:
  - 2.6
  - 2.7
  - 3.3
  - 3.4
  - 3.5
  - 3.6
  - 3.7
  - 3.7-dev
  - pypy
matrix:
  allow_failures:

CHANGELOG.rst
@@ -3,6 +3,22 @@
Changelog
---------

0.3.0 (2018-07-30)
+++++++++++++++++++

- Rewrote the pgen2 parser generator.

0.2.1 (2018-05-21)
+++++++++++++++++++

- A bugfix for the diff parser.
- Grammar files can now be loaded from a specific path.

0.2.0 (2018-04-15)
+++++++++++++++++++

- f-strings are now parsed as a part of the normal Python grammar. This makes
  it way easier to deal with them.

0.1.1 (2017-11-05)
+++++++++++++++++++

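A rough illustration of the 0.2.0 entry above (a sketch, not part of this diff; it assumes a parso >= 0.2.0 installation): f-strings simply show up as nodes of the ordinary parse tree, so no separate grammar is needed::

    import parso

    # Parse a module containing an f-string with the regular Python grammar.
    module = parso.parse('x = f"{1 + 1}"', version='3.6')
    # The tree round-trips back to the original source.
    assert module.get_code() == 'x = f"{1 + 1}"'
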
@@ -36,7 +36,7 @@ if [[ $tag_ref ]]; then
        exit 1
    fi
else
    git tag $tag
    git tag -a $tag
    git push --tags
fi

1  docs/_themes/flask/layout.html (vendored)

@@ -19,7 +19,6 @@
{% endblock %}
{%- block footer %}
  <div class="footer">
    © Copyright {{ copyright }}.
    Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
  </div>
  {% if pagename == 'index' %}

docs/conf.py
@@ -13,7 +13,6 @@

import sys
import os
import datetime

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -45,7 +44,7 @@ master_doc = 'index'

# General information about the project.
project = u'parso'
copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
copyright = u'parso contributors'

import parso
from parso.utils import version_info

parso/__init__.py
@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
from parso.utils import split_lines, python_bytes_to_unicode


__version__ = '0.1.1'
__version__ = '0.3.0'


def parse(code=None, **kwargs):

parso/_compatibility.py
@@ -36,7 +36,7 @@ except AttributeError:
def u(string):
    """Cast to unicode DAMMIT!
    Written because Python2 repr always implicitly casts to a string, so we
    have to cast back to a unicode (and we now that we always deal with valid
    have to cast back to a unicode (and we know that we always deal with valid
    unicode, because we check that in the beginning).
    """
    if py_version >= 30:

parso/grammar.py
@@ -2,17 +2,16 @@ import hashlib
import os

from parso._compatibility import FileNotFoundError, is_pypy
from parso.pgen2.pgen import generate_grammar
from parso.pgen2 import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python import token
from parso.python.token import PythonTokenTypes
from parso.cache import parser_cache, load_module, save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
from parso.python.errors import ErrorFinderConfig
from parso.python import pep8
from parso.python import fstring

_loaded_grammars = {}

@@ -21,7 +20,7 @@ class Grammar(object):
    """
    :py:func:`parso.load_grammar` returns instances of this class.

    Creating custom grammars by calling this is not supported, yet.
    Creating custom none-python grammars by calling this is not supported, yet.
    """
    #:param text: A BNF representation of your grammar.
    _error_normalizer_config = None
@@ -52,8 +51,8 @@ class Grammar(object):
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar symbol that you want to parse. Only
            allowed to be used when error_recovery is False.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
@@ -73,7 +72,7 @@ class Grammar(object):
            :py:class:`parso.python.tree.Module`.
        """
        if 'start_pos' in kwargs:
            raise TypeError("parse() got an unexpected keyworda argument.")
            raise TypeError("parse() got an unexpected keyword argument.")
        return self._parse(code=code, **kwargs)

    def _parse(self, code=None, error_recovery=True, path=None,
@@ -89,7 +88,7 @@ class Grammar(object):
            raise TypeError("Please provide either code or a path.")

        if start_symbol is None:
            start_symbol = self._start_symbol
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")
@@ -137,7 +136,7 @@ class Grammar(object):
            p = self._parser(
                self._pgen_grammar,
                error_recovery=error_recovery,
                start_symbol=start_symbol
                start_nonterminal=start_symbol
            )
            root_node = p.parse(tokens=tokens)

@@ -186,17 +185,16 @@ class Grammar(object):
        normalizer.walk(node)
        return normalizer.issues


    def __repr__(self):
        labels = self._pgen_grammar.number2symbol.values()
        txt = ' '.join(list(labels)[:3]) + ' ...'
        nonterminals = self._pgen_grammar._nonterminal_to_dfas.keys()
        txt = ' '.join(list(nonterminals)[:3]) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)


class PythonGrammar(Grammar):
    _error_normalizer_config = ErrorFinderConfig()
    _token_namespace = token
    _start_symbol = 'file_input'
    _token_namespace = PythonTokenTypes
    _start_nonterminal = 'file_input'

    def __init__(self, version_info, bnf_text):
        super(PythonGrammar, self).__init__(
@@ -215,46 +213,19 @@ class PythonGrammar(Grammar):
        return tokenize(code, self.version_info)


class PythonFStringGrammar(Grammar):
    _token_namespace = fstring.TokenNamespace
    _start_symbol = 'fstring'

    def __init__(self):
        super(PythonFStringGrammar, self).__init__(
            text=fstring.GRAMMAR,
            tokenizer=fstring.tokenize,
            parser=fstring.Parser
        )

    def parse(self, code, **kwargs):
        return self._parse(code, **kwargs)

    def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
        tokens = self._tokenizer(code, start_pos=start_pos)
        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_symbol=self._start_symbol,
        )
        return p.parse(tokens=tokens)

    def parse_leaf(self, leaf, error_recovery=True):
        code = leaf._get_payload()
        return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)


def load_grammar(**kwargs):
    """
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.

    :param str version: A python version string, e.g. ``version='3.3'``.
    :param str path: A path to a grammar file
    """
def load_grammar(language='python', version=None):
def load_grammar(language='python', version=None, path=None):
    if language == 'python':
        version_info = parse_version_string(version)

        file = os.path.join(
        file = path or os.path.join(
            'python',
            'grammar%s%s.txt' % (version_info.major, version_info.minor)
        )
@@ -273,10 +244,6 @@ def load_grammar(**kwargs):
        except FileNotFoundError:
            message = "Python version %s is currently not supported." % version
            raise NotImplementedError(message)
    elif language == 'python-f-string':
        if version is not None:
            raise NotImplementedError("Currently different versions are not supported.")
        return PythonFStringGrammar()
    else:
        raise NotImplementedError("No support for language %s." % language)

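A minimal usage sketch of the API touched in these hunks (illustrative only; it assumes parso 0.3.0 is installed as a package)::

    from parso import load_grammar

    grammar = load_grammar(version='3.6')           # picks the bundled grammar36.txt
    module = grammar.parse('def f(:\n    pass\n')   # error_recovery=True by default
    print([issue.code for issue in grammar.iter_errors(module)])

    # start_symbol (a grammar nonterminal) may only be changed with error recovery off.
    expr = grammar.parse('1 + 2', start_symbol='eval_input', error_recovery=False)
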
172  parso/parser.py

@@ -1,3 +1,11 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.

"""
The ``Parser`` tries to convert the available Python code in an easy to read
format, something like an abstract syntax tree. The classes who represent this
@@ -16,7 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from parso import tree
from parso.pgen2.parse import PgenParser
from parso.pgen2.generator import ReservedString


class ParserSyntaxError(Exception):
@@ -30,7 +38,76 @@ class ParserSyntaxError(Exception):
        self.error_leaf = error_leaf


class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.
    """

    def __init__(self, msg, type_, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, type_.name, value, start_pos))
        self.msg = msg
        self.type = type
        self.value = value
        self.start_pos = start_pos


class Stack(list):
    def _allowed_transition_names_and_token_types(self):
        def iterate():
            # An API just for Jedi.
            for stack_node in reversed(self):
                for transition in stack_node.dfa.transitions:
                    if isinstance(transition, ReservedString):
                        yield transition.value
                    else:
                        yield transition  # A token type

                if not stack_node.dfa.is_final:
                    break

        return list(iterate())


class StackNode(object):
    def __init__(self, dfa):
        self.dfa = dfa
        self.nodes = []

    @property
    def nonterminal(self):
        return self.dfa.from_rule

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)


def _token_to_transition(grammar, type_, value):
    # Map from token to label
    if type_.contains_syntax:
        # Check for reserved words (keywords)
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass

    return type_


class BaseParser(object):
    """Parser engine.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See python/tokenize.py for how to get input tokens by a string.

    When a syntax error occurs, error_recovery() is called.
    """

    node_map = {}
    default_node = tree.Node

@@ -38,41 +115,94 @@ class BaseParser(object):
    }
    default_leaf = tree.Leaf

    def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
    def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
        self._pgen_grammar = pgen_grammar
        self._start_symbol = start_symbol
        self._start_nonterminal = start_nonterminal
        self._error_recovery = error_recovery

    def parse(self, tokens):
        start_number = self._pgen_grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._pgen_grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )
        first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
        self.stack = Stack([StackNode(first_dfa)])

        node = self.pgen_parser.parse(tokens)
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return node
        for token in tokens:
            self._add_token(token)

    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                       add_token_callback):
        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                # We never broke out -- EOF is too soon -- Unfinished statement.
                # However, the error recovery might have added the token again, if
                # the stack is empty, we're fine.
                raise InternalParseError(
                    "incomplete input", token.type, token.value, token.start_pos
                )

            if len(self.stack) > 1:
                self._pop()
            else:
                return self.convert_node(tos.nonterminal, tos.nodes)

    def error_recovery(self, token):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
            error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
            type_, value, start_pos, prefix = token
            error_leaf = tree.ErrorLeaf('TODO %s' % type_, value, start_pos, prefix)
            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)

    def convert_node(self, pgen_grammar, type_, children):
        # TODO REMOVE symbol, we don't want type here.
        symbol = pgen_grammar.number2symbol[type_]
    def convert_node(self, nonterminal, children):
        try:
            return self.node_map[symbol](children)
            return self.node_map[nonterminal](children)
        except KeyError:
            return self.default_node(symbol, children)
            return self.default_node(nonterminal, children)

    def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
    def convert_leaf(self, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)

    def _add_token(self, token):
        """
        This is the only core function for parsing. Here happens basically
        everything. Everything is well prepared by the parser generator and we
        only apply the necessary steps here.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        type_, value, start_pos, prefix = token
        transition = _token_to_transition(grammar, type_, value)

        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    self._pop()
                else:
                    self.error_recovery(token)
                    return
            except IndexError:
                raise InternalParseError("too much input", type_, value, start_pos)

        stack[-1].dfa = plan.next_dfa

        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        leaf = self.convert_leaf(type_, value, prefix, start_pos)
        stack[-1].nodes.append(leaf)

    def _pop(self):
        tos = self.stack.pop()
        # If there's exactly one child, return that child instead of
        # creating a new node. We still create expr_stmt and
        # file_input though, because a lot of Jedi depends on its
        # logic.
        if len(tos.nodes) == 1:
            new_node = tos.nodes[0]
        else:
            new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)

        self.stack[-1].nodes.append(new_node)

parso/pgen2/__init__.py
@@ -4,5 +4,7 @@
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Copyright 2014 David Halter. Integration into Jedi.
# Copyright 2014 David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

from parso.pgen2.generator import generate_grammar

359  parso/pgen2/generator.py (Normal file)

@@ -0,0 +1,359 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

"""
This module defines the data structures used to represent a grammar.

Specifying grammars in pgen is possible with this grammar::

    grammar: (NEWLINE | rule)* ENDMARKER
    rule: NAME ':' rhs NEWLINE
    rhs: items ('|' items)*
    items: item+
    item: '[' rhs ']' | atom ['+' | '*']
    atom: '(' rhs ')' | NAME | STRING

This grammar is self-referencing.

This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
Most of the code has been refactored to make it more Pythonic. Since this was a
"copy" of the CPython Parser parser "pgen", there was some work needed to make
it more readable. It should also be slightly faster than the original pgen2,
because we made some optimizations.
"""

from ast import literal_eval

from parso.pgen2.grammar_parser import GrammarParser, NFAState


class Grammar(object):
    """
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly.

    The only important part in this parsers are dfas and transitions between
    dfas.
    """

    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.nonterminal_to_dfas = rule_to_dfas  # Dict[str, List[DFAState]]
        self.reserved_syntax_strings = reserved_syntax_strings
        self.start_nonterminal = start_nonterminal


class DFAPlan(object):
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.
    """
    def __init__(self, next_dfa, dfa_pushes=[]):
        self.next_dfa = next_dfa
        self.dfa_pushes = dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)


class DFAState(object):
    """
    The DFAState object is the core class for pretty much anything. DFAState
    are the vertices of an ordered graph while arcs and transitions are the
    edges.

    Arcs are the initial edges, where most DFAStates are not connected and
    transitions are then calculated to connect the DFA state machines that have
    different nonterminals.
    """
    def __init__(self, from_rule, nfa_set, final):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        self.arcs = {}  # map from terminals/nonterminals to DFAState
        # In an intermediary step we set these nonterminal arcs (which has the
        # same structure as arcs). These don't contain terminals anymore.
        self.nonterminal_arcs = {}

        # Transitions are basically the only thing that the parser is using
        # with is_final. Everyting else is purely here to create a parser.
        self.transitions = {}  #: Dict[Union[TokenType, ReservedString], DFAPlan]
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        for label, next_ in self.arcs.items():
            if next_ is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfa_set instance variable
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next_ in self.arcs.items():
            if next_ is not other.arcs.get(label):
                return False
        return True

    __hash__ = None  # For Py3 compatibility.

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )


class ReservedString(object):
    """
    Most grammars will have certain keywords and operators that are mentioned
    in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
    This class basically is the former.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.value)


def _simplify_dfas(dfas):
    """
    This is not theoretically optimal, but works well enough.
    Algorithm: repeatedly look for two states that have the same
    set of arcs (same labels pointing to the same nodes) and
    unify them, until things stop changing.

    dfas is a list of DFAState instances
    """
    changes = True
    while changes:
        changes = False
        for i, state_i in enumerate(dfas):
            for j in range(i + 1, len(dfas)):
                state_j = dfas[j]
                if state_i == state_j:
                    #print "  unify", i, j
                    del dfas[j]
                    for state in dfas:
                        state.unifystate(state_j, state_i)
                    changes = True
                    break


def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA. Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start


def _dump_nfa(start, finish):
    print("Dump of NFA for", start.from_rule)
    todo = [start]
    for i, state in enumerate(todo):
        print("  State", i, state is finish and "(final)" or "")
        for label, next_ in state.arcs:
            if next_ in todo:
                j = todo.index(next_)
            else:
                j = len(todo)
                todo.append(next_)
            if label is None:
                print("    -> %d" % j)
            else:
                print("    %s -> %d" % (label, j))


def _dump_dfas(dfas):
    print("Dump of DFA for", dfas[0].from_rule)
    for i, state in enumerate(dfas):
        print("  State", i, state.is_final and "(final)" or "")
        for nonterminal, next_ in state.arcs.items():
            print("    %s -> %d" % (nonterminal, dfas.index(next_)))


def generate_grammar(bnf_grammar, token_namespace):
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
        #_dump_nfa(a, z)
        dfas = _make_dfas(nfa_a, nfa_z)
        #_dump_dfas(dfas)
        # oldlen = len(dfas)
        _simplify_dfas(dfas)
        # newlen = len(dfas)
        rule_to_dfas[nfa_a.from_rule] = dfas
        #print(nfa_a.from_rule, oldlen, newlen)

        if start_nonterminal is None:
            start_nonterminal = nfa_a.from_rule

    reserved_strings = {}
    for nonterminal, dfas in rule_to_dfas.items():
        for dfa_state in dfas:
            for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
                if terminal_or_nonterminal in rule_to_dfas:
                    dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        terminal_or_nonterminal
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)


def _make_transition(token_namespace, reserved_syntax_strings, label):
    """
    Creates a reserved string ("if", "for", "*", ...) or returns the token type
    (NUMBER, STRING, ...) for a given grammar terminal.
    """
    if label[0].isalpha():
        # A named token (e.g. NAME, NUMBER, STRING)
        return getattr(token_namespace, label)
    else:
        # Either a keyword or an operator
        assert label[0] in ('"', "'"), label
        assert not label.startswith('"""') and not label.startswith("'''")
        # TODO use literal_eval instead of a simple eval.
        value = literal_eval(label)
        try:
            return reserved_syntax_strings[value]
        except KeyError:
            r = reserved_syntax_strings[value] = ReservedString(value)
            return r


def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion or ambiguities.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)


def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
    """
    Calculates the first plan in the first_plans dictionary for every given
    nonterminal. This is going to be used to know when to create stack nodes.
    """
    dfas = nonterminal_to_dfas[nonterminal]
    new_first_plans = {}
    first_plans[nonterminal] = None  # dummy to detect left recursion
    # We only need to check the first dfa. All the following ones are not
    # interesting to find first terminals.
    state = dfas[0]
    for transition, next_ in state.transitions.items():
        # It's a string. We have finally found a possible first token.
        new_first_plans[transition] = [next_.next_dfa]

    for nonterminal2, next_ in state.nonterminal_arcs.items():
        # It's a nonterminal and we have either a left recursion issue
        # in the grammar or we have to recurse.
        try:
            first_plans2 = first_plans[nonterminal2]
        except KeyError:
            first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
        else:
            if first_plans2 is None:
                raise ValueError("left recursion for rule %r" % nonterminal)

        for t, pushes in first_plans2.items():
            check = new_first_plans.get(t)
            if check is not None:
                raise ValueError(
                    "Rule %s is ambiguous; %s is the"
                    " start of the rule %s as well as %s."
                    % (nonterminal, t, nonterminal2, check[-1].from_rule)
                )
            new_first_plans[t] = [next_] + pushes

    first_plans[nonterminal] = new_first_plans
    return new_first_plans

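The module above turns a pgen-dialect BNF text into DFAs and transition plans. A rough sketch of calling it directly, using names taken from this diff (the grammar file location is an assumption about the installed package layout)::

    import os
    import parso
    from parso.pgen2 import generate_grammar
    from parso.python.token import PythonTokenTypes

    # The bundled grammar files live under parso/python/ (see load_grammar above).
    path = os.path.join(os.path.dirname(parso.__file__), 'python', 'grammar36.txt')
    with open(path) as f:
        pgen_grammar = generate_grammar(f.read(), token_namespace=PythonTokenTypes)

    print(pgen_grammar.start_nonterminal)           # 'file_input'
    print(sorted(pgen_grammar.nonterminal_to_dfas)[:3])
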
parso/pgen2/grammar.py (deleted)
@@ -1,128 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

try:
    import cPickle as pickle
except:
    import pickle


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states -- a list of DFAs, where each DFA is a list of
              states, each state is a list of arcs, and each
              arc is a (i, j) pair where i is a label and j is
              a state number. The DFA number is the index into
              this list. (This name is slightly confusing.)
              Final states are represented by a special arc of
              the form (0, j) where j is its own state number.

    dfas -- a dict mapping symbol numbers to (DFA, first)
            pairs, where DFA is an item from the states list
            above, and first is a set of tokens that can
            begin this grammar rule (represented by a dict
            whose values are always 1).

    labels -- a list of (x, y) pairs where x is either a token
              number or a symbol number, and y is either None
              or a string; the strings are keywords. The label
              number is the index in this list; label numbers
              are used to mark state transitions (arcs) in the
              DFAs.

    start -- the number of the grammar's start symbol.

    keywords -- a dict mapping keyword strings to arc labels.

    tokens -- a dict mapping token numbers to arc labels.

    """

    def __init__(self, bnf_text):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.label2symbol = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)

156  parso/pgen2/grammar_parser.py (Normal file)

@@ -0,0 +1,156 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

from parso.python.tokenize import tokenize
from parso.utils import parse_version_string
from parso.python.token import PythonTokenTypes


class GrammarParser():
    """
    The parser for Python grammar files.
    """
    def __init__(self, bnf_grammar):
        self._bnf_grammar = bnf_grammar
        self.generator = tokenize(
            bnf_grammar,
            version_info=parse_version_string('3.6')
        )
        self._gettoken()  # Initialize lookahead

    def parse(self):
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != PythonTokenTypes.ENDMARKER:
            while self.type == PythonTokenTypes.NEWLINE:
                self._gettoken()

            # rule: NAME ':' rhs NEWLINE
            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
            self._expect(PythonTokenTypes.OP, ':')

            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.NEWLINE)

            yield a, z

    def _parse_rhs(self):
        # rhs: items ('|' items)*
        a, z = self._parse_items()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState(self._current_rule_name)
            zz = NFAState(self._current_rule_name)
            while True:
                # Add the possibility to go into the state of a and come back
                # to finish.
                aa.add_arc(a)
                z.add_arc(zz)
                if self.value != "|":
                    break

                self._gettoken()
                a, z = self._parse_items()
            return aa, zz

    def _parse_items(self):
        # items: item+
        a, b = self._parse_item()
        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
                or self.value in ('(', '['):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
            b = d
        return a, b

    def _parse_item(self):
        # item: '[' rhs ']' | atom ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ']')
            # Make it also possible that there is no token and change the
            # state.
            a.add_arc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            # Make it clear that we can go back to the old state and repeat.
            z.add_arc(a)
            if value == "+":
                return a, z
            else:
                # The end state is the same as the beginning, nothing must
                # change.
                return a, a

    def _parse_atom(self):
        # atom: '(' rhs ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ')')
            return a, z
        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
            a = NFAState(self._current_rule_name)
            z = NFAState(self._current_rule_name)
            # Make it clear that the state transition requires that value.
            a.add_arc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)

    def _expect(self, type_, value=None):
        if self.type != type_:
            self._raise_error("expected %s, got %s [%s]",
                              type_, self.type, self.value)
        if value is not None and self.value != value:
            self._raise_error("expected %s, got %s", value, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
        tup = next(self.generator)
        self.type, self.value, self.begin, prefix = tup

    def _raise_error(self, msg, *args):
        if args:
            try:
                msg = msg % args
            except:
                msg = " ".join([msg] + list(map(str, args)))
        line = self._bnf_grammar.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ('<grammar>', self.begin[0],
                                self.begin[1], line))


class NFAArc(object):
    def __init__(self, next_, nonterminal_or_string):
        self.next = next_
        self.nonterminal_or_string = nonterminal_or_string


class NFAState(object):
    def __init__(self, from_rule):
        self.from_rule = from_rule
        self.arcs = []  # List[nonterminal (str), NFAState]

    def add_arc(self, next_, nonterminal_or_string=None):
        assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str)
        assert isinstance(next_, NFAState)
        self.arcs.append(NFAArc(next_, nonterminal_or_string))

    def __repr__(self):
        return '<%s: from %s>' % (self.__class__.__name__, self.from_rule)

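GrammarParser yields one (start, end) NFA pair per grammar rule. A tiny sketch with a hypothetical rule (illustrative only, not part of this diff)::

    from parso.pgen2.grammar_parser import GrammarParser

    bnf = "call: NAME '(' [NAME] ')' NEWLINE\n"
    for start_nfa, end_nfa in GrammarParser(bnf).parse():
        # Each arc is either an epsilon transition (None) or a terminal/nonterminal label.
        print(start_nfa.from_rule, [arc.nonterminal_or_string for arc in start_nfa.arcs])
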
parso/pgen2/parse.py (deleted)
@@ -1,223 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

"""
Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""

from parso.python import tokenize


class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.
    """

    def __init__(self, msg, type, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, tokenize.tok_name[type], value, start_pos))
        self.msg = msg
        self.type = type
        self.value = value
        self.start_pos = start_pos


class Stack(list):
    def get_tos_nodes(self):
        tos = self[-1]
        return tos[2][1]


def token_to_ilabel(grammar, type_, value):
    # Map from token to label
    if type_ == tokenize.NAME:
        # Check for reserved words (keywords)
        try:
            return grammar.keywords[value]
        except KeyError:
            pass

    try:
        return grammar.tokens[type_]
    except KeyError:
        return None


class PgenParser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.add_token(...):          # parse a token
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when add_token() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, error_recovery()
    is called. There is no error recovery; the parser cannot be used
    after a syntax error was reported (but it can be reinitialized by
    calling setup()).

    """

    def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, nodes) tuple, where
        type is the node type (a token or symbol number) and nodes
        is a list of children for symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert_node = convert_node
        self.convert_leaf = convert_leaf

        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, children),
        # where children is a list of nodes or None
        newnode = (start, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = Stack([stackentry])
        self.rootnode = None
        self.error_recovery = error_recovery

    def parse(self, tokens):
        for type_, value, start_pos, prefix in tokens:
            if self.add_token(type_, value, start_pos, prefix):
                break
        else:
            # We never broke out -- EOF is too soon -- Unfinished statement.
            # However, the error recovery might have added the token again, if
            # the stack is empty, we're fine.
            if self.stack:
                raise InternalParseError("incomplete input", type_, value, start_pos)
        return self.rootnode

    def add_token(self, type_, value, start_pos, prefix):
        """Add a token; return True if this is the end of the program."""
        ilabel = token_to_ilabel(self.grammar, type_, value)

        # Loop until the token is shifted; may raise exceptions
        _gram = self.grammar
        _labels = _gram.labels
        _push = self._push
        _pop = self._pop
        _shift = self._shift
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = _labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    _shift(type_, value, newstate, prefix, start_pos)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        _pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = _gram.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        _push(t, itsdfa, newstate)
                        break  # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    _pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise InternalParseError("too much input", type_, value, start_pos)
                else:
                    self.error_recovery(self.grammar, self.stack, arcs, type_,
                                        value, start_pos, prefix, self.add_token)
                    break

    def _shift(self, type_, value, newstate, prefix, start_pos):
        """Shift a token. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
        node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def _push(self, type_, newdfa, newstate):
        """Push a nonterminal. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type_, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def _pop(self):
        """Pop a nonterminal. (Internal)"""
        popdfa, popstate, (type_, children) = self.stack.pop()
        # If there's exactly one child, return that child instead of creating a
        # new node. We still create expr_stmt and file_input though, because a
        # lot of Jedi depends on its logic.
        if len(children) == 1:
            newnode = children[0]
        else:
            newnode = self.convert_node(self.grammar, type_, children)

        try:
            # Equal to:
            # dfa, state, node = self.stack[-1]
            # symbol, children = node
            self.stack[-1][2][1].append(newnode)
        except IndexError:
            # Stack is empty, set the rootnode.
            self.rootnode = newnode

@@ -1,399 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright 2014 David Halter. Integration into Jedi.
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
|
||||
from parso.pgen2 import grammar
|
||||
from parso.python import token
|
||||
from parso.python import tokenize
|
||||
from parso.utils import parse_version_string
|
||||
|
||||
|
||||
class ParserGenerator(object):
|
||||
def __init__(self, bnf_text, token_namespace):
|
||||
self._bnf_text = bnf_text
|
||||
self.generator = tokenize.tokenize(
|
||||
bnf_text,
|
||||
version_info=parse_version_string('3.6')
|
||||
)
|
||||
self._gettoken() # Initialize lookahead
|
||||
self.dfas, self.startsymbol = self._parse()
|
||||
self.first = {} # map from symbol name to set of tokens
|
||||
self._addfirstsets()
|
||||
self._token_namespace = token_namespace
|
||||
|
||||
def make_grammar(self):
|
||||
c = grammar.Grammar(self._bnf_text)
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
names.remove(self.startsymbol)
|
||||
names.insert(0, self.startsymbol)
|
||||
for name in names:
|
||||
i = 256 + len(c.symbol2number)
|
||||
c.symbol2number[name] = i
|
||||
c.number2symbol[i] = name
|
||||
for name in names:
|
||||
dfa = self.dfas[name]
|
||||
states = []
|
||||
for state in dfa:
|
||||
arcs = []
|
||||
for label, next in state.arcs.items():
|
||||
arcs.append((self._make_label(c, label), dfa.index(next)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfa.index(state)))
|
||||
states.append(arcs)
|
||||
c.states.append(states)
|
||||
c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name))
|
||||
c.start = c.symbol2number[self.startsymbol]
|
||||
return c
|
||||
|
||||
def _make_first(self, c, name):
|
||||
rawfirst = self.first[name]
|
||||
first = {}
|
||||
for label in rawfirst:
|
||||
ilabel = self._make_label(c, label)
|
||||
##assert ilabel not in first # XXX failed on <> ... !=
|
||||
first[ilabel] = 1
|
||||
return first
|
||||
|
||||
def _make_label(self, c, label):
|
||||
# XXX Maybe this should be a method on a subclass of converter?
|
||||
ilabel = len(c.labels)
|
||||
if label[0].isalpha():
|
||||
# Either a symbol name or a named token
|
||||
if label in c.symbol2number:
|
||||
# A symbol name (a non-terminal)
|
||||
if label in c.symbol2label:
|
||||
return c.symbol2label[label]
|
||||
else:
|
||||
c.labels.append((c.symbol2number[label], None))
|
||||
c.symbol2label[label] = ilabel
|
||||
c.label2symbol[ilabel] = label
|
||||
return ilabel
|
||||
else:
|
||||
# A named token (NAME, NUMBER, STRING)
|
||||
itoken = getattr(self._token_namespace, label, None)
|
||||
assert isinstance(itoken, int), label
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
value = eval(label)
|
||||
if value[0].isalpha():
|
||||
# A keyword
|
||||
if value in c.keywords:
|
||||
return c.keywords[value]
|
||||
else:
|
||||
# TODO this might be an issue?! Using token.NAME here?
|
||||
c.labels.append((token.NAME, value))
|
||||
c.keywords[value] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# An operator (any non-numeric token)
|
||||
itoken = self._token_namespace.generate_token_id(value)
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
|
||||
def _addfirstsets(self):
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
for name in names:
|
||||
if name not in self.first:
|
||||
self._calcfirst(name)
|
||||
#print name, self.first[name].keys()
|
||||
|
||||
def _calcfirst(self, name):
|
||||
dfa = self.dfas[name]
|
||||
self.first[name] = None # dummy to detect left recursion
        state = dfa[0]
        totalset = {}
        overlapcheck = {}
        for label, next in state.arcs.items():
            if label in self.dfas:
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError("recursion for rule %r" % name)
                else:
                    self._calcfirst(label)
                    fset = self.first[label]
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        inverse = {}
        for label, itsfirst in overlapcheck.items():
            for symbol in itsfirst:
                if symbol in inverse:
                    raise ValueError("rule %s is ambiguous; %s is in the"
                                     " first sets of %s as well as %s" %
                                     (name, symbol, label, inverse[symbol]))
                inverse[symbol] = label
        self.first[name] = totalset

    def _parse(self):
        dfas = {}
        startsymbol = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self._gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self._expect(token.NAME)
            self._expect(token.COLON)
            a, z = self._parse_rhs()
            self._expect(token.NEWLINE)
            #self._dump_nfa(name, a, z)
            dfa = self._make_dfa(a, z)
            #self._dump_dfa(name, dfa)
            # oldlen = len(dfa)
            self._simplify_dfa(dfa)
            # newlen = len(dfa)
            dfas[name] = dfa
            #print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol

    def _make_dfa(self, start, finish):
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA. Then do some
        # state reduction. Let's represent sets as dicts with 1 for
        # values.
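        # A tiny illustration of this subset construction: given the NFA arcs
        # a --'x'--> b and a --(epsilon)--> c, the start DFA state is the
        # epsilon closure {a: 1, c: 1}, and following 'x' from it yields {b: 1}.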
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)

        def closure(state):
            base = {}
            addclosure(state, base)
            return base

        def addclosure(state, base):
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, next in state.arcs:
                if label is None:
                    addclosure(next, base)

        states = [DFAState(closure(start), finish)]
        for state in states: # NB states grows while we're iterating
            arcs = {}
            for nfastate in state.nfaset:
                for label, next in nfastate.arcs:
                    if label is not None:
                        addclosure(next, arcs.setdefault(label, {}))
            for label, nfaset in arcs.items():
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states # List of DFAState instances; first one is start

    def _dump_nfa(self, name, start, finish):
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):
            print(" State", i, state is finish and "(final)" or "")
            for label, next in state.arcs:
                if next in todo:
                    j = todo.index(next)
                else:
                    j = len(todo)
                    todo.append(next)
                if label is None:
                    print(" -> %d" % j)
                else:
                    print(" %s -> %d" % (label, j))

    def _dump_dfa(self, name, dfa):
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print(" State", i, state.isfinal and "(final)" or "")
            for label, next in state.arcs.items():
                print(" %s -> %d" % (label, dfa.index(next)))

    def _simplify_dfa(self, dfa):
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.
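        # (e.g. two distinct final states whose only arc is the same label to
        # the same target are indistinguishable, so one of them is dropped)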

        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i + 1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        #print " unify", i, j
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break

    def _parse_rhs(self):
        # RHS: ALT ('|' ALT)*
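        # (e.g. for `a | b` a fresh start/end pair is created below and both
        # alternatives are wired in with epsilon arcs)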
        a, z = self._parse_alt()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState()
            zz = NFAState()
            aa.addarc(a)
            z.addarc(zz)
            while self.value == "|":
                self._gettoken()
                a, z = self._parse_alt()
                aa.addarc(a)
                z.addarc(zz)
            return aa, zz

    def _parse_alt(self):
        # ALT: ITEM+
        a, b = self._parse_item()
        while (self.value in ("(", "[") or
               self.type in (token.NAME, token.STRING)):
            c, d = self._parse_item()
            b.addarc(c)
            b = d
        return a, b

    def _parse_item(self):
        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
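        # (repetition example: for `x*` the end state loops back and (a, a) is
        # returned, so zero occurrences match; for `x+` the same back-arc
        # exists but (a, z) still forces at least one pass through the atom)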
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.RSQB)
            a.addarc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            z.addarc(a)
            if value == "+":
                return a, z
            else:
                return a, a

    def _parse_atom(self):
        # ATOM: '(' RHS ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.RPAR)
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)

    def _expect(self, type):
        if self.type != type:
            self._raise_error("expected %s, got %s(%s)",
                              type, self.type, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
        tup = next(self.generator)
        while tup[0] in (token.COMMENT, token.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, prefix = tup

    def _raise_error(self, msg, *args):
        if args:
            try:
                msg = msg % args
            except:
                msg = " ".join([msg] + list(map(str, args)))
        line = self._bnf_text.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ('<grammar>', self.begin[0],
                                self.begin[1], line))


class NFAState(object):
    def __init__(self):
        self.arcs = [] # list of (label, NFAState) pairs

    def addarc(self, next, label=None):
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))


class DFAState(object):
    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {} # map from label to DFAState

    def addarc(self, next, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old, new):
        for label, next in self.arcs.items():
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in self.arcs.items():
            if next is not other.arcs.get(label):
                return False
        return True

    __hash__ = None # For Py3 compatibility.


def generate_grammar(bnf_text, token_namespace):
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    p = ParserGenerator(bnf_text, token_namespace)
    return p.make_grammar()
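

# Example usage (an illustrative sketch, not part of the module; `MyTokens`
# stands for a hypothetical token namespace that provides the named token
# attributes and the generate_token_id() hook used by _make_label() above):
#
#     bnf_text = "file_input: NAME NEWLINE ENDMARKER\n"
#     grammar = generate_grammar(bnf_text, MyTokens)
#     # grammar.start holds the start symbol's number and grammar.dfas maps
#     # each nonterminal number to its (states, first set) pair.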
@@ -13,8 +13,8 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
-                                   ENDMARKER, INDENT, DEDENT)
+from parso.python.tokenize import PythonToken
+from parso.python.token import PythonTokenTypes

 LOG = logging.getLogger(__name__)

@@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf):

 def _ends_with_newline(leaf, suffix=''):
     if leaf.type == 'error_leaf':
-        typ = leaf.original_type
+        typ = leaf.token_type.lower()
     else:
         typ = leaf.type

@@ -41,9 +41,8 @@ def _flows_finished(pgen_grammar, stack):
     if, while, for and try might not be finished, because another part might
     still be parsed.
     """
-    for dfa, newstate, (symbol_number, nodes) in stack:
-        if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
-                                                         'for_stmt', 'try_stmt'):
+    for stack_node in stack:
+        if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
             return False
     return True

@@ -52,10 +51,10 @@ def suite_or_file_input_is_valid(pgen_grammar, stack):
     if not _flows_finished(pgen_grammar, stack):
         return False

-    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
-        if pgen_grammar.number2symbol[symbol_number] == 'suite':
+    for stack_node in reversed(stack):
+        if stack_node.nonterminal == 'suite':
             # If only newline is in the suite, the suite is not valid, yet.
-            return len(nodes) > 1
+            return len(stack_node.nodes) > 1
     # Not reaching a suite means that we're dealing with file_input levels
     # where there's no need for a valid statement in it. It can also be empty.
     return True

@@ -133,7 +132,7 @@ class DiffParser(object):
         LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))

         for operation, i1, i2, j1, j2 in opcodes:
-            LOG.debug('diff %s old[%s:%s] new[%s:%s]',
+            LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]',
                       operation, i1 + 1, i2, j1 + 1, j2)

         if j2 == line_length and new_lines[-1] == '':

@@ -168,8 +167,7 @@ class DiffParser(object):

     def _enabled_debugging(self, old_lines, lines_new):
         if self._module.get_code() != ''.join(lines_new):
-            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
-                        ''.join(lines_new))
+            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))

     def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
         copied_nodes = [None]

@@ -273,7 +271,6 @@ class DiffParser(object):
             # memoryview?
             parsed_until_line = self._nodes_stack.parsed_until_line
             lines_after = self._parser_lines_new[parsed_until_line:]
-            #print('parse_content', parsed_until_line, lines_after, until_line)
             tokens = self._diff_tokenize(
                 lines_after,
                 until_line,

@@ -290,10 +287,10 @@ class DiffParser(object):
         omitted_first_indent = False
         indents = []
         tokens = self._tokenizer(lines, (1, 0))
-        stack = self._active_parser.pgen_parser.stack
+        stack = self._active_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                 indents.append(start_pos[1])
                 if is_first_token:
                     omitted_first_indent = True

@@ -306,8 +303,9 @@ class DiffParser(object):

             # In case of omitted_first_indent, it might not be dedented fully.
             # However this is a sign for us that a dedent happened.
-            if typ == DEDENT \
-                    or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
+            if typ == PythonTokenTypes.DEDENT \
+                    or typ == PythonTokenTypes.ERROR_DEDENT \
+                    and omitted_first_indent and len(indents) == 1:
                 indents.pop()
                 if omitted_first_indent and not indents:
                     # We are done here, only thing that can come now is an

@@ -317,18 +315,22 @@ class DiffParser(object):
                         prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
                     else:
                         prefix = ''
-                    yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    yield PythonToken(
+                        PythonTokenTypes.ENDMARKER, '',
+                        (start_pos[0] + line_offset, 0),
+                        prefix
+                    )
                     break
-            elif typ == NEWLINE and start_pos[0] >= until_line:
+            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                 yield PythonToken(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
                 if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                     start_pos = start_pos[0] + 1, 0
                     while len(indents) > int(omitted_first_indent):
                         indents.pop()
-                        yield PythonToken(DEDENT, '', start_pos, '')
+                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

-                    yield PythonToken(ENDMARKER, '', start_pos, '')
+                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                     break
                 else:
                     continue
@@ -454,7 +456,7 @@ class _NodesStack(object):
         self._last_prefix = ''
         if is_endmarker:
             try:
-                separation = last_leaf.prefix.rindex('\n')
+                separation = last_leaf.prefix.rindex('\n') + 1
             except ValueError:
                 pass
             else:
@@ -462,7 +464,7 @@ class _NodesStack(object):
                 # That is not relevant if parentheses were opened. Always parse
                 # until the end of a line.
                 last_leaf.prefix, self._last_prefix = \
-                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
+                    last_leaf.prefix[:separation], last_leaf.prefix[separation:]

         first_leaf = tree_nodes[0].get_first_leaf()
         first_leaf.prefix = self.prefix + first_leaf.prefix
@@ -472,7 +474,6 @@ class _NodesStack(object):
             self.prefix = last_leaf.prefix

             tree_nodes = tree_nodes[:-1]
-
         return tree_nodes

     def copy_nodes(self, tree_nodes, until_line, line_offset):
@@ -491,12 +492,20 @@ class _NodesStack(object):

         new_tos = tos
         for node in nodes:
-            if node.start_pos[0] > until_line:
-                break
-
             if node.type == 'endmarker':
+                # We basically removed the endmarker, but we are not allowed to
+                # remove the newline at the end of the line, otherwise it's
+                # going to be missing.
+                try:
+                    self.prefix = node.prefix[:node.prefix.rindex('\n') + 1]
+                except ValueError:
+                    pass
                 # Endmarkers just distort all the checks below. Remove them.
                 break

+            if node.start_pos[0] > until_line:
+                break
             # TODO this check might take a bit of time for large files. We
             # might want to change this to do more intelligent guessing or
             # binary search.
@@ -530,7 +539,7 @@ class _NodesStack(object):
             line_offset_index = -2

         elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
-              _is_flow_node(new_nodes[-1])):
+                _is_flow_node(new_nodes[-1])):
             # Error leaves/nodes don't have a defined start/end. Error
             # nodes might not end with a newline (e.g. if there's an
             # open `(`). Therefore ignore all of them unless they are
@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):

     def visit_leaf(self, leaf):
         if leaf.type == 'error_leaf':
-            if leaf.original_type in ('indent', 'error_dedent'):
+            if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
                 # Indents/Dedents themselves never have a prefix. They are just
                 # "pseudo" tokens that get removed by the syntax tree later.
                 # Therefore in case of an error we also have to check for this.
                 spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
-                if leaf.original_type == 'indent':
+                if leaf.token_type == 'INDENT':
                     message = 'unexpected indent'
                 else:
                     message = 'unindent does not match any outer indentation level'
@@ -563,7 +563,8 @@ class _ReturnAndYieldChecks(SyntaxRule):
                 and self._normalizer.version == (3, 5):
             self.add_issue(self.get_node(leaf), message=self.message_async_yield)

 @ErrorFinder.register_rule(type='atom')
+@ErrorFinder.register_rule(type='strings')
 class _BytesAndStringMix(SyntaxRule):
     # e.g. 's' b''
     message = "cannot mix bytes and nonbytes literals"
@@ -744,7 +745,12 @@ class _NonlocalModuleLevelRule(SyntaxRule):

 @ErrorFinder.register_rule(type='arglist')
 class _ArglistRule(SyntaxRule):
-    message = "Generator expression must be parenthesized if not sole argument"
+    @property
+    def message(self):
+        if self._normalizer.version < (3, 7):
+            return "Generator expression must be parenthesized if not sole argument"
+        else:
+            return "Generator expression must be parenthesized"

     def is_issue(self, node):
         first_arg = node.children[0]
@@ -837,101 +843,36 @@ class _TryStmtRule(SyntaxRule):
         self.add_issue(default_except, message=self.message)


-@ErrorFinder.register_rule(type='string')
+@ErrorFinder.register_rule(type='fstring')
 class _FStringRule(SyntaxRule):
-    _fstring_grammar = None
-    message_empty = "f-string: empty expression not allowed" # f'{}'
-    message_single_closing = "f-string: single '}' is not allowed" # f'}'
     message_nested = "f-string: expressions nested too deeply"
-    message_backslash = "f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}'
-    message_comment = "f-string expression part cannot include '#'" # f'{#}'
-    message_unterminated_string = "f-string: unterminated string" # f'{"}'
     message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
-    message_incomplete = "f-string: expecting '}'" # f'{'
-    message_syntax = "invalid syntax"

-    @classmethod
-    def _load_grammar(cls):
-        import parso
-
-        if cls._fstring_grammar is None:
-            cls._fstring_grammar = parso.load_grammar(language='python-f-string')
-        return cls._fstring_grammar
+    def _check_format_spec(self, format_spec, depth):
+        self._check_fstring_contents(format_spec.children[1:], depth)

+    def _check_fstring_expr(self, fstring_expr, depth):
+        if depth >= 2:
+            self.add_issue(fstring_expr, message=self.message_nested)
+
+        conversion = fstring_expr.children[2]
+        if conversion.type == 'fstring_conversion':
+            name = conversion.children[1]
+            if name.value not in ('s', 'r', 'a'):
+                self.add_issue(name, message=self.message_conversion)
+
+        format_spec = fstring_expr.children[-2]
+        if format_spec.type == 'fstring_format_spec':
+            self._check_format_spec(format_spec, depth + 1)

     def is_issue(self, fstring):
-        if 'f' not in fstring.string_prefix.lower():
-            return
-
-        parsed = self._load_grammar().parse_leaf(fstring)
-        for child in parsed.children:
-            if child.type == 'expression':
-                self._check_expression(child)
-            elif child.type == 'error_node':
-                next_ = child.get_next_leaf()
-                if next_.type == 'error_leaf' and next_.original_type == 'unterminated_string':
-                    self.add_issue(next_, message=self.message_unterminated_string)
-                    # At this point nothing more is comming except the error
-                    # leaf that we've already checked here.
-                    break
-                self.add_issue(child, message=self.message_incomplete)
-            elif child.type == 'error_leaf':
-                self.add_issue(child, message=self.message_single_closing)
-
-    def _check_python_expr(self, python_expr):
-        value = python_expr.value
-        if '\\' in value:
-            self.add_issue(python_expr, message=self.message_backslash)
-            return
-        if '#' in value:
-            self.add_issue(python_expr, message=self.message_comment)
-            return
-        if re.match('\s*$', value) is not None:
-            self.add_issue(python_expr, message=self.message_empty)
-            return
-
-        # This is now nested parsing. We parsed the fstring and now
-        # we're parsing Python again.
-        try:
-            # CPython has a bit of a special ways to parse Python code within
-            # f-strings. It wraps the code in brackets to make sure that
-            # whitespace doesn't make problems (indentation/newlines).
-            # Just use that algorithm as well here and adapt start positions.
-            start_pos = python_expr.start_pos
-            start_pos = start_pos[0], start_pos[1] - 1
-            eval_input = self._normalizer.grammar._parse(
-                '(%s)' % value,
-                start_symbol='eval_input',
-                start_pos=start_pos,
-                error_recovery=False
-            )
-        except ParserSyntaxError as e:
-            self.add_issue(e.error_leaf, message=self.message_syntax)
-            return
-
-        issues = self._normalizer.grammar.iter_errors(eval_input)
-        self._normalizer.issues += issues
-
-    def _check_format_spec(self, format_spec):
-        for expression in format_spec.children[1:]:
-            nested_format_spec = expression.children[-2]
-            if nested_format_spec.type == 'format_spec':
-                if len(nested_format_spec.children) > 1:
-                    self.add_issue(
-                        nested_format_spec.children[1],
-                        message=self.message_nested
-                    )
-
-            self._check_expression(expression)
-
-    def _check_expression(self, expression):
-        for c in expression.children:
-            if c.type == 'python_expr':
-                self._check_python_expr(c)
-            elif c.type == 'conversion':
-                if c.value not in ('s', 'r', 'a'):
-                    self.add_issue(c, message=self.message_conversion)
-            elif c.type == 'format_spec':
-                self._check_format_spec(c)
+        self._check_fstring_contents(fstring.children[1:-1])
+
+    def _check_fstring_contents(self, children, depth=0):
+        for fstring_content in children:
+            if fstring_content.type == 'fstring_expr':
+                self._check_fstring_expr(fstring_content, depth)


 class _CheckAssignmentRule(SyntaxRule):
@@ -944,7 +885,7 @@ class _CheckAssignmentRule(SyntaxRule):
             first, second = node.children[:2]
             error = _get_comprehension_type(node)
             if error is None:
-                if second.type in ('dictorsetmaker', 'string'):
+                if second.type == 'dictorsetmaker':
                     error = 'literal'
                 elif first in ('(', '['):
                     if second.type == 'yield_expr':
@@ -963,7 +904,7 @@ class _CheckAssignmentRule(SyntaxRule):
                 error = 'Ellipsis'
             elif type_ == 'comparison':
                 error = 'comparison'
-            elif type_ in ('string', 'number'):
+            elif type_ in ('string', 'number', 'strings'):
                 error = 'literal'
             elif type_ == 'yield_expr':
                 # This one seems to be a slightly different warning in Python.
@@ -1,211 +0,0 @@
-import re
-
-from itertools import count
-from parso.utils import PythonVersionInfo
-from parso.utils import split_lines
-from parso.python.tokenize import Token
-from parso import parser
-from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
-
-version36 = PythonVersionInfo(3, 6)
-
-
-class TokenNamespace:
-    _c = count()
-    LBRACE = next(_c)
-    RBRACE = next(_c)
-    ENDMARKER = next(_c)
-    COLON = next(_c)
-    CONVERSION = next(_c)
-    PYTHON_EXPR = next(_c)
-    EXCLAMATION_MARK = next(_c)
-    UNTERMINATED_STRING = next(_c)
-
-    token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
-
-    @classmethod
-    def generate_token_id(cls, string):
-        if string == '{':
-            return cls.LBRACE
-        elif string == '}':
-            return cls.RBRACE
-        elif string == '!':
-            return cls.EXCLAMATION_MARK
-        elif string == ':':
-            return cls.COLON
-        return getattr(cls, string)
-
-
-GRAMMAR = """
-fstring: expression* ENDMARKER
-format_spec: ':' expression*
-expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
-"""
-
-_prefix = r'((?:[^{}]+)*)'
-_expr = _prefix + r'(\{|\}|$)'
-_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
-# There's only one conversion character allowed. But the rules have to be
-# checked later anyway, so allow more here. This makes error recovery nicer.
-_conversion = r'([^={}:]*)(.?)'
-
-_compiled_expr = re.compile(_expr)
-_compiled_in_expr = re.compile(_in_expr)
-_compiled_conversion = re.compile(_conversion)
-
-
-def tokenize(code, start_pos=(1, 0)):
-    def add_to_pos(string):
-        lines = split_lines(string)
-        l = len(lines[-1])
-        if len(lines) > 1:
-            start_pos[0] += len(lines) - 1
-            start_pos[1] = l
-        else:
-            start_pos[1] += l
-
-    def tok(value, type=None, prefix=''):
-        if type is None:
-            type = TokenNamespace.generate_token_id(value)
-
-        add_to_pos(prefix)
-        token = Token(type, value, tuple(start_pos), prefix)
-        add_to_pos(value)
-        return token
-
-    start = 0
-    recursion_level = 0
-    added_prefix = ''
-    start_pos = list(start_pos)
-    while True:
-        match = _compiled_expr.match(code, start)
-        prefix = added_prefix + match.group(1)
-        found = match.group(2)
-        start = match.end()
-        if not found:
-            # We're at the end.
-            break
-
-        if found == '}':
-            if recursion_level == 0 and len(code) > start and code[start] == '}':
-                # This is a }} escape.
-                added_prefix = prefix + '}}'
-                start += 1
-                continue
-
-            recursion_level = max(0, recursion_level - 1)
-            yield tok(found, prefix=prefix)
-            added_prefix = ''
-        else:
-            assert found == '{'
-            if recursion_level == 0 and len(code) > start and code[start] == '{':
-                # This is a {{ escape.
-                added_prefix = prefix + '{{'
-                start += 1
-                continue
-
-            recursion_level += 1
-            yield tok(found, prefix=prefix)
-            added_prefix = ''
-
-            expression = ''
-            squared_count = 0
-            curly_count = 0
-            while True:
-                expr_match = _compiled_in_expr.match(code, start)
-                expression += expr_match.group(1)
-                found = expr_match.group(2)
-                start = expr_match.end()
-
-                if found == '{':
-                    curly_count += 1
-                    expression += found
-                elif found == '}' and curly_count > 0:
-                    curly_count -= 1
-                    expression += found
-                elif found == '[':
-                    squared_count += 1
-                    expression += found
-                elif found == ']':
-                    # Use a max function here, because the Python code might
-                    # just have syntax errors.
-                    squared_count = max(0, squared_count - 1)
-                    expression += found
-                elif found == ':' and (squared_count or curly_count):
-                    expression += found
-                elif found in ('"', "'"):
-                    search = found
-                    if len(code) > start + 1 and \
-                            code[start] == found == code[start+1]:
-                        search *= 3
-                        start += 2
-
-                    index = code.find(search, start)
-                    if index == -1:
-                        yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
-                        yield tok(
-                            found + code[start:],
-                            type=TokenNamespace.UNTERMINATED_STRING,
-                        )
-                        start = len(code)
-                        break
-                    expression += found + code[start:index+1]
-                    start = index + 1
-                elif found == '!' and len(code) > start and code[start] == '=':
-                    # This is a python `!=` and not a conversion.
-                    expression += found
-                else:
-                    yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
-                    if found:
-                        yield tok(found)
-                    break
-
-            if found == '!':
-                conversion_match = _compiled_conversion.match(code, start)
-                found = conversion_match.group(2)
-                start = conversion_match.end()
-                yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
-                if found:
-                    yield tok(found)
-            if found == '}':
-                recursion_level -= 1
-
-            # We don't need to handle everything after ':', because that is
-            # basically new tokens.
-
-    yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
-
-
-class Parser(parser.BaseParser):
-    def parse(self, tokens):
-        node = super(Parser, self).parse(tokens)
-        if isinstance(node, self.default_leaf): # Is an endmarker.
-            # If there's no curly braces we get back a non-module. We always
-            # want an fstring.
-            node = self.default_node('fstring', [node])
-
-        return node
-
-    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
-        # TODO this is so ugly.
-        leaf_type = TokenNamespace.token_map[type].lower()
-        return TypedLeaf(leaf_type, value, start_pos, prefix)
-
-    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
-                       add_token_callback):
-        if not self._error_recovery:
-            return super(Parser, self).error_recovery(
-                pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
-                add_token_callback
-            )
-
-        token_type = TokenNamespace.token_map[typ].lower()
-        if len(stack) == 1:
-            error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
-            stack[0][2][1].append(error_leaf)
-        else:
-            dfa, state, (type_, nodes) = stack[1]
-            stack[0][2][1].append(ErrorNode(nodes))
-            stack[1:] = []
-
-        add_token_callback(typ, value, start_pos, prefix)
@@ -119,7 +119,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [listmaker] ']' |
        '{' [dictorsetmaker] '}' |
        '`' testlist1 '`' |
-       NAME | NUMBER | STRING+)
+       NAME | NUMBER | strings)
+strings: STRING+
 listmaker: test ( list_for | (',' test)* [','] )
 # Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
 # default. It's more consistent like this.

@@ -104,7 +104,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [listmaker] ']' |
        '{' [dictorsetmaker] '}' |
        '`' testlist1 '`' |
-       NAME | NUMBER | STRING+)
+       NAME | NUMBER | strings)
+strings: STRING+
 listmaker: test ( list_for | (',' test)* [','] )
 testlist_comp: test ( comp_for | (',' test)* [','] )
 lambdef: 'lambda' [varargslist] ':' test

@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
 atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
+strings: STRING+
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']

@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
 atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
+strings: STRING+
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']

@@ -110,7 +110,8 @@ atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
+strings: STRING+
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']

@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
@@ -148,3 +148,10 @@ encoding_decl: NAME

 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist
+
+strings: (STRING | fstring)+
+fstring: FSTRING_START fstring_content* FSTRING_END
+fstring_content: FSTRING_STRING | fstring_expr
+fstring_conversion: '!' NAME
+fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
+fstring_format_spec: ':' fstring_content*

@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
@@ -148,3 +148,10 @@ encoding_decl: NAME

 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist
+
+strings: (STRING | fstring)+
+fstring: FSTRING_START fstring_content* FSTRING_END
+fstring_content: FSTRING_STRING | fstring_expr
+fstring_conversion: '!' NAME
+fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
+fstring_format_spec: ':' fstring_content*
@@ -1,8 +1,11 @@
 from parso.python import tree
-from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name, NAME)
+from parso.python.token import PythonTokenTypes
 from parso.parser import BaseParser
-from parso.pgen2.parse import token_to_ilabel


+NAME = PythonTokenTypes.NAME
+INDENT = PythonTokenTypes.INDENT
+DEDENT = PythonTokenTypes.DEDENT
+
+
 class Parser(BaseParser):
@@ -50,44 +53,35 @@ class Parser(BaseParser):
     }
     default_node = tree.PythonNode

-    def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
+    # Names/Keywords are handled separately
+    _leaf_map = {
+        PythonTokenTypes.STRING: tree.String,
+        PythonTokenTypes.NUMBER: tree.Number,
+        PythonTokenTypes.NEWLINE: tree.Newline,
+        PythonTokenTypes.ENDMARKER: tree.EndMarker,
+        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
+        PythonTokenTypes.FSTRING_START: tree.FStringStart,
+        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
+    }
+
+    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal,
+                                     error_recovery=error_recovery)

         self.syntax_errors = []
         self._omit_dedent_list = []
         self._indent_counter = 0

-        # TODO do print absolute import detection here.
-        # try:
-        #     del python_grammar_no_print_statement.keywords["print"]
-        # except KeyError:
-        #     pass # Doesn't exist in the Python 3 grammar.
-
-        # if self.options["print_function"]:
-        #     python_grammar = pygram.python_grammar_no_print_statement
-        # else:
-
     def parse(self, tokens):
         if self._error_recovery:
-            if self._start_symbol != 'file_input':
+            if self._start_nonterminal != 'file_input':
                 raise NotImplementedError

             tokens = self._recovery_tokenize(tokens)

-        node = super(Parser, self).parse(tokens)
-
-        if self._start_symbol == 'file_input' != node.type:
-            # If there's only one statement, we get back a non-module. That's
-            # not what we want, we want a module, so we add it here:
-            node = self.convert_node(
-                self._pgen_grammar,
-                self._pgen_grammar.symbol2number['file_input'],
-                [node]
-            )
-
-        return node
+        return super(Parser, self).parse(tokens)

-    def convert_node(self, pgen_grammar, type, children):
+    def convert_node(self, nonterminal, children):
         """
         Convert raw node information to a PythonBaseNode instance.
@@ -95,157 +89,114 @@ class Parser(BaseParser):
         grammar rule produces a new complete node, so that the tree is built
         strictly bottom-up.
         """
-        # TODO REMOVE symbol, we don't want type here.
-        symbol = pgen_grammar.number2symbol[type]
         try:
-            return self.node_map[symbol](children)
+            return self.node_map[nonterminal](children)
         except KeyError:
-            if symbol == 'suite':
+            if nonterminal == 'suite':
                 # We don't want the INDENT/DEDENT in our parser tree. Those
                 # leaves are just cancer. They are virtual leaves and not real
                 # ones and therefore have pseudo start/end positions and no
                 # prefixes. Just ignore them.
                 children = [children[0]] + children[2:-1]
-            elif symbol == 'list_if':
+            elif nonterminal == 'list_if':
                 # Make transitioning from 2 to 3 easier.
-                symbol = 'comp_if'
-            elif symbol == 'listmaker':
+                nonterminal = 'comp_if'
+            elif nonterminal == 'listmaker':
                 # Same as list_if above.
-                symbol = 'testlist_comp'
-            return self.default_node(symbol, children)
+                nonterminal = 'testlist_comp'
+            return self.default_node(nonterminal, children)

-    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
+    def convert_leaf(self, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
         if type == NAME:
-            if value in pgen_grammar.keywords:
+            if value in self._pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
                 return tree.Name(value, start_pos, prefix)
-        elif type == STRING:
-            return tree.String(value, start_pos, prefix)
-        elif type == NUMBER:
-            return tree.Number(value, start_pos, prefix)
-        elif type == NEWLINE:
-            return tree.Newline(value, start_pos, prefix)
-        elif type == ENDMARKER:
-            return tree.EndMarker(value, start_pos, prefix)
-        else:
-            return tree.Operator(value, start_pos, prefix)
+
+        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)

-    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
-                       add_token_callback):
-        def get_symbol_and_nodes(stack):
-            for dfa, state, (type_, nodes) in stack:
-                symbol = pgen_grammar.number2symbol[type_]
-                yield symbol, nodes
-
-        tos_nodes = stack.get_tos_nodes()
+    def error_recovery(self, token):
+        tos_nodes = self.stack[-1].nodes
         if tos_nodes:
             last_leaf = tos_nodes[-1].get_last_leaf()
         else:
             last_leaf = None

-        if self._start_symbol == 'file_input' and \
-                (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
-            def reduce_stack(states, newstate):
-                # reduce
-                state = newstate
-                while states[state] == [(0, state)]:
-                    self.pgen_parser._pop()
-
-                    dfa, state, (type_, nodes) = stack[-1]
-                    states, first = dfa
-
+        if self._start_nonterminal == 'file_input' and \
+                (token.type == PythonTokenTypes.ENDMARKER or
+                 token.type == DEDENT and '\n' not in last_leaf.value):
             # In Python statements need to end with a newline. But since it's
             # possible (and valid in Python) that there's no newline at the
             # end of a file, we have to recover even if the user doesn't want
             # error recovery.
-            #print('x', pprint.pprint(stack))
-            ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
-
-            dfa, state, (type_, nodes) = stack[-1]
-            symbol = pgen_grammar.number2symbol[type_]
-            states, first = dfa
-            arcs = states[state]
-            # Look for a state with this label
-            for i, newstate in arcs:
-                if ilabel == i:
-                    if symbol == 'simple_stmt':
-                        # This is basically shifting
-                        stack[-1] = (dfa, newstate, (type_, nodes))
-
-                        reduce_stack(states, newstate)
-                        add_token_callback(typ, value, start_pos, prefix)
+            if self.stack[-1].dfa.from_rule == 'simple_stmt':
+                try:
+                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
+                except KeyError:
+                    pass
+                else:
+                    if plan.next_dfa.is_final and not plan.dfa_pushes:
+                        # We are ignoring here that the newline would be
+                        # required for a simple_stmt.
+                        self.stack[-1].dfa = plan.next_dfa
+                        self._add_token(token)
                         return
-                    # Check if we're at the right point
-                    #for symbol, nodes in get_symbol_and_nodes(stack):
-                    #    self.pgen_parser._pop()
-
-                    #break
-                    break
-            #symbol = pgen_grammar.number2symbol[type_]

         if not self._error_recovery:
-            return super(Parser, self).error_recovery(
-                pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
-                add_token_callback)
+            return super(Parser, self).error_recovery(token)

         def current_suite(stack):
             # For now just discard everything that is not a suite or
             # file_input, if we detect an error.
-            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
+            for until_index, stack_node in reversed(list(enumerate(stack))):
                 # `suite` can sometimes be only simple_stmt, not stmt.
-                if symbol == 'file_input':
+                if stack_node.nonterminal == 'file_input':
                     break
-                elif symbol == 'suite' and len(nodes) > 1:
-                    # suites without an indent in them get discarded.
-                    break
-            return index, symbol, nodes
+                elif stack_node.nonterminal == 'suite':
+                    # In the case where we just have a newline we don't want to
+                    # do error recovery here. In all other cases, we want to do
+                    # error recovery.
+                    if len(stack_node.nodes) != 1:
+                        break
+            return until_index

-        index, symbol, nodes = current_suite(stack)
+        until_index = current_suite(self.stack)

-        # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
-        if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
-            add_token_callback(typ, value, start_pos, prefix)
+        if self._stack_removal(until_index + 1):
+            self._add_token(token)
         else:
+            typ, value, start_pos, prefix = token
             if typ == INDENT:
                 # For every deleted INDENT we have to delete a DEDENT as well.
                 # Otherwise the parser will get into trouble and DEDENT too early.
                 self._omit_dedent_list.append(self._indent_counter)

-            error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
-            stack[-1][2][1].append(error_leaf)
+            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
+            self.stack[-1].nodes.append(error_leaf)

-            if symbol == 'suite':
-                dfa, state, node = stack[-1]
-                states, first = dfa
-                arcs = states[state]
-                intended_label = pgen_grammar.symbol2label['stmt']
-                # Introduce a proper state transition. We're basically allowing
-                # there to be no valid statements inside a suite.
-                if [x[0] for x in arcs] == [intended_label]:
-                    new_state = arcs[0][1]
-                    stack[-1] = dfa, new_state, node
+        tos = self.stack[-1]
+        if tos.nonterminal == 'suite':
+            # Need at least one statement in the suite. This happened with the
+            # error recovery above.
+            try:
+                tos.dfa = tos.dfa.arcs['stmt']
+            except KeyError:
+                # We're already in a final state.
+                pass

-    def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
-        failed_stack = False
-        found = False
-        all_nodes = []
-        for dfa, state, (type_, nodes) in stack[start_index:]:
-            if nodes:
-                found = True
-            if found:
-                failed_stack = True
-                all_nodes += nodes
-        if failed_stack:
-            stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
+    def _stack_removal(self, start_index):
+        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]

-        stack[start_index:] = []
-        return failed_stack
+        if all_nodes:
+            self.stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes))
+
+        self.stack[start_index:] = []
+        return bool(all_nodes)

     def _recovery_tokenize(self, tokens):
-        for typ, value, start_pos, prefix in tokens:
+        for token in tokens:
+            typ = token[0]
             # print(tok_name[typ], repr(value), start_pos, repr(prefix))
             if typ == DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
@@ -258,4 +209,4 @@ class Parser(BaseParser):
                 self._indent_counter -= 1
             elif typ == INDENT:
                 self._indent_counter += 1
-            yield typ, value, start_pos, prefix
+            yield token
@@ -1,104 +1,27 @@
-from __future__ import absolute_import
-from itertools import count
-from token import *
-
-from parso._compatibility import py_version
-
-
-_counter = count(N_TOKENS)
-# Never want to see this thing again.
-del N_TOKENS
-
-COMMENT = next(_counter)
-tok_name[COMMENT] = 'COMMENT'
-
-NL = next(_counter)
-tok_name[NL] = 'NL'
-
-# Sets the attributes that don't exist in these tok_name versions.
-if py_version >= 30:
-    BACKQUOTE = next(_counter)
-    tok_name[BACKQUOTE] = 'BACKQUOTE'
-else:
-    RARROW = next(_counter)
-    tok_name[RARROW] = 'RARROW'
-    ELLIPSIS = next(_counter)
-    tok_name[ELLIPSIS] = 'ELLIPSIS'
-
-if py_version < 35:
-    ATEQUAL = next(_counter)
-    tok_name[ATEQUAL] = 'ATEQUAL'
-
-ERROR_DEDENT = next(_counter)
-tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
-
-
-# Map from operator to number (since tokenize doesn't do this)
-
-opmap_raw = """\
-( LPAR
-) RPAR
-[ LSQB
-] RSQB
-: COLON
-, COMMA
-; SEMI
-+ PLUS
-- MINUS
-* STAR
-/ SLASH
-| VBAR
-& AMPER
-< LESS
-> GREATER
-= EQUAL
-. DOT
-% PERCENT
-` BACKQUOTE
-{ LBRACE
-} RBRACE
-@ AT
-== EQEQUAL
-!= NOTEQUAL
-<> NOTEQUAL
-<= LESSEQUAL
->= GREATEREQUAL
-~ TILDE
-^ CIRCUMFLEX
-<< LEFTSHIFT
->> RIGHTSHIFT
-** DOUBLESTAR
-+= PLUSEQUAL
--= MINEQUAL
-*= STAREQUAL
-/= SLASHEQUAL
-%= PERCENTEQUAL
-&= AMPEREQUAL
-|= VBAREQUAL
-@= ATEQUAL
-^= CIRCUMFLEXEQUAL
-<<= LEFTSHIFTEQUAL
->>= RIGHTSHIFTEQUAL
-**= DOUBLESTAREQUAL
-// DOUBLESLASH
-//= DOUBLESLASHEQUAL
--> RARROW
-... ELLIPSIS
-"""
-
-opmap = {}
-for line in opmap_raw.splitlines():
-    op, name = line.split()
-    opmap[op] = globals()[name]
-
-
-def generate_token_id(string):
+class TokenType(object):
+    def __init__(self, name, contains_syntax=False):
+        self.name = name
+        self.contains_syntax = contains_syntax
+
+    def __repr__(self):
+        return '%s(%s)' % (self.__class__.__name__, self.name)
+
+
+class TokenTypes(object):
     """
-    Uses a token in the grammar (e.g. `'+'` or `'and'`) and returns the
-    corresponding ID for it. The strings are part of the grammar file.
+    Basically an enum, but Python 2 doesn't have enums in the standard library.
     """
-    try:
-        return opmap[string]
-    except KeyError:
-        pass
-    return globals()[string]
+    def __init__(self, names, contains_syntax):
+        for name in names:
+            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
+
+
+PythonTokenTypes = TokenTypes((
+    'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
+    'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
+    'ENDMARKER'),
+    contains_syntax=('NAME', 'OP'),
+)
@@ -18,16 +18,29 @@ from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8

-from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
-                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
-                                ERROR_DEDENT)
+from parso.python.token import PythonTokenTypes
 from parso._compatibility import py_version
 from parso.utils import split_lines


+STRING = PythonTokenTypes.STRING
+NAME = PythonTokenTypes.NAME
+NUMBER = PythonTokenTypes.NUMBER
+OP = PythonTokenTypes.OP
+NEWLINE = PythonTokenTypes.NEWLINE
+INDENT = PythonTokenTypes.INDENT
+DEDENT = PythonTokenTypes.DEDENT
+ENDMARKER = PythonTokenTypes.ENDMARKER
+ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
+ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
+FSTRING_START = PythonTokenTypes.FSTRING_START
+FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
+FSTRING_END = PythonTokenTypes.FSTRING_END
+
 TokenCollection = namedtuple(
     'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
+    'pseudo_token single_quoted triple_quoted endpats whitespace '
+    'fstring_pattern_map always_break_tokens',
 )

 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
@@ -52,32 +65,35 @@ def group(*choices, **kwargs):
     return start + '|'.join(choices) + ')'


 def any(*choices):
     return group(*choices) + '*'


 def maybe(*choices):
     return group(*choices) + '?'


 # Return the empty string, plus all of the valid string prefixes.
-def _all_string_prefixes(version_info):
+def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
     def different_case_versions(prefix):
         for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
             yield ''.join(s)
     # The valid string prefixes. Only contain the lower case versions,
     # and don't contain any permutations (include 'fr', but not
     # 'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u']
+    valid_string_prefixes = ['b', 'r', 'u']
     if version_info >= (3, 0):
-        _valid_string_prefixes.append('br')
+        valid_string_prefixes.append('br')

-    if version_info >= (3, 6):
-        _valid_string_prefixes += ['f', 'fr']
+    result = set([''])
+    if version_info >= (3, 6) and include_fstring:
+        f = ['f', 'fr']
+        if only_fstring:
+            valid_string_prefixes = f
+            result = set()
+        else:
+            valid_string_prefixes += f
+    elif only_fstring:
+        return set()

     # if we add binary f-strings, add: ['fb', 'fbr']
-    result = set([''])
-    for prefix in _valid_string_prefixes:
+    for prefix in valid_string_prefixes:
         for t in _itertools.permutations(prefix):
             # create a list with upper and lower versions of each
             # character
@@ -102,10 +118,15 @@ def _get_token_collection(version_info):
     return result


+fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
+fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
+
+
 def _create_token_collection(version_info):
     # Note: we use unicode matching for names ("\w") but ascii matching for
     # number literals.
     Whitespace = r'[ \f\t]*'
+    whitespace = _compile(Whitespace)
     Comment = r'#[^\r\n]*'
     Name = r'\w+'
@@ -141,6 +162,9 @@ def _create_token_collection(version_info):
     # StringPrefix can be the empty string (making it optional).
     possible_prefixes = _all_string_prefixes(version_info)
     StringPrefix = group(*possible_prefixes)
+    StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
+    fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
+    FStringStart = group(*fstring_prefixes)

     # Tail end of ' string.
     Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
@@ -150,14 +174,14 @@ def _create_token_collection(version_info):
     Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
     # Tail end of """ string.
     Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-    Triple = group(StringPrefix + "'''", StringPrefix + '"""')
+    Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')

     # Because of leftmost-then-longest match semantics, be sure to put the
     # longest operators first (e.g., if = came before ==, == would get
     # recognized as two instances of =).
-    Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
+    Operator = group(r"\*\*=?", r">>=?", r"<<=?",
                      r"//=?", r"->",
-                     r"[+\-*/%&@`|^=<>]=?",
+                     r"[+\-*/%&@`|^!=<>]=?",
                      r"~")

     Bracket = '[][(){}]'
@@ -174,7 +198,12 @@ def _create_token_collection(version_info):
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
-    PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+    pseudo_extra_pool = [Comment, Triple]
+    all_quotes = '"', "'", '"""', "'''"
+    if fstring_prefixes:
+        pseudo_extra_pool.append(FStringStart + group(*all_quotes))
+
+    PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
     PseudoToken = group(Whitespace, capture=True) + \
         group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
@@ -192,18 +221,24 @@ def _create_token_collection(version_info):
     # including the opening quotes.
     single_quoted = set()
     triple_quoted = set()
+    fstring_pattern_map = {}
     for t in possible_prefixes:
-        for p in (t + '"', t + "'"):
-            single_quoted.add(p)
-        for p in (t + '"""', t + "'''"):
-            triple_quoted.add(p)
+        for quote in '"', "'":
+            single_quoted.add(t + quote)
+
+        for quote in '"""', "'''":
+            triple_quoted.add(t + quote)
+
+    for t in fstring_prefixes:
+        for quote in all_quotes:
+            fstring_pattern_map[t + quote] = quote

     ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                            'finally', 'while', 'with', 'return')
     pseudo_token_compiled = _compile(PseudoToken)
     return TokenCollection(
         pseudo_token_compiled, single_quoted, triple_quoted, endpats,
-        ALWAYS_BREAK_TOKENS
+        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
     )
@@ -218,12 +253,88 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
|
||||
|
||||
|
||||
class PythonToken(Token):
|
||||
def _get_type_name(self, exact=True):
|
||||
return tok_name[self.type]
|
||||
|
||||
def __repr__(self):
|
||||
return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
|
||||
self._replace(type=self._get_type_name()))
|
||||
return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
|
||||
self._replace(type=self.type.name))
|
||||
|
||||
|
||||
class FStringNode(object):
|
||||
def __init__(self, quote):
|
||||
self.quote = quote
|
||||
self.parentheses_count = 0
|
||||
self.previous_lines = ''
|
||||
self.last_string_start_pos = None
|
||||
# In the syntax there can be multiple format_spec's nested:
|
||||
# {x:{y:3}}
|
||||
self.format_spec_count = 0
|
||||
|
||||
def open_parentheses(self, character):
|
||||
self.parentheses_count += 1
|
||||
|
||||
def close_parentheses(self, character):
|
||||
self.parentheses_count -= 1
|
||||
|
||||
def allow_multiline(self):
|
||||
return len(self.quote) == 3
|
||||
|
||||
def is_in_expr(self):
|
||||
return (self.parentheses_count - self.format_spec_count) > 0
|
||||
|
||||
|
||||
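A rough sketch (not part of the change) of how these counters decide whether the tokenizer is still inside an f-string expression, using the nested format spec from the comment above; the attribute updates mirror what tokenize_lines does when it sees '{', '}' and ':' tokens:

node = FStringNode('"')          # as if tokenizing f"{x:{y:3}}"
node.open_parentheses('{')       # outer '{': parentheses_count == 1
assert node.is_in_expr()         # 'x' is tokenized as ordinary expression code
node.format_spec_count += 1      # the ':' at depth 1 opens a format spec
assert not node.is_in_expr()     # the spec itself is f-string text again
node.open_parentheses('{')       # inner '{' of '{y:3}'
assert node.is_in_expr()         # 'y' is expression code once more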
def _check_fstring_ending(fstring_stack, token, from_start=False):
fstring_end = float('inf')
fstring_index = None
for i, node in enumerate(fstring_stack):
if from_start:
if token.startswith(node.quote):
fstring_index = i
fstring_end = len(node.quote)
else:
continue
else:
try:
end = token.index(node.quote)
except ValueError:
pass
else:
if fstring_index is None or end < fstring_end:
fstring_index = i
fstring_end = end
return fstring_index, fstring_end


def _find_fstring_string(fstring_stack, line, lnum, pos):
tos = fstring_stack[-1]
if tos.is_in_expr():
return '', pos
else:
new_pos = pos
allow_multiline = tos.allow_multiline()
if allow_multiline:
match = fstring_string_multi_line.match(line, pos)
else:
match = fstring_string_single_line.match(line, pos)
if match is None:
string = tos.previous_lines
else:
if not tos.previous_lines:
tos.last_string_start_pos = (lnum, pos)

string = match.group(0)
for fstring_stack_node in fstring_stack:
try:
string = string[:string.index(fstring_stack_node.quote)]
except ValueError:
pass # The string was not found.

new_pos += len(string)
if allow_multiline and string.endswith('\n'):
tos.previous_lines += string
string = ''
else:
string = tos.previous_lines + string

return string, new_pos

def tokenize(code, version_info, start_pos=(1, 0)):
@@ -232,6 +343,19 @@ def tokenize(code, version_info, start_pos=(1, 0)):
return tokenize_lines(lines, version_info, start_pos=start_pos)


def _print_tokens(func):
"""
A small helper function to help debug the tokenize_lines function.
"""
def wrapper(*args, **kwargs):
for token in func(*args, **kwargs):
print(token)
yield token

return wrapper


# @_print_tokens
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
"""
A heavily modified Python standard library tokenizer.
@@ -240,7 +364,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
token. This idea comes from lib2to3. The prefix contains all information
that is irrelevant for the parser like newlines in parentheses or comments.
"""
pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
fstring_pattern_map, always_break_tokens, = \
_get_token_collection(version_info)
paren_level = 0 # count parentheses
indents = [0]
@@ -257,6 +382,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
additional_prefix = ''
first = True
lnum = start_pos[0] - 1
fstring_stack = []
for line in lines: # loop over lines in stream
lnum += 1
pos = 0
@@ -278,7 +404,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
endmatch = endprog.match(line)
if endmatch:
pos = endmatch.end(0)
yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
yield PythonToken(
STRING, contstr + line[:pos],
contstr_start, prefix)
contstr = ''
contline = None
else:
@@ -287,12 +415,47 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
continue

while pos < max:
if fstring_stack:
string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
if string:
yield PythonToken(
FSTRING_STRING, string,
fstring_stack[-1].last_string_start_pos,
# Never has a prefix because it can start anywhere and
# include whitespace.
prefix=''
)
fstring_stack[-1].previous_lines = ''
continue

if pos == max:
break

rest = line[pos:]
fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True)

if fstring_index is not None:
yield PythonToken(
FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, pos),
prefix=additional_prefix,
)
additional_prefix = ''
del fstring_stack[fstring_index:]
pos += end
continue

pseudomatch = pseudo_token.match(line, pos)
if not pseudomatch: # scan for tokens
txt = line[pos:]
if txt.endswith('\n'):
if line.endswith('\n'):
new_line = True
yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
match = whitespace.match(line, pos)
pos = match.end()
yield PythonToken(
ERRORTOKEN, line[pos:], (lnum, pos),
additional_prefix + match.group(0)
)
additional_prefix = ''
break

@@ -311,10 +474,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):

if new_line and initial not in '\r\n#':
new_line = False
if paren_level == 0:
if paren_level == 0 and not fstring_stack:
i = 0
while line[i] == '\f':
i += 1
# TODO don't we need to change spos as well?
start -= 1
if start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
@@ -326,11 +490,33 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
yield PythonToken(DEDENT, '', spos, '')
indents.pop()

if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token)
if fstring_index is not None:
if end != 0:
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
prefix = ''

yield PythonToken(
FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, spos[1] + 1),
prefix=prefix
)
del fstring_stack[fstring_index:]
pos -= len(token) - end
continue

if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
elif initial in '\r\n':
if not new_line and paren_level == 0:
if any(not f.allow_multiline() for f in fstring_stack):
# Would use fstring_stack.clear, but that's not available
# in Python 2.
fstring_stack[:] = []

if not new_line and paren_level == 0 and not fstring_stack:
yield PythonToken(NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
@@ -362,8 +548,12 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
break
else: # ordinary string
yield PythonToken(STRING, token, spos, prefix)
elif token in fstring_pattern_map: # The start of an fstring.
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
yield PythonToken(FSTRING_START, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in always_break_tokens:
fstring_stack[:] = []
paren_level = 0
while True:
indent = indents.pop()
@@ -378,17 +568,20 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
break
else:
if token in '([{':
paren_level += 1
if fstring_stack:
fstring_stack[-1].open_parentheses(token)
else:
paren_level += 1
elif token in ')]}':
paren_level -= 1
if fstring_stack:
fstring_stack[-1].close_parentheses(token)
else:
paren_level -= 1
elif token == ':' and fstring_stack \
and fstring_stack[-1].parentheses_count == 1:
fstring_stack[-1].format_spec_count += 1

try:
# This check is needed in any case to check if it's a valid
# operator or just some random unicode character.
typ = opmap[token]
except KeyError:
typ = ERRORTOKEN
yield PythonToken(typ, token, spos, prefix)
yield PythonToken(OP, token, spos, prefix)

if contstr:
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)

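With these changes an f-string is no longer handed to a separate fstring tokenizer; tokenize_lines itself emits FSTRING_START, FSTRING_STRING and FSTRING_END around the embedded expression tokens. A hedged sketch of the resulting stream; the call signature matches the updated tests below, and the stream listed in the comment is approximate:

from parso.python.tokenize import tokenize
for token in tokenize('f"a{1}"\n', version_info=(3, 6)):
    print(token)
# Roughly: FSTRING_START 'f"', FSTRING_STRING 'a', OP '{', NUMBER '1', OP '}',
# FSTRING_END '"', NEWLINE '\n', ENDMARKER ''; comments and whitespace end up
# in the prefix of the following token, as described in the docstring above.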
@@ -60,7 +60,6 @@ _GET_DEFINITION_TYPES = set([
_IMPORTS = set(['import_name', 'import_from'])


class DocstringMixin(object):
__slots__ = ()

@@ -125,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf):
# indent error leafs somehow? No idea how, though.
previous_leaf = self.get_previous_leaf()
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
and previous_leaf.original_type in ('indent', 'error_dedent'):
and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
previous_leaf = previous_leaf.get_previous_leaf()

if previous_leaf is None:
@@ -133,7 +132,6 @@ class PythonLeaf(PythonMixin, Leaf):
return previous_leaf.end_pos


class _LeafWithoutNewlines(PythonLeaf):
"""
Simply here to optimize performance.
@@ -166,6 +164,10 @@ class EndMarker(_LeafWithoutNewlines):
__slots__ = ()
type = 'endmarker'

@utf8_repr
def __repr__(self):
return "<%s: prefix=%s>" % (type(self).__name__, repr(self.prefix))


class Newline(PythonLeaf):
"""Contains NEWLINE and ENDMARKER tokens."""
@@ -235,7 +237,6 @@ class Name(_LeafWithoutNewlines):
return None


class Literal(PythonLeaf):
__slots__ = ()

@@ -262,6 +263,33 @@ class String(Literal):
return match.group(2)[:-len(match.group(1))]


class FStringString(Leaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
"""
type = 'fstring_string'
__slots__ = ()


class FStringStart(Leaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
"""
type = 'fstring_start'
__slots__ = ()


class FStringEnd(Leaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
"""
type = 'fstring_end'
__slots__ = ()


class _StringComparisonMixin(object):
def __eq__(self, other):
"""
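The three new leaf types above are what the 3.6 grammar emits inside an fstring node. A short sketch that mirrors the assertions in the rewritten test_fstring tests further below, and does not go beyond what those tests check:

from parso import load_grammar
grammar = load_grammar(version='3.6')
module = grammar.parse('f"""{1}"""', error_recovery=False)
fstring = module.children[0]
assert fstring.type == 'fstring'
assert fstring.get_code() == 'f"""{1}"""'   # the fstring_start/fstring_string/fstring_end
                                            # leaves round-trip to the original code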
@@ -626,6 +654,7 @@ class Function(ClassOrFunc):
except IndexError:
return None


class Lambda(Function):
"""
Lambdas are basically trimmed functions, so give it the same interface.

@@ -55,7 +55,6 @@ class NodeOrLeaf(object):
Returns the node immediately preceding this node in this parent's
children list. If this node does not have a previous sibling, it is
None.
None.
"""
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
@@ -230,6 +229,7 @@ class Leaf(NodeOrLeaf):

class TypedLeaf(Leaf):
__slots__ = ('type',)

def __init__(self, type, value, start_pos, prefix=''):
super(TypedLeaf, self).__init__(value, start_pos, prefix)
self.type = type
@@ -339,7 +339,7 @@ class Node(BaseNode):

class ErrorNode(BaseNode):
"""
A node that containes valid nodes/leaves that we're follow by a token that
A node that contains valid nodes/leaves that we're follow by a token that
was invalid. This basically means that the leaf after this node is where
Python would mark a syntax error.
"""
@@ -352,13 +352,13 @@ class ErrorLeaf(Leaf):
A leaf that is either completely invalid in a language (like `$` in Python)
or is invalid at that position. Like the star in `1 +* 1`.
"""
__slots__ = ('original_type',)
__slots__ = ('token_type',)
type = 'error_leaf'

def __init__(self, original_type, value, start_pos, prefix=''):
def __init__(self, token_type, value, start_pos, prefix=''):
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
self.original_type = original_type
self.token_type = token_type

def __repr__(self):
return "<%s: %s:%s, %s>" % \
(type(self).__name__, self.original_type, repr(self.value), self.start_pos)
(type(self).__name__, self.token_type, repr(self.value), self.start_pos)

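A hedged sketch of what the rename means for callers: the failing token's type is now exposed as token_type (matched against names like 'INDENT' in the PythonLeaf hunk further up), while original_type is gone. The snippet reuses the input from the new test_if_stmt test below:

from parso import parse
module = parse('if x: f.\nelse: g(')
error_leaf = module.children[2]
assert error_leaf.type == 'error_leaf'   # the stray 'else', as the new test asserts
print(error_leaf.token_type)             # type of the offending token
# error_leaf.original_type               # would now raise AttributeError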
@@ -141,7 +141,7 @@ FAILING_EXAMPLES = [

# f-strings
'f"{}"',
'f"{\\}"',
r'f"{\}"',
'f"{\'\\\'}"',
'f"{#}"',
"f'{1!b}'",

@@ -484,3 +484,26 @@ def test_indentation_issue(differ):

differ.initialize(code1)
differ.parse(code2, parsers=2)


def test_endmarker_newline(differ):
code1 = dedent('''\
docu = None
# some comment
result = codet
incomplete_dctassign = {
"module"

if "a":
x = 3 # asdf
''')

code2 = code1.replace('codet', 'coded')

differ.initialize(code1)
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)


def test_newlines_at_end(differ):
differ.initialize('a\n\n')
differ.parse('a\n', copies=1)

test/test_error_recovery.py (new file, 61 lines)
@@ -0,0 +1,61 @@
from parso import parse


def test_with_stmt():
module = parse('with x: f.\na')
assert module.children[0].type == 'with_stmt'
w, with_item, colon, f = module.children[0].children
assert f.type == 'error_node'
assert f.get_code(include_prefix=False) == 'f.'

assert module.children[2].type == 'name'


def test_one_line_function(each_version):
module = parse('def x(): f.', version=each_version)
assert module.children[0].type == 'funcdef'
def_, name, parameters, colon, f = module.children[0].children
assert f.type == 'error_node'

module = parse('def x(a:', version=each_version)
func = module.children[0]
assert func.type == 'error_node'
if each_version.startswith('2'):
assert func.children[-1].value == 'a'
else:
assert func.children[-1] == ':'


def test_if_else():
module = parse('if x:\n f.\nelse:\n g(')
if_stmt = module.children[0]
if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children
f = suite1.children[1]
assert f.type == 'error_node'
assert f.children[0].value == 'f'
assert f.children[1].value == '.'
g = suite2.children[1]
assert g.children[0].value == 'g'
assert g.children[1].value == '('


def test_if_stmt():
module = parse('if x: f.\nelse: g(')
if_stmt = module.children[0]
assert if_stmt.type == 'if_stmt'
if_, test, colon, f = if_stmt.children
assert f.type == 'error_node'
assert f.children[0].value == 'f'
assert f.children[1].value == '.'

assert module.children[1].type == 'newline'
assert module.children[1].value == '\n'
assert module.children[2].type == 'error_leaf'
assert module.children[2].value == 'else'
assert module.children[3].type == 'error_leaf'
assert module.children[3].value == ':'

in_else_stmt = module.children[4]
assert in_else_stmt.type == 'error_node'
assert in_else_stmt.children[0].value == 'g'
assert in_else_stmt.children[1].value == '('
@@ -1,17 +1,19 @@
import pytest
from textwrap import dedent

from parso import load_grammar, ParserSyntaxError
from parso.python.fstring import tokenize
from parso.python.tokenize import tokenize


@pytest.fixture
def grammar():
return load_grammar(language="python-f-string")
return load_grammar(version='3.6')


@pytest.mark.parametrize(
'code', [
'{1}',
'{1:}',
'',
'{1!a}',
'{1!a:1}',
@@ -26,22 +28,12 @@ def grammar():
'{{{1}',
'1{{2{{3',
'}}',
'{:}}}',

# Invalid, but will be checked, later.
'{}',
'{1:}',
'{:}',
'{:1}',
'{!:}',
'{!}',
'{!a}',
'{1:{}}',
'{1:{:}}',
]
)
def test_valid(code, grammar):
fstring = grammar.parse(code, error_recovery=False)
code = 'f"""%s"""' % code
module = grammar.parse(code, error_recovery=False)
fstring = module.children[0]
assert fstring.type == 'fstring'
assert fstring.get_code() == code

@@ -52,24 +44,46 @@ def test_valid(code, grammar):
'{',
'{1!{a}}',
'{!{a}}',
'{}',
'{:}',
'{:}}}',
'{:1}',
'{!:}',
'{!}',
'{!a}',
'{1:{}}',
'{1:{:}}',
]
)
def test_invalid(code, grammar):
code = 'f"""%s"""' % code
with pytest.raises(ParserSyntaxError):
grammar.parse(code, error_recovery=False)

# It should work with error recovery.
#grammar.parse(code, error_recovery=True)
grammar.parse(code, error_recovery=True)


@pytest.mark.parametrize(
('code', 'start_pos', 'positions'), [
('code', 'positions'), [
# 2 times 2, 5 because python expr and endmarker.
('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
(' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
(1, 10), (1, 11), (1, 12), (1, 13)]),
('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
(4, 2), (4, 5)]),
]
)
def test_tokenize_start_pos(code, start_pos, positions):
tokens = tokenize(code, start_pos)
def test_tokenize_start_pos(code, positions):
tokens = list(tokenize(code, version_info=(3, 6)))
assert positions == [p.start_pos for p in tokens]


def test_roundtrip(grammar):
code = dedent("""\
f'''s{
str.uppe
'''
""")
tree = grammar.parse(code)
assert tree.get_code() == code

@@ -12,6 +12,8 @@ import pytest

from parso import load_grammar
from parso import ParserSyntaxError
from parso.pgen2 import generate_grammar
from parso.python import tokenize


def _parse(code, version=None):
@@ -270,3 +272,19 @@ def py_br(each_version):
def test_py3_rb(works_ge_py3):
works_ge_py3.parse("rb'1'")
works_ge_py3.parse("RB'1'")


def test_left_recursion():
with pytest.raises(ValueError, match='left recursion'):
generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)


def test_ambiguities():
with pytest.raises(ValueError, match='ambiguous'):
generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)

with pytest.raises(ValueError, match='ambiguous'):
generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes)

with pytest.raises(ValueError, match='ambiguous'):
generate_grammar('''foo: bar | 'x'\nbar: 'x'\n''', tokenize.PythonTokenTypes)

@@ -114,6 +114,22 @@ def _get_actual_exception(code):
# Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
# certain places. But in others this error makes sense.
return [wanted, "SyntaxError: can't use starred expression here"], line_nr
elif wanted == 'SyntaxError: f-string: unterminated string':
wanted = 'SyntaxError: EOL while scanning string literal'
elif wanted == 'SyntaxError: f-string expression part cannot include a backslash':
return [
wanted,
"SyntaxError: EOL while scanning string literal",
"SyntaxError: unexpected character after line continuation character",
], line_nr
elif wanted == "SyntaxError: f-string: expecting '}'":
wanted = 'SyntaxError: EOL while scanning string literal'
elif wanted == 'SyntaxError: f-string: empty expression not allowed':
wanted = 'SyntaxError: invalid syntax'
elif wanted == "SyntaxError: f-string expression part cannot include '#'":
wanted = 'SyntaxError: invalid syntax'
elif wanted == "SyntaxError: f-string: single '}' is not allowed":
wanted = 'SyntaxError: invalid syntax'
return [wanted], line_nr

@@ -6,13 +6,25 @@ import pytest

from parso._compatibility import py_version
from parso.utils import split_lines, parse_version_string
from parso.python.token import (
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
from parso.python.token import PythonTokenTypes
from parso.python import tokenize
from parso import parse
from parso.python.tokenize import PythonToken


# To make it easier to access some of the token types, just put them here.
NAME = PythonTokenTypes.NAME
NEWLINE = PythonTokenTypes.NEWLINE
STRING = PythonTokenTypes.STRING
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
OP = PythonTokenTypes.OP
ENDMARKER = PythonTokenTypes.ENDMARKER
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START


def _get_token_list(string):
# Load the current version.
version_info = parse_version_string()
@@ -126,7 +138,7 @@ def test_identifier_contains_unicode():
else:
# Unicode tokens in Python 2 seem to be identified as operators.
# They will be ignored in the parser, that's ok.
assert unicode_token[0] == tokenize.ERRORTOKEN
assert unicode_token[0] == OP


def test_quoted_strings():
@@ -162,8 +174,9 @@ def test_ur_literals():
token_list = _get_token_list(literal)
typ, result_literal, _, _ = token_list[0]
if is_literal:
assert typ == STRING
assert result_literal == literal
if typ != FSTRING_START:
assert typ == STRING
assert result_literal == literal
else:
assert typ == NAME

@@ -175,6 +188,7 @@ def test_ur_literals():
# Starting with Python 3.3 this ordering is also possible.
if py_version >= 33:
check('Rb""')

# Starting with Python 3.6 format strings where introduced.
check('fr""', is_literal=py_version >= 36)
check('rF""', is_literal=py_version >= 36)
@@ -184,17 +198,17 @@ def test_ur_literals():

def test_error_literal():
error_token, endmarker = _get_token_list('"\n')
assert error_token.type == tokenize.ERRORTOKEN
assert error_token.type == ERRORTOKEN
assert endmarker.prefix == ''
assert error_token.string == '"\n'
assert endmarker.type == tokenize.ENDMARKER
assert endmarker.type == ENDMARKER
assert endmarker.prefix == ''

bracket, error_token, endmarker = _get_token_list('( """')
assert error_token.type == tokenize.ERRORTOKEN
assert error_token.type == ERRORTOKEN
assert error_token.prefix == ' '
assert error_token.string == '"""'
assert endmarker.type == tokenize.ENDMARKER
assert endmarker.type == ENDMARKER
assert endmarker.prefix == ''


@@ -224,3 +238,11 @@ def test_endmarker_end_pos():
def test_indentation(code, types):
actual_types = [t.type for t in _get_token_list(code)]
assert actual_types == types + [ENDMARKER]


def test_error_string():
t1, endmarker = _get_token_list(' "\n')
assert t1.type == ERRORTOKEN
assert t1.prefix == ' '
assert t1.string == '"\n'
assert endmarker.string == ''

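The module-level aliases introduced above simply pull members off PythonTokenTypes. A small hedged illustration, assuming the members expose a .name attribute as the new PythonToken.__repr__ relies on:

from parso.python.token import PythonTokenTypes
NAME = PythonTokenTypes.NAME
print(NAME)        # the tokenizer's NAME token type
print(NAME.name)   # expected: 'NAME' (assumption: .name mirrors what __repr__ uses)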
tox.ini (5 lines changed)
@@ -1,14 +1,15 @@
[tox]
envlist = py26, py27, py33, py34, py35, py36
envlist = py27, py33, py34, py35, py36, py37
[testenv]
deps =
pytest>=3.0.7
{env:_PARSO_TEST_PYTEST_DEP:pytest>=3.0.7}
# For --lf and --ff.
pytest-cache
setenv =
# https://github.com/tomchristie/django-rest-framework/issues/1957
# tox corrupts __pycache__, solution from here:
PYTHONDONTWRITEBYTECODE=1
py26,py33: _PARSO_TEST_PYTEST_DEP=pytest>=3.0.7,<3.3
commands =
pytest {posargs:parso test}
[testenv:cov]
||||
Reference in New Issue
Block a user