Release of 0.3.3

Rename a test
Remove some unused code
2025-12-07 21:34:32 +08:00 · 2019-02-06 09:55:18 +01:00 · 2019-02-06 09:51:46 +01:00 · 2019-02-06 09:50:27 +01:00 · 2019-02-06 09:31:46 +01:00 · 2019-02-06 01:28:47 +01:00
50 changed files with 3227 additions and 1761 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,5 @@
 /dist/
 parso.egg-info/
 /.cache/
 /.pytest_cache
 test/fuzz-redo.pickle
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,18 +3,17 @@ sudo: false
 python:
  - 2.6
  - 2.7
  - 3.3
  - 3.4
  - 3.5
  - 3.6
  - 3.7
  - pypy
 matrix:
  allow_failures:
    - env: TOXENV=cov
  include:
    - { python: "3.7", dist: xenial, sudo: true }
    - python: 3.5
      env: TOXENV=cov
  allow_failures:
    - env: TOXENV=cov
 install:
    - pip install --quiet tox-travis
 script:
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -3,6 +3,42 @@
 Changelog
 ---------
 0.3.3 (2019-02-06)
 +++++++++++++++++++
 - Fix async errors in the diff parser
 - A fix in iter_errors
 - This is a very small bugfix release
 0.3.2 (2019-01-24)
 +++++++++++++++++++
 - 20+ bugfixes in the diff parser and 3 in the tokenizer
 - A fuzzer for the diff parser, to give confidence that the diff parser is in a
  good shape.
 - Some bugfixes for f-string
 0.3.1 (2018-07-09)
 +++++++++++++++++++
 - Bugfixes in the diff parser and keyword-only arguments
 0.3.0 (2018-06-30)
 +++++++++++++++++++
 - Rewrote the pgen2 parser generator.
 0.2.1 (2018-05-21)
 +++++++++++++++++++
 - A bugfix for the diff parser.
 - Grammar files can now be loaded from a specific path.
 0.2.0 (2018-04-15)
 +++++++++++++++++++
 - f-strings are now parsed as a part of the normal Python grammar. This makes
  it way easier to deal with them.
 0.1.1 (2017-11-05)
 +++++++++++++++++++
--- a/README.rst
+++ b/README.rst
@@ -2,12 +2,13 @@
 parso - A Python Parser
 ###################################################################
 .. image:: https://secure.travis-ci.org/davidhalter/parso.png?branch=master
    :target: http://travis-ci.org/davidhalter/parso
    :alt: Travis-CI build status
-.. image:: https://coveralls.io/repos/davidhalter/parso/badge.png?branch=master
+.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master
-    :target: https://coveralls.io/r/davidhalter/parso
+    :target: https://travis-ci.org/davidhalter/parso
    :alt: Travis CI build status
 .. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master
    :target: https://coveralls.io/github/davidhalter/parso?branch=master
    :alt: Coverage Status
 .. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png
@@ -55,10 +56,10 @@ To list multiple issues:
 Resources
 =========
- `Testing <http://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
+- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
 - `PyPI <https://pypi.python.org/pypi/parso>`_
 - `Docs <https://parso.readthedocs.org/en/latest/>`_
- Uses `semantic versioning <http://semver.org/>`_
+- Uses `semantic versioning <https://semver.org/>`_
 Installation
 ============
--- a/conftest.py
+++ b/conftest.py
@@ -57,6 +57,8 @@ def pytest_generate_tests(metafunc):
        metafunc.parametrize('each_py2_version', VERSIONS_2)
    elif 'each_py3_version' in metafunc.fixturenames:
        metafunc.parametrize('each_py3_version', VERSIONS_3)
    elif 'version_ge_py36' in metafunc.fixturenames:
        metafunc.parametrize('version_ge_py36', ['3.6', '3.7'])
 class NormalizerIssueCase(object):
@@ -151,8 +153,5 @@ def works_ge_py3(each_version):
@pytest.fixture
 def works_ge_py35(each_version):
    """
    Works only greater equal Python 3.5.

    Builds a ``Checker`` for ``each_version`` whose second argument is True
    only when the parsed version is at least ``(3, 5)``.
    """
    version_info = parse_version_string(each_version)
    return Checker(each_version, version_info >= (3, 5))
--- a/deploy-master.sh
+++ b/deploy-master.sh
@@ -36,7 +36,7 @@ if [[ $tag_ref ]]; then
        exit 1
    fi
 else
-    git tag $tag
+    git tag -a $tag
    git push --tags
 fi
--- a/docs/_themes/flask/layout.html
+++ b/docs/_themes/flask/layout.html
@@ -19,7 +19,6 @@
 {% endblock %}
 {%- block footer %}
  <div class="footer">
    &copy; Copyright {{ copyright }}.
    Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
  </div>
  {% if pagename == 'index' %}
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -13,7 +13,6 @@
 import sys
 import os
 import datetime
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -45,7 +44,7 @@ master_doc = 'index'
 # General information about the project.
 project = u'parso'
-copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
+copyright = u'parso contributors'
 import parso
 from parso.utils import version_info
@@ -145,7 +144,7 @@ html_sidebars = {
        #'relations.html',
        'ghbuttons.html',
        #'sourcelink.html',
-        #'searchbox.html'
+        'searchbox.html'
    ]
 }
--- a/docs/docs/usage.rst
+++ b/docs/docs/usage.rst
@@ -61,6 +61,8 @@ Used By
 -------
 - jedi_ (which is used by IPython and a lot of editor plugins).
 - mutmut_ (mutation tester)
 .. _jedi: https://github.com/davidhalter/jedi
 .. _mutmut: https://github.com/boxed/mutmut
--- a/parso/__init__.py
+++ b/parso/__init__.py
@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
 from parso.utils import split_lines, python_bytes_to_unicode
-__version__ = '0.1.1'
+__version__ = '0.3.3'
 def parse(code=None, **kwargs):
--- a/parso/_compatibility.py
+++ b/parso/_compatibility.py
@@ -36,7 +36,7 @@ except AttributeError:
 def u(string):
    """Cast to unicode DAMMIT!
    Written because Python2 repr always implicitly casts to a string, so we
-    have to cast back to a unicode (and we now that we always deal with valid
+    have to cast back to a unicode (and we know that we always deal with valid
    unicode, because we check that in the beginning).
    """
    if py_version >= 30:
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -2,17 +2,16 @@ import hashlib
 import os
 from parso._compatibility import FileNotFoundError, is_pypy
-from parso.pgen2.pgen import generate_grammar
+from parso.pgen2 import generate_grammar
 from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
 from parso.python.diff import DiffParser
 from parso.python.tokenize import tokenize_lines, tokenize
-from parso.python import token
+from parso.python.token import PythonTokenTypes
 from parso.cache import parser_cache, load_module, save_module
 from parso.parser import BaseParser
 from parso.python.parser import Parser as PythonParser
 from parso.python.errors import ErrorFinderConfig
 from parso.python import pep8
 from parso.python import fstring
 _loaded_grammars = {}
@@ -21,7 +20,7 @@ class Grammar(object):
    """
    :py:func:`parso.load_grammar` returns instances of this class.
-    Creating custom grammars by calling this is not supported, yet.
+    Creating custom non-Python grammars by calling this is not supported, yet.
    """
    #:param text: A BNF representation of your grammar.
    _error_normalizer_config = None
@@ -52,8 +51,8 @@ class Grammar(object):
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
-        :param str start_symbol: The grammar symbol that you want to parse. Only
+        :param str start_symbol: The grammar rule (nonterminal) that you want
-            allowed to be used when error_recovery is False.
+            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
@@ -73,7 +72,7 @@ class Grammar(object):
            :py:class:`parso.python.tree.Module`.
        """
        if 'start_pos' in kwargs:
-            raise TypeError("parse() got an unexpected keyworda argument.")
+            raise TypeError("parse() got an unexpected keyword argument.")
        return self._parse(code=code, **kwargs)
    def _parse(self, code=None, error_recovery=True, path=None,
@@ -89,7 +88,7 @@ class Grammar(object):
            raise TypeError("Please provide either code or a path.")
        if start_symbol is None:
-            start_symbol = self._start_symbol
+            start_symbol = self._start_nonterminal
        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")
@@ -137,7 +136,7 @@ class Grammar(object):
        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
-            start_symbol=start_symbol
+            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)
@@ -186,17 +185,16 @@ class Grammar(object):
        normalizer.walk(node)
        return normalizer.issues
    def __repr__(self):
-        labels = self._pgen_grammar.number2symbol.values()
+        nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
-        txt = ' '.join(list(labels)[:3]) + ' ...'
+        txt = ' '.join(list(nonterminals)[:3]) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)
 class PythonGrammar(Grammar):
    _error_normalizer_config = ErrorFinderConfig()
-    _token_namespace = token
+    _token_namespace = PythonTokenTypes
-    _start_symbol = 'file_input'
+    _start_nonterminal = 'file_input'
    def __init__(self, version_info, bnf_text):
        super(PythonGrammar, self).__init__(
@@ -215,46 +213,19 @@ class PythonGrammar(Grammar):
        return tokenize(code, self.version_info)
 class PythonFStringGrammar(Grammar):
    """
    Grammar for the contents of f-strings, built from the ``fstring`` module's
    grammar text, tokenizer and parser.
    """
    _token_namespace = fstring.TokenNamespace
    _start_symbol = 'fstring'
    def __init__(self):
        # Everything needed comes from the fstring module; there is no
        # version-specific grammar file involved here.
        super(PythonFStringGrammar, self).__init__(
            text=fstring.GRAMMAR,
            tokenizer=fstring.tokenize,
            parser=fstring.Parser
        )
    def parse(self, code, **kwargs):
        """
        Parse ``code``; keyword arguments are forwarded unchanged to
        :meth:`_parse` (``error_recovery`` and ``start_pos``).
        """
        return self._parse(code, **kwargs)
    def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
        """
        Tokenize ``code`` starting at ``start_pos`` and run the parser over
        the tokens, always starting from ``_start_symbol``.
        """
        # presumably _tokenizer/_parser/_pgen_grammar are set by
        # Grammar.__init__ -- TODO confirm, it is not visible here.
        tokens = self._tokenizer(code, start_pos=start_pos)
        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_symbol=self._start_symbol,
        )
        return p.parse(tokens=tokens)
    def parse_leaf(self, leaf, error_recovery=True):
        """
        Parse the payload of ``leaf``, positioned at the leaf's ``start_pos``.
        """
        code = leaf._get_payload()
        # NOTE(review): the ``error_recovery`` argument is ignored; True is
        # always passed on. Confirm whether that is intended.
        return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)
 def load_grammar(**kwargs):
    """
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.
    :param str version: A python version string, e.g. ``version='3.3'``.
    :param str path: A path to a grammar file
    """
-    def load_grammar(language='python', version=None):
+    def load_grammar(language='python', version=None, path=None):
        if language == 'python':
            version_info = parse_version_string(version)
-            file = os.path.join(
+            file = path or os.path.join(
                'python',
                'grammar%s%s.txt' % (version_info.major, version_info.minor)
            )
@@ -273,10 +244,6 @@ def load_grammar(**kwargs):
                except FileNotFoundError:
                    message = "Python version %s is currently not supported." % version
                    raise NotImplementedError(message)
        elif language == 'python-f-string':
            if version is not None:
                raise NotImplementedError("Currently different versions are not supported.")
            return PythonFStringGrammar()
        else:
            raise NotImplementedError("No support for language %s." % language)
--- a/parso/parser.py
+++ b/parso/parser.py
@@ -1,3 +1,11 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.
 # 99% of the code is different from pgen2, now.
 """
 The ``Parser`` tries to convert the available Python code in an easy to read
 format, something like an abstract syntax tree. The classes who represent this
@@ -16,7 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
 ``Statement``, which produces ``Array`` and ``Call``).
 """
 from parso import tree
-from parso.pgen2.parse import PgenParser
+from parso.pgen2.generator import ReservedString
 class ParserSyntaxError(Exception):
@@ -30,7 +38,76 @@ class ParserSyntaxError(Exception):
        self.error_leaf = error_leaf
 class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.

    :param msg: Short description of the failure.
    :param type_: The token type of the offending token; its ``name``
        attribute is used in the exception message.
    :param value: The string value of the offending token.
    :param start_pos: The position of the offending token.
    """
    def __init__(self, msg, type_, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, type_.name, value, start_pos))
        self.msg = msg
        # Keep the token type that was passed in. The previous code stored the
        # ``type`` builtin here by accident (missing trailing underscore).
        self.type = type_
        self.value = value
        self.start_pos = start_pos
 class Stack(list):
    """The parser stack: a list of stack nodes, innermost node last."""
    def _allowed_transition_names_and_token_types(self):
        """
        An API just for Jedi.

        Walks the stack from the top downwards and collects, for every
        transition of each node's DFA, either the reserved string's value or
        the token type itself. Stops after the first node whose DFA is not
        final.
        """
        allowed = []
        for stack_node in reversed(self):
            current_dfa = stack_node.dfa
            allowed.extend(
                # Reserved strings are reported by value, token types as-is.
                t.value if isinstance(t, ReservedString) else t
                for t in current_dfa.transitions
            )
            if not current_dfa.is_final:
                break
        return allowed
 class StackNode(object):
    """
    One entry of the parser stack: the DFA state currently active for a rule
    plus the child nodes collected for it so far.
    """
    def __init__(self, dfa):
        self.nodes = []
        self.dfa = dfa
    @property
    def nonterminal(self):
        """The rule name the current DFA state belongs to."""
        return self.dfa.from_rule
    def __repr__(self):
        parts = (type(self).__name__, self.dfa, self.nodes)
        return '%s(%s, %s)' % parts
 def _token_to_transition(grammar, type_, value):
    # Map from token to label
    if type_.contains_syntax:
        # Check for reserved words (keywords)
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass
    return type_
 class BaseParser(object):
    """Parser engine.
    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.
    See python/tokenize.py for how to get input tokens by a string.
    When a syntax error occurs, error_recovery() is called.
    """
    node_map = {}
    default_node = tree.Node
@@ -38,41 +115,97 @@ class BaseParser(object):
    }
    default_leaf = tree.Leaf
-    def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
+    def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
        self._pgen_grammar = pgen_grammar
-        self._start_symbol = start_symbol
+        self._start_nonterminal = start_nonterminal
        self._error_recovery = error_recovery
    def parse(self, tokens):
-        start_number = self._pgen_grammar.symbol2number[self._start_symbol]
+        first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
-        self.pgen_parser = PgenParser(
+        self.stack = Stack([StackNode(first_dfa)])
-            self._pgen_grammar, self.convert_node, self.convert_leaf,
+
-            self.error_recovery, start_number
+        for token in tokens:
            self._add_token(token)
        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                # We never broke out -- EOF is too soon -- Unfinished statement.
                # However, the error recovery might have added the token again, if
                # the stack is empty, we're fine.
                raise InternalParseError(
                    "incomplete input", token.type, token.value, token.start_pos
                )
-        node = self.pgen_parser.parse(tokens)
+            if len(self.stack) > 1:
-        # The stack is empty now, we don't need it anymore.
+                self._pop()
-        del self.pgen_parser
+            else:
-        return node
+                return self.convert_node(tos.nonterminal, tos.nodes)
-    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
+    def error_recovery(self, token):
                       add_token_callback):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
-            error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
+            type_, value, start_pos, prefix = token
            error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
-    def convert_node(self, pgen_grammar, type_, children):
+    def convert_node(self, nonterminal, children):
        # TODO REMOVE symbol, we don't want type here.
        symbol = pgen_grammar.number2symbol[type_]
        try:
-            return self.node_map[symbol](children)
+            node = self.node_map[nonterminal](children)
        except KeyError:
-            return self.default_node(symbol, children)
+            node = self.default_node(nonterminal, children)
        for c in children:
            c.parent = node
        return node
-    def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
+    def convert_leaf(self, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)
    def _add_token(self, token):
        """
        This is the only core function for parsing. Here happens basically
        everything. Everything is well prepared by the parser generator and we
        only apply the necessary steps here.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        type_, value, start_pos, prefix = token
        transition = _token_to_transition(grammar, type_, value)
        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    self._pop()
                else:
                    self.error_recovery(token)
                    return
            except IndexError:
                raise InternalParseError("too much input", type_, value, start_pos)
        stack[-1].dfa = plan.next_dfa
        for push in plan.dfa_pushes:
            stack.append(StackNode(push))
        leaf = self.convert_leaf(type_, value, prefix, start_pos)
        stack[-1].nodes.append(leaf)
    def _pop(self):
        tos = self.stack.pop()
        # If there's exactly one child, return that child instead of
        # creating a new node.  We still create expr_stmt and
        # file_input though, because a lot of Jedi depends on its
        # logic.
        if len(tos.nodes) == 1:
            new_node = tos.nodes[0]
        else:
            new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)
        self.stack[-1].nodes.append(new_node)
--- a/parso/pgen2/__init__.py
+++ b/parso/pgen2/__init__.py
@@ -4,5 +4,7 @@
 # Modifications:
 # Copyright 2006 Google, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
-# Copyright 2014 David Halter. Integration into Jedi.
+# Copyright 2014 David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.
 from parso.pgen2.generator import generate_grammar
--- a/parso/pgen2/generator.py
+++ b/parso/pgen2/generator.py
@@ -0,0 +1,358 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.
 """
 This module defines the data structures used to represent a grammar.
 Specifying grammars in pgen is possible with this grammar::
    grammar: (NEWLINE | rule)* ENDMARKER
    rule: NAME ':' rhs NEWLINE
    rhs: items ('|' items)*
    items: item+
    item: '[' rhs ']' | atom ['+' | '*']
    atom: '(' rhs ')' | NAME | STRING
 This grammar is self-referencing.
 This parser generator (pgen2) was created by Guido van Rossum and used for
 lib2to3. Most of the code has been refactored to make it more Pythonic. Since
 this was a "copy" of the CPython parser generator "pgen", there was some work
 needed to make it more readable. It should also be slightly faster than the
 original pgen2, because we made some optimizations.
 """
 from ast import literal_eval
 from parso.pgen2.grammar_parser import GrammarParser, NFAState
 class Grammar(object):
    """
    Container for the tables the parsing engine works with. The engine reads
    the instance attributes directly.

    The only parts that matter to the parser are the DFAs of each rule and
    the transitions between them.

    :param start_nonterminal: Name of the rule parsing starts from.
    :param rule_to_dfas: Mapping of rule name to its list of DFA states.
    :param reserved_syntax_strings: Mapping of keyword/operator text to its
        reserved-string object.
    """
    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.start_nonterminal = start_nonterminal
        # Dict[str, List[DFAState]]
        self.nonterminal_to_dfas = rule_to_dfas
        self.reserved_syntax_strings = reserved_syntax_strings
 class DFAPlan(object):
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.

    :param next_dfa: The DFA state the current stack node moves to.
    :param dfa_pushes: DFA states for which new stack nodes are pushed, in
        order. Defaults to no pushes.
    """
    def __init__(self, next_dfa, dfa_pushes=None):
        self.next_dfa = next_dfa
        # Use a fresh list per instance. A literal ``[]`` default would be one
        # shared object for every plan created without explicit pushes.
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes
    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
 class DFAState(object):
    """
    The central building block of the generator. DFAStates are the vertices
    of a graph; arcs and transitions are its edges.

    Arcs are the initial edges and only connect states of one rule.
    Transitions are calculated afterwards and connect the state machines of
    different nonterminals.
    """
    def __init__(self, from_rule, nfa_set, final):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        # The state is accepting when it contains the rule's final NFA state.
        self.is_final = final in nfa_set
        # Map from terminals/nonterminals to DFAState.
        self.arcs = {}
        # Filled in an intermediary step; same structure as arcs, but without
        # the terminals.
        self.nonterminal_arcs = {}
        # Together with is_final this is all the parser uses. Everything else
        # only exists to build the parser.
        self.transitions = {}  #: Dict[Union[TokenType, ReservedString], DFAPlan]
    def add_arc(self, next_, label):
        """Register ``next_`` as the target reached via ``label``."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_
    def unifystate(self, old, new):
        """Redirect every arc that points at ``old`` so it points at ``new``."""
        for label in self.arcs:
            if self.arcs[label] is old:
                self.arcs[label] = new
    def __eq__(self, other):
        # Equality deliberately ignores nfa_set.
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final or len(self.arcs) != len(other.arcs):
            return False
        # Targets are compared by identity: comparing the arc dicts with ==
        # would call this method recursively and never finish on cycles.
        return all(
            target is other.arcs.get(label)
            for label, target in self.arcs.items()
        )
    __hash__ = None  # For Py3 compatibility.
    def __repr__(self):
        details = (type(self).__name__, self.from_rule, self.is_final)
        return '<%s: %s is_final=%s>' % details
 class ReservedString(object):
    """
    Represents a keyword or operator that a grammar spells out as a literal
    string (e.g. "if") instead of referring to a token type (e.g. NUMBER).
    """
    def __init__(self, value):
        self.value = value
    def __repr__(self):
        class_name = type(self).__name__
        return '%s(%s)' % (class_name, self.value)
 def _simplify_dfas(dfas):
    """
    Merge equal states of one rule's DFA, in place.

    This is not theoretically optimal, but works well enough.
    Algorithm: repeatedly look for two states that have the same
    set of arcs (same labels pointing to the same nodes) and
    unify them, until things stop changing.

    :param dfas: A list of DFAState instances. The list is mutated: duplicate
        states are deleted and arcs pointing at them are redirected to the
        state that was kept. Nothing is returned.
    """
    changes = True
    while changes:
        changes = False
        for i, state_i in enumerate(dfas):
            # Only look at later states, so the earlier one (state_i) is
            # always the one that survives.
            for j in range(i + 1, len(dfas)):
                state_j = dfas[j]
                if state_i == state_j:
                    #print "  unify", i, j
                    del dfas[j]
                    # Redirect every arc that pointed at the removed state.
                    for state in dfas:
                        state.unifystate(state_j, state_i)
                    changes = True
                    # The list just shrank, so the range above is stale;
                    # leave the inner loop.
                    break
 def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    A DFA state whose NFA set was already created is reused instead of being
    created a second time.

    :param start: The first NFAState of a rule.
    :param finish: The final NFAState of the same rule; a DFA state is final
        when its NFA set contains it.
    :return: List of DFAState instances; the first one is the start state.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA.  Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)
    def addclosure(nfa_state, base_nfa_set):
        # Adds nfa_state and everything reachable from it through unlabeled
        # arcs (nonterminal_or_string is None) to base_nfa_set.
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)
    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        # Maps a label to the set of NFA states reachable through it.
        arcs = {}
        # Find state transitions and store them in arcs.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)
        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                # No existing state has this NFA set; create it and queue it
                # for processing by the outer loop.
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)
            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start
 def _dump_nfa(start, finish):
    print("Dump of NFA for", start.from_rule)
    todo = [start]
    for i, state in enumerate(todo):
        print("  State", i, state is finish and "(final)" or "")
        for label, next_ in state.arcs:
            if next_ in todo:
                j = todo.index(next_)
            else:
                j = len(todo)
                todo.append(next_)
            if label is None:
                print("    -> %d" % j)
            else:
                print("    %s -> %d" % (label, j))
 def _dump_dfas(dfas):
    print("Dump of DFA for", dfas[0].from_rule)
    for i, state in enumerate(dfas):
        print("  State", i, state.is_final and "(final)" or "")
        for nonterminal, next_ in state.arcs.items():
            print("    %s -> %d" % (nonterminal, dfas.index(next_)))
 def generate_grammar(bnf_grammar, token_namespace):
    """
    ``bnf_grammar`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).
    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.

    :param bnf_grammar: The grammar text handed to ``GrammarParser``.
    :param token_namespace: Object on which named tokens mentioned in the
        grammar are looked up as attributes (see ``_make_transition``).
    :return: A ``Grammar`` whose start nonterminal is the first rule produced
        by the grammar parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
        #_dump_nfa(a, z)
        dfas = _make_dfas(nfa_a, nfa_z)
        #_dump_dfas(dfas)
        # oldlen = len(dfas)
        _simplify_dfas(dfas)
        # newlen = len(dfas)
        rule_to_dfas[nfa_a.from_rule] = dfas
        #print(nfa_a.from_rule, oldlen, newlen)
        # The first rule of the grammar becomes the start rule.
        if start_nonterminal is None:
            start_nonterminal = nfa_a.from_rule
    reserved_strings = {}
    # Split every arc into either a nonterminal arc (label is a rule name) or
    # a terminal transition. This has to happen for all rules before the tree
    # traversal below can be calculated.
    for nonterminal, dfas in rule_to_dfas.items():
        for dfa_state in dfas:
            for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
                if terminal_or_nonterminal in rule_to_dfas:
                    dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        terminal_or_nonterminal
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)
    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)
 def _make_transition(token_namespace, reserved_syntax_strings, label):
    """
    Creates a reserved string ("if", "for", "*", ...) or returns the token type
    (NUMBER, STRING, ...) for a given grammar terminal.
    """
    if label[0].isalpha():
        # A named token (e.g. NAME, NUMBER, STRING)
        return getattr(token_namespace, label)
    else:
        # Either a keyword or an operator
        assert label[0] in ('"', "'"), label
        assert not label.startswith('"""') and not label.startswith("'''")
        value = literal_eval(label)
        try:
            return reserved_syntax_strings[value]
        except KeyError:
            r = reserved_syntax_strings[value] = ReservedString(value)
            return r
 def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}
    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion or ambiguities.
    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)
 def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
    """
    Calculates the first plan in the first_plans dictionary for every given
    nonterminal. This is going to be used to know when to create stack nodes.

    :param nonterminal_to_dfas: Mapping of rule name to its list of DFA states.
    :param first_plans: Mapping of rule name to its first plans; filled in by
        this function, also for rules reached through recursion.
    :param nonterminal: The rule to calculate the first plans for.
    :return: The dict stored in ``first_plans[nonterminal]``, mapping each
        possible first transition to the list of DFA states to push.
    :raises ValueError: On left recursion, or when one first transition can
        be reached in two different ways (ambiguity).
    """
    dfas = nonterminal_to_dfas[nonterminal]
    new_first_plans = {}
    first_plans[nonterminal] = None  # dummy to detect left recursion
    # We only need to check the first dfa. All the following ones are not
    # interesting to find first terminals.
    state = dfas[0]
    for transition, next_ in state.transitions.items():
        # A terminal transition: we have found a possible first token. Here
        # next_ is a plan; only the DFA it leads to is kept.
        new_first_plans[transition] = [next_.next_dfa]
    for nonterminal2, next_ in state.nonterminal_arcs.items():
        # It's a nonterminal and we have either a left recursion issue
        # in the grammar or we have to recurse.
        try:
            first_plans2 = first_plans[nonterminal2]
        except KeyError:
            first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
        else:
            # None is the dummy set above: that rule is still being
            # calculated further up the call chain.
            if first_plans2 is None:
                raise ValueError("left recursion for rule %r" % nonterminal)
        for t, pushes in first_plans2.items():
            check = new_first_plans.get(t)
            if check is not None:
                raise ValueError(
                    "Rule %s is ambiguous; %s is the"
                    " start of the rule %s as well as %s."
                    % (nonterminal, t, nonterminal2, check[-1].from_rule)
                )
            # Here next_ is the DFA state to continue with in this rule once
            # nonterminal2 is done; the nested rule's pushes follow it.
            new_first_plans[t] = [next_] + pushes
    first_plans[nonterminal] = new_first_plans
    return new_first_plans
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -1,128 +0,0 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright 2014 David Halter. Integration into Jedi.
 # Modifications are dual-licensed: MIT and PSF.
 """This module defines the data structures used to represent a grammar.
 These are a bit arcane because they are derived from the data
 structures used by Python's 'pgen' parser generator.
 There's also a table here mapping operators to their names in the
 token module; the Python tokenize module reports all operators as the
 fallback token code OP, but the parser needs the actual token code.
 """
 try:
    import cPickle as pickle
 except:
    import pickle
 class Grammar(object):
    """Pgen parsing tables conversion class.
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).
    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.
    The instance variables are as follows:
    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).
    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.
    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.
    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).
    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.
    start         -- the number of the grammar's start symbol.
    keywords      -- a dict mapping keyword strings to arc labels.
    tokens        -- a dict mapping token numbers to arc labels.
    symbol2label  -- a dict mapping symbol names to arc labels.
    label2symbol  -- a dict mapping arc labels to symbol names.
    """
    def __init__(self, bnf_text=None):
        """
        Create a grammar with empty tables.
        ``bnf_text`` is not used by this class. It is optional so that
        ``copy()`` can create a fresh instance without arguments.
        """
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.label2symbol = {}
        self.start = 256
    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            # Protocol 2 is the highest protocol Python 2 can read.
            pickle.dump(self.__dict__, f, 2)
    def load(self, filename):
        """
        Load the grammar tables from a pickle file.
        NOTE: unpickling can execute arbitrary code, only load files that
        were written by ``dump()`` and come from a trusted location.
        """
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)
    def copy(self):
        """
        Copy the grammar.
        The dicts and lists holding the tables are copied, so adding or
        removing entries on the copy does not affect the original. The
        entries themselves are shared.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label", "label2symbol"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new
    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
--- a/parso/pgen2/grammar_parser.py
+++ b/parso/pgen2/grammar_parser.py
@@ -0,0 +1,156 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.
 from parso.python.tokenize import tokenize
 from parso.utils import parse_version_string
 from parso.python.token import PythonTokenTypes
 class GrammarParser():
    """
    The parser for Python grammar files.
    It reads the grammar text token by token (with one token of lookahead
    stored in ``self.type``/``self.value``/``self.begin``) and builds an NFA
    for every rule. Each ``_parse_*`` method returns the ``(start, end)``
    pair of ``NFAState`` objects for the part of the rule it consumed.
    """
    def __init__(self, bnf_grammar):
        self._bnf_grammar = bnf_grammar
        self.generator = tokenize(
            bnf_grammar,
            version_info=parse_version_string('3.6')
        )
        self._gettoken()  # Initialize lookahead
    def parse(self):
        """
        Yield the ``(start, end)`` NFA states of every rule, in the order
        in which the rules appear in the grammar.
        """
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != PythonTokenTypes.ENDMARKER:
            while self.type == PythonTokenTypes.NEWLINE:
                self._gettoken()
            # rule: NAME ':' rhs NEWLINE
            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
            self._expect(PythonTokenTypes.OP, ':')
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.NEWLINE)
            yield a, z
    def _parse_rhs(self):
        """Parse alternatives separated by ``|``."""
        # rhs: items ('|' items)*
        a, z = self._parse_items()
        if self.value != "|":
            return a, z
        else:
            # Common start/end states that all alternatives hang off.
            aa = NFAState(self._current_rule_name)
            zz = NFAState(self._current_rule_name)
            while True:
                # Add the possibility to go into the state of a and come back
                # to finish.
                aa.add_arc(a)
                z.add_arc(zz)
                if self.value != "|":
                    break
                self._gettoken()
                a, z = self._parse_items()
            return aa, zz
    def _parse_items(self):
        """Parse a sequence of one or more items."""
        # items: item+
        a, b = self._parse_item()
        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
                or self.value in ('(', '['):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
            b = d
        return a, b
    def _parse_item(self):
        """Parse an optional part (``[...]``) or an atom with ``+``/``*``."""
        # item: '[' rhs ']' | atom ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ']')
            # Make it also possible that there is no token and change the
            # state.
            a.add_arc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            # Make it clear that we can go back to the old state and repeat.
            z.add_arc(a)
            if value == "+":
                return a, z
            else:
                # The end state is the same as the beginning, nothing must
                # change.
                return a, a
    def _parse_atom(self):
        """Parse a parenthesized group, a NAME or a STRING."""
        # atom: '(' rhs ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ')')
            return a, z
        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
            a = NFAState(self._current_rule_name)
            z = NFAState(self._current_rule_name)
            # Make it clear that the state transition requires that value.
            a.add_arc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)
    def _expect(self, type_, value=None):
        """
        Check that the current token has the given type (and value, if one
        is given), advance to the next token and return the old value.
        Raises SyntaxError otherwise.
        """
        if self.type != type_:
            self._raise_error("expected %s, got %s [%s]",
                              type_, self.type, self.value)
        if value is not None and self.value != value:
            self._raise_error("expected %s, got %s", value, self.value)
        value = self.value
        self._gettoken()
        return value
    def _gettoken(self):
        """Advance the lookahead by one token."""
        tup = next(self.generator)
        # The prefix of the token is not needed here.
        self.type, self.value, self.begin, prefix = tup
    def _raise_error(self, msg, *args):
        """
        Raise a SyntaxError that points at the current token.
        ``msg`` is formatted with ``args``. If that is not possible the
        arguments are appended to the message instead.
        """
        if args:
            try:
                msg = msg % args
            except (TypeError, ValueError):
                # Only formatting problems are expected here, everything
                # else (e.g. KeyboardInterrupt) must not be swallowed.
                msg = " ".join([msg] + list(map(str, args)))
        line = self._bnf_grammar.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ('<grammar>', self.begin[0],
                                self.begin[1], line))
 class NFAArc(object):
    """
    One arc of an NFA. ``next`` is the state the arc leads to and
    ``nonterminal_or_string`` is what has to be matched to follow it.
    """
    def __init__(self, next_, nonterminal_or_string):
        self.next, self.nonterminal_or_string = next_, nonterminal_or_string
 class NFAState(object):
    """
    A state of the NFA that is built for the rule ``from_rule``. The
    outgoing arcs are kept in ``arcs`` in the order they were added.
    """
    def __init__(self, from_rule):
        self.arcs = []  # NFAArc objects
        self.from_rule = from_rule
    def add_arc(self, next_, nonterminal_or_string=None):
        """
        Append an arc to ``next_``. Without ``nonterminal_or_string`` the arc
        carries no label.
        """
        if nonterminal_or_string is not None:
            assert isinstance(nonterminal_or_string, str)
        assert isinstance(next_, NFAState)
        arc = NFAArc(next_, nonterminal_or_string)
        self.arcs.append(arc)
    def __repr__(self):
        name = self.__class__.__name__
        return '<%s: from %s>' % (name, self.from_rule)
--- a/parso/pgen2/parse.py
+++ b/parso/pgen2/parse.py
@@ -1,223 +0,0 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright 2014 David Halter. Integration into Jedi.
 # Modifications are dual-licensed: MIT and PSF.
 """
 Parser engine for the grammar tables generated by pgen.
 The grammar table must be loaded first.
 See Parser/parser.c in the Python distribution for additional info on
 how this parsing engine works.
 """
 from parso.python import tokenize
 class InternalParseError(Exception):
    """
    Raised when the parser cannot continue and error recovery did not help.
    This is not supposed to happen, it is a sign that something is really
    wrong.
    """
    def __init__(self, msg, type, value, start_pos):
        # The token name is only used for the message, the attributes keep
        # the values exactly as they were passed in.
        details = (msg, tokenize.tok_name[type], value, start_pos)
        Exception.__init__(
            self, "%s: type=%r, value=%r, start_pos=%r" % details)
        self.msg, self.type = msg, type
        self.value, self.start_pos = value, start_pos
 class Stack(list):
    """A list of stack entries with a shortcut to the top entry's nodes."""
    def get_tos_nodes(self):
        """Return the children list of the node in the top stack entry."""
        # An entry is (dfa, state, node) and a node is (type, children).
        node = self[-1][2]
        return node[1]
 def token_to_ilabel(grammar, type_, value):
    """
    Return the label of the grammar for a token, or None if the grammar has
    no label for it. Names that are keywords of the grammar get the label of
    the keyword.
    """
    # Reserved words take precedence over the generic NAME label.
    if type_ == tokenize.NAME and value in grammar.keywords:
        return grammar.keywords[value]
    return grammar.tokens.get(type_)
 class PgenParser(object):
    """Parser engine.
    A table driven parser that works on a stack of (dfa, state, node)
    entries. Tokens are fed in one by one and are either shifted onto the
    node of the top stack entry, cause a nonterminal to be pushed, or cause
    finished nonterminals to be popped.
    The usage sequence is:
    p = PgenParser(grammar, convert_node, convert_leaf,
                   error_recovery, start)  # create instance
    root = p.parse(tokens)                 # parse all tokens
    Alternatively add_token() can be called for each token; parsing is
    complete when it returns True and the result is then available as the
    rootnode instance variable.
    When a token does not fit the grammar, the error_recovery callable that
    was given to the constructor is called.
    A parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.
    """
    def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
        """Constructor.
        The grammar argument supplies the tables that are used here:
        ``dfas``, ``labels``, ``keywords`` and ``tokens``; see the grammar
        module for more information.
        convert_node is called as convert_node(grammar, type, children) when
        a nonterminal with more or less than one child is finished.
        convert_leaf is called as
        convert_leaf(grammar, type, value, prefix, start_pos) for every token
        that is shifted. What they return is stored in the children list of
        the parent node and is entirely up to these functions.
        error_recovery is called when a token cannot be handled in the
        current state (see add_token()).
        start is the symbol number to start parsing with; the stack is
        initialized with the DFA of that symbol.
        """
        self.grammar = grammar
        self.convert_node = convert_node
        self.convert_leaf = convert_leaf
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, children),
        # where children is a list of nodes or None
        newnode = (start, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = Stack([stackentry])
        self.rootnode = None
        self.error_recovery = error_recovery
    def parse(self, tokens):
        """
        Feed all (type, value, start_pos, prefix) tuples of ``tokens`` to
        add_token() and return the root node.
        Raises InternalParseError if the tokens run out while the stack is
        not empty.
        """
        for type_, value, start_pos, prefix in tokens:
            if self.add_token(type_, value, start_pos, prefix):
                break
        else:
            # We never broke out -- EOF is too soon -- Unfinished statement.
            # However, the error recovery might have added the token again, if
            # the stack is empty, we're fine.
            # NOTE(review): with an empty ``tokens`` iterable the loop
            # variables used below are not bound.
            if self.stack:
                raise InternalParseError("incomplete input", type_, value, start_pos)
        return self.rootnode
    def add_token(self, type_, value, start_pos, prefix):
        """Add a token; return True if this is the end of the program.
        Returns False when the token was shifted and more input is expected,
        and None when error_recovery was called for the token.
        """
        ilabel = token_to_ilabel(self.grammar, type_, value)
        # Loop until the token is shifted; may raise exceptions
        # Attributes are bound to local names once, before the loop.
        _gram = self.grammar
        _labels = _gram.labels
        _push = self._push
        _pop = self._pop
        _shift = self._shift
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = _labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    _shift(type_, value, newstate, prefix, start_pos)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        _pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = _gram.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        _push(t, itsdfa, newstate)
                        break  # To continue the outer while loop
            else:
                # No arc of the current state could be used for this token.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    _pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise InternalParseError("too much input", type_, value, start_pos)
                else:
                    self.error_recovery(self.grammar, self.stack, arcs, type_,
                                        value, start_pos, prefix, self.add_token)
                    break
    def _shift(self, type_, value, newstate, prefix, start_pos):
        """Shift a token.  (Internal)
        The converted leaf is appended to the children of the top node and
        the top stack entry moves on to ``newstate``.
        """
        dfa, state, node = self.stack[-1]
        newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
        node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)
    def _push(self, type_, newdfa, newstate):
        """Push a nonterminal.  (Internal)
        The current top entry is moved to ``newstate`` (the state to continue
        in once the nonterminal is done) and a new entry for ``newdfa`` is
        put on top of it, starting in state 0 with no children.
        """
        dfa, state, node = self.stack[-1]
        newnode = (type_, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))
    def _pop(self):
        """Pop a nonterminal.  (Internal)
        The finished node is appended to the children of the new top entry,
        or becomes ``rootnode`` if the stack is empty afterwards.
        """
        popdfa, popstate, (type_, children) = self.stack.pop()
        # If there's exactly one child, return that child instead of creating a
        # new node.  We still create expr_stmt and file_input though, because a
        # lot of Jedi depends on its logic.
        if len(children) == 1:
            newnode = children[0]
        else:
            newnode = self.convert_node(self.grammar, type_, children)
        try:
            # Equal to:
            # dfa, state, node = self.stack[-1]
            # symbol, children = node
            self.stack[-1][2][1].append(newnode)
        except IndexError:
            # Stack is empty, set the rootnode.
            self.rootnode = newnode
--- a/parso/pgen2/pgen.py
+++ b/parso/pgen2/pgen.py
@@ -1,399 +0,0 @@
 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 # Licensed to PSF under a Contributor Agreement.
 # Modifications:
 # Copyright 2014 David Halter. Integration into Jedi.
 # Modifications are dual-licensed: MIT and PSF.
 from parso.pgen2 import grammar
 from parso.python import token
 from parso.python import tokenize
 from parso.utils import parse_version_string
 class ParserGenerator(object):
    """
    Reads a grammar text and turns it into the tables of a
    ``grammar.Grammar``.
    Creating an instance parses the whole text: every rule is converted to
    an NFA, then to a DFA that is simplified afterwards, and the first sets
    of all rules are calculated. ``make_grammar()`` converts the result into
    the numbered tables.
    """
    def __init__(self, bnf_text, token_namespace):
        self._bnf_text = bnf_text
        self.generator = tokenize.tokenize(
            bnf_text,
            version_info=parse_version_string('3.6')
        )
        self._gettoken()  # Initialize lookahead
        self.dfas, self.startsymbol = self._parse()
        self.first = {}  # map from symbol name to set of tokens
        self._addfirstsets()
        # Only needed by make_grammar(), parsing above does not use it.
        self._token_namespace = token_namespace
    def make_grammar(self):
        """
        Create the ``grammar.Grammar`` instance from the parsed rules.
        Symbols are numbered from 256 upwards in sorted order, except for
        the start symbol, which always comes first.
        """
        c = grammar.Grammar(self._bnf_text)
        names = list(self.dfas.keys())
        names.sort()
        names.remove(self.startsymbol)
        names.insert(0, self.startsymbol)
        for name in names:
            i = 256 + len(c.symbol2number)
            c.symbol2number[name] = i
            c.number2symbol[i] = name
        for name in names:
            dfa = self.dfas[name]
            states = []
            for state in dfa:
                arcs = []
                for label, next in state.arcs.items():
                    arcs.append((self._make_label(c, label), dfa.index(next)))
                if state.isfinal:
                    # Final states are marked by an arc with label 0 that
                    # points to the state itself.
                    arcs.append((0, dfa.index(state)))
                states.append(arcs)
            c.states.append(states)
            c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name))
        c.start = c.symbol2number[self.startsymbol]
        return c
    def _make_first(self, c, name):
        """
        Return the first set of the rule ``name`` as a dict whose keys are
        label numbers of ``c`` (the values are always 1).
        """
        rawfirst = self.first[name]
        first = {}
        for label in rawfirst:
            ilabel = self._make_label(c, label)
            ##assert ilabel not in first # XXX failed on <> ... !=
            first[ilabel] = 1
        return first
    def _make_label(self, c, label):
        """
        Return the label number of ``label`` in ``c.labels``. A new entry is
        appended the first time a symbol, token, keyword or operator is seen,
        after that the number stored in ``c`` is reused.
        """
        # XXX Maybe this should be a method on a subclass of converter?
        ilabel = len(c.labels)
        if label[0].isalpha():
            # Either a symbol name or a named token
            if label in c.symbol2number:
                # A symbol name (a non-terminal)
                if label in c.symbol2label:
                    return c.symbol2label[label]
                else:
                    c.labels.append((c.symbol2number[label], None))
                    c.symbol2label[label] = ilabel
                    c.label2symbol[ilabel] = label
                    return ilabel
            else:
                # A named token (NAME, NUMBER, STRING)
                itoken = getattr(self._token_namespace, label, None)
                assert isinstance(itoken, int), label
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
        else:
            # Either a keyword or an operator
            assert label[0] in ('"', "'"), label
            # NOTE(review): eval() is used to unquote a string that comes
            # from the grammar text, so the grammar has to be trusted.
            value = eval(label)
            if value[0].isalpha():
                # A keyword
                if value in c.keywords:
                    return c.keywords[value]
                else:
                    # TODO this might be an issue?! Using token.NAME here?
                    c.labels.append((token.NAME, value))
                    c.keywords[value] = ilabel
                    return ilabel
            else:
                # An operator (any non-numeric token)
                itoken = self._token_namespace.generate_token_id(value)
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
    def _addfirstsets(self):
        """Calculate the first set of every rule that does not have one yet."""
        names = list(self.dfas.keys())
        names.sort()
        for name in names:
            if name not in self.first:
                self._calcfirst(name)
            #print name, self.first[name].keys()
    def _calcfirst(self, name):
        """
        Calculate ``self.first[name]``, recursing into rules that can start
        this rule.
        Raises ValueError if a rule is reached again while its own first set
        is still being calculated, or if two arcs of the start state can
        begin with the same symbol.
        """
        dfa = self.dfas[name]
        self.first[name] = None  # dummy to detect left recursion
        state = dfa[0]
        totalset = {}
        overlapcheck = {}
        for label, next in state.arcs.items():
            if label in self.dfas:
                # The label is the name of another rule.
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError("recursion for rule %r" % name)
                else:
                    self._calcfirst(label)
                    fset = self.first[label]
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        # Map every first symbol back to the arc label it came from to find
        # symbols that are reachable through more than one arc.
        inverse = {}
        for label, itsfirst in overlapcheck.items():
            for symbol in itsfirst:
                if symbol in inverse:
                    raise ValueError("rule %s is ambiguous; %s is in the"
                                     " first sets of %s as well as %s" %
                                     (name, symbol, label, inverse[symbol]))
                inverse[symbol] = label
        self.first[name] = totalset
    def _parse(self):
        """
        Parse all rules of the grammar.
        Returns a dict that maps rule names to their simplified DFAs and the
        name of the first rule, which is used as the start symbol.
        """
        dfas = {}
        startsymbol = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self._gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self._expect(token.NAME)
            self._expect(token.COLON)
            a, z = self._parse_rhs()
            self._expect(token.NEWLINE)
            #self._dump_nfa(name, a, z)
            dfa = self._make_dfa(a, z)
            #self._dump_dfa(name, dfa)
            # oldlen = len(dfa)
            self._simplify_dfa(dfa)
            # newlen = len(dfa)
            dfas[name] = dfa
            #print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol
    def _make_dfa(self, start, finish):
        """
        Convert the NFA between ``start`` and ``finish`` into a list of
        DFAState instances; the first one is the start state.
        """
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA.  Then do some
        # state reduction.  Let's represent sets as dicts with 1 for
        # values.
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)
        def closure(state):
            # All NFA states reachable from state through unlabeled arcs,
            # including state itself.
            base = {}
            addclosure(state, base)
            return base
        def addclosure(state, base):
            # Add state and everything reachable from it through unlabeled
            # arcs to base. States already in base are not visited again.
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, next in state.arcs:
                if label is None:
                    addclosure(next, base)
        states = [DFAState(closure(start), finish)]
        for state in states:  # NB states grows while we're iterating
            arcs = {}
            for nfastate in state.nfaset:
                for label, next in nfastate.arcs:
                    if label is not None:
                        addclosure(next, arcs.setdefault(label, {}))
            for label, nfaset in arcs.items():
                # Reuse an existing DFA state for the same set of NFA states.
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states  # List of DFAState instances; first one is start
    def _dump_nfa(self, name, start, finish):
        """Print the NFA of the rule ``name``, for debugging."""
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):
            print("  State", i, state is finish and "(final)" or "")
            for label, next in state.arcs:
                if next in todo:
                    j = todo.index(next)
                else:
                    j = len(todo)
                    todo.append(next)
                if label is None:
                    print("    -> %d" % j)
                else:
                    print("    %s -> %d" % (label, j))
    def _dump_dfa(self, name, dfa):
        """Print the DFA of the rule ``name``, for debugging."""
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print("  State", i, state.isfinal and "(final)" or "")
            for label, next in state.arcs.items():
                print("    %s -> %d" % (label, dfa.index(next)))
    def _simplify_dfa(self, dfa):
        """Remove duplicate states from ``dfa``; the list is changed in place."""
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.
        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i + 1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        #print "  unify", i, j
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break
    def _parse_rhs(self):
        """Parse alternatives separated by ``|``; returns (start, end)."""
        # RHS: ALT ('|' ALT)*
        a, z = self._parse_alt()
        if self.value != "|":
            return a, z
        else:
            # New common start and end states for all alternatives.
            aa = NFAState()
            zz = NFAState()
            aa.addarc(a)
            z.addarc(zz)
            while self.value == "|":
                self._gettoken()
                a, z = self._parse_alt()
                aa.addarc(a)
                z.addarc(zz)
            return aa, zz
    def _parse_alt(self):
        """Parse a sequence of one or more items; returns (start, end)."""
        # ALT: ITEM+
        a, b = self._parse_item()
        while (self.value in ("(", "[") or
               self.type in (token.NAME, token.STRING)):
            c, d = self._parse_item()
            b.addarc(c)
            b = d
        return a, b
    def _parse_item(self):
        """
        Parse an optional part (``[...]``) or an atom that may be followed by
        ``+`` or ``*``; returns (start, end).
        """
        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.RSQB)
            # The unlabeled arc makes it possible to skip the content.
            a.addarc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            # The unlabeled arc back to the start allows repetition.
            z.addarc(a)
            if value == "+":
                return a, z
            else:
                return a, a
    def _parse_atom(self):
        """Parse a group in parentheses, a NAME or a STRING; returns (start, end)."""
        # ATOM: '(' RHS ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.RPAR)
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)
    def _expect(self, type):
        """
        Check that the current token has the given type, advance to the next
        token and return the value of the old one. Raises SyntaxError
        otherwise.
        """
        if self.type != type:
            self._raise_error("expected %s, got %s(%s)",
                              type, self.type, self.value)
        value = self.value
        self._gettoken()
        return value
    def _gettoken(self):
        """Advance to the next token, skipping COMMENT and NL tokens."""
        tup = next(self.generator)
        while tup[0] in (token.COMMENT, token.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, prefix = tup
    def _raise_error(self, msg, *args):
        """
        Raise a SyntaxError that points at the current token. ``msg`` is
        formatted with ``args``; if that fails the arguments are appended to
        the message instead.
        """
        if args:
            try:
                msg = msg % args
            # NOTE(review): the bare except also catches things like
            # KeyboardInterrupt.
            except:
                msg = " ".join([msg] + list(map(str, args)))
        line = self._bnf_text.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ('<grammar>', self.begin[0],
                                self.begin[1], line))
 class NFAState(object):
    """
    A state of an NFA. The outgoing arcs are stored in ``arcs`` as
    ``(label, NFAState)`` pairs; a label of None means the arc can be
    followed without consuming anything.
    """
    def __init__(self):
        self.arcs = []
    def addarc(self, next, label=None):
        """Append an arc to the state ``next``, optionally with a label."""
        if label is not None:
            assert isinstance(label, str)
        assert isinstance(next, NFAState)
        arc = (label, next)
        self.arcs.append(arc)
 class DFAState(object):
    """
    A state of a DFA. It stands for the set of NFA states in ``nfaset`` and
    is final if the final NFA state is part of that set. ``arcs`` maps
    labels to the DFAState that follows.
    """
    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.arcs = {}  # label (str) -> DFAState
        self.isfinal = final in nfaset
        self.nfaset = nfaset
    def addarc(self, next, label):
        """Add the arc ``label`` to the state ``next``; labels are unique."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next
    def unifystate(self, old, new):
        """Let all arcs that point to ``old`` point to ``new`` instead."""
        for label in self.arcs:
            if self.arcs[label] is old:
                self.arcs[label] = new
    def __eq__(self, other):
        # The nfaset is ignored on purpose: two states are equal if both are
        # final (or both are not) and their arcs lead to the very same
        # objects.
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        if len(self.arcs) != len(other.arcs):
            return False
        # The targets are compared by identity. Comparing the arcs dicts
        # with == would call this method recursively, with cycles...
        return all(
            target is other.arcs.get(label)
            for label, target in self.arcs.items()
        )
    __hash__ = None  # For Py3 compatibility.
 def generate_grammar(bnf_text, token_namespace):
    """
    Create the grammar for ``bnf_text``.
    ``bnf_text`` is written in extended BNF, where ``*`` stands for
    repetition, ``+`` for at-least-once repetition, ``[]`` for optional
    parts, ``|`` for alternatives and ``()`` for grouping.
    This is the dialect Python uses in its own parser, not EBNF as defined
    by ISO/IEC 14977.
    """
    return ParserGenerator(bnf_text, token_namespace).make_grammar()
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -13,10 +13,81 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
+from parso.python.tokenize import PythonToken
-                                   ENDMARKER, INDENT, DEDENT)
+from parso.python.token import PythonTokenTypes
 LOG = logging.getLogger(__name__)
 DEBUG_DIFF_PARSER = False
 _INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
 def _get_previous_leaf_if_indentation(leaf):
    """
    Walks backwards from ``leaf`` for as long as the current leaf is an
    ``error_leaf`` whose ``token_type`` is listed in ``_INDENTATION_TOKENS``.

    Returns the first leaf that is not such an indentation error leaf. A falsy
    ``leaf`` (e.g. ``None``) is returned unchanged.
    """
    while True:
        if not leaf:
            return leaf
        if leaf.type != 'error_leaf' or leaf.token_type not in _INDENTATION_TOKENS:
            return leaf
        leaf = leaf.get_previous_leaf()
 def _get_next_leaf_if_indentation(leaf):
    """
    Walks forwards from ``leaf`` for as long as the current leaf is an
    ``error_leaf`` whose ``token_type`` is listed in ``_INDENTATION_TOKENS``.

    Returns the first leaf that is not such an indentation error leaf. A falsy
    ``leaf`` (e.g. ``None``) is returned unchanged, which also happens when
    the walk runs past the last leaf.
    """
    while leaf and leaf.type == 'error_leaf' \
            and leaf.token_type in _INDENTATION_TOKENS:
        # Move forward: stepping to the previous leaf here would hand back a
        # leaf that lies before the one that was asked for.
        leaf = leaf.get_next_leaf()
    return leaf
 def _assert_valid_graph(node):
    """
    Checks if the parent/children relationship is correct.
    This is a check that only runs during debugging/testing.

    For an object without a ``children`` attribute (a leaf) the expected start
    position is recomputed from the previous leaf and compared against
    ``node.start_pos``. For everything else each child must have ``node`` as
    its parent and is checked recursively.

    Raises ``AssertionError`` on the first inconsistency that is found.
    """
    try:
        children = node.children
    except AttributeError:
        # No ``children`` attribute, so ``node`` is handled as a leaf.
        # Ignoring indentation error leaves is necessary, because
        # indent/dedent tokens don't contain a value/prefix and only exist
        # because of the tokenizer.
        if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
            assert not node.value
            assert not node.prefix
            return
        # Calculate the content between two start positions.
        previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
        if previous_leaf is None:
            # There is no leaf before this one: only the prefix precedes the
            # node and counting starts at line 1, column 0.
            content = node.prefix
            previous_start_pos = 1, 0
        else:
            assert previous_leaf.end_pos <= node.start_pos, \
                (previous_leaf, node)
            content = previous_leaf.value + node.prefix
            previous_start_pos = previous_leaf.start_pos
        if '\n' in content or '\r' in content:
            # The content contains line breaks: the line advances by the
            # number of additional entries returned by split_lines and the
            # column is the length of the last entry.
            splitted = split_lines(content)
            line = previous_start_pos[0] + len(splitted) - 1
            actual = line, len(splitted[-1])
        else:
            # Same line: only the column moves, by the length of the content.
            actual = previous_start_pos[0], previous_start_pos[1] + len(content)
        assert node.start_pos == actual, (node.start_pos, actual)
    else:
        # ``node`` has children: verify the back references and descend.
        for child in children:
            assert child.parent == node, (node, child)
            _assert_valid_graph(child)
 def _get_debug_error_message(module, old_lines, new_lines):
    """
    Builds the text of a bug report for the diff parser.

    The message contains the parso version, a unified diff from ``old_lines``
    to ``new_lines`` and a unified diff from ``new_lines`` to the lines of the
    code that ``module`` currently returns from ``get_code()``.
    """
    module_lines = split_lines(module.get_code(), keepends=True)
    import parso
    version = parso.__version__
    requested_change = ''.join(difflib.unified_diff(old_lines, new_lines))
    remaining_difference = ''.join(difflib.unified_diff(new_lines, module_lines))
    template = (
        "There's an issue with the diff parser. Please "
        "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
    )
    return template % (version, requested_change, remaining_difference)
 def _get_last_line(node_or_leaf):
@@ -27,13 +98,21 @@ def _get_last_line(node_or_leaf):
        return last_leaf.end_pos[0]
 def _skip_dedent_error_leaves(leaf):
    """
    Walks backwards from ``leaf`` past every ``error_leaf`` whose
    ``token_type`` is ``'DEDENT'``.

    Returns the first leaf that is not such a leaf, or ``None`` if ``leaf`` is
    ``None`` or the walk runs out of leaves.
    """
    current = leaf
    while current is not None:
        is_dedent_error = current.type == 'error_leaf' and current.token_type == 'DEDENT'
        if not is_dedent_error:
            break
        current = current.get_previous_leaf()
    return current
 def _ends_with_newline(leaf, suffix=''):
    leaf = _skip_dedent_error_leaves(leaf)
    if leaf.type == 'error_leaf':
-        typ = leaf.original_type
+        typ = leaf.token_type.lower()
    else:
        typ = leaf.type
-    return typ == 'newline' or suffix.endswith('\n')
+    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
 def _flows_finished(pgen_grammar, stack):
@@ -41,32 +120,45 @@ def _flows_finished(pgen_grammar, stack):
    if, while, for and try might not be finished, because another part might
    still be parsed.
    """
-    for dfa, newstate, (symbol_number, nodes) in stack:
+    for stack_node in stack:
-        if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
+        if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
                                                    'for_stmt', 'try_stmt'):
            return False
    return True
-def suite_or_file_input_is_valid(pgen_grammar, stack):
+def _func_or_class_has_suite(node):
    if node.type == 'decorated':
        node = node.children[-1]
    if node.type in ('async_funcdef', 'async_stmt'):
        node = node.children[-1]
    return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
 def _suite_or_file_input_is_valid(pgen_grammar, stack):
    if not _flows_finished(pgen_grammar, stack):
        return False
-    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
+    for stack_node in reversed(stack):
-        if pgen_grammar.number2symbol[symbol_number] == 'suite':
+        if stack_node.nonterminal == 'decorator':
            # A decorator is only valid with the upcoming function.
            return False
        if stack_node.nonterminal == 'suite':
            # If only newline is in the suite, the suite is not valid, yet.
-            return len(nodes) > 1
+            return len(stack_node.nodes) > 1
    # Not reaching a suite means that we're dealing with file_input levels
    # where there's no need for a valid statement in it. It can also be empty.
    return True
 def _is_flow_node(node):
    if node.type == 'async_stmt':
        node = node.children[1]
    try:
        value = node.children[0].value
    except AttributeError:
        return False
-    return value in ('if', 'for', 'while', 'try')
+    return value in ('if', 'for', 'while', 'try', 'with')
 class _PositionUpdatingFinished(Exception):
@@ -100,7 +192,7 @@ class DiffParser(object):
        self._copy_count = 0
        self._parser_count = 0
-        self._nodes_stack = _NodesStack(self._module)
+        self._nodes_tree = _NodesTree(self._module)
    def update(self, old_lines, new_lines):
        '''
@@ -129,11 +221,10 @@ class DiffParser(object):
        line_length = len(new_lines)
        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
-        LOG.debug('diff parser calculated')
+        LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
        LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
        for operation, i1, i2, j1, j2 in opcodes:
-            LOG.debug('diff %s old[%s:%s] new[%s:%s]',
+            LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)
            if j2 == line_length and new_lines[-1] == '':
@@ -152,48 +243,47 @@ class DiffParser(object):
        # With this action all change will finally be applied and we have a
        # changed module.
-        self._nodes_stack.close()
+        self._nodes_tree.close()
        if DEBUG_DIFF_PARSER:
            # If there is reasonable suspicion that the diff parser is not
            # behaving well, this should be enabled.
            try:
                assert self._module.get_code() == ''.join(new_lines)
                _assert_valid_graph(self._module)
            except AssertionError:
                print(_get_debug_error_message(self._module, old_lines, new_lines))
                raise
        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            current_lines = split_lines(self._module.get_code(), keepends=True)
            diff = difflib.unified_diff(current_lines, new_lines)
            raise Exception(
-                "There's an issue (%s != %s) with the diff parser. Please report:\n%s"
+                ('(%s != %s) ' % (last_pos, line_length))
-                % (last_pos, line_length, ''.join(diff))
+                + _get_debug_error_message(self._module, old_lines, new_lines)
            )
        LOG.debug('diff parser end')
        return self._module
    def _enabled_debugging(self, old_lines, lines_new):
        if self._module.get_code() != ''.join(lines_new):
-            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
+            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
                            ''.join(lines_new))
    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        copied_nodes = [None]
        last_until_line = -1
-        while until_line_new > self._nodes_stack.parsed_until_line:
+        while until_line_new > self._nodes_tree.parsed_until_line:
-            parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
+            parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
-                self._parse(self._nodes_stack.parsed_until_line + 1)
+                self._parse(self._nodes_tree.parsed_until_line + 1)
            elif not copied_nodes:
                # We have copied as much as possible (but definitely not too
                # much). Therefore we just parse the rest.
                # We might not reach the end, because there's a statement
                # that is not finished.
                self._parse(until_line_new)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)
-                copied_nodes = self._nodes_stack.copy_nodes(
+                from_ = self._nodes_tree.parsed_until_line + 1
                copied_nodes = self._nodes_tree.copy_nodes(
                    p_children[index:],
                    until_line_old,
                    line_offset
@@ -202,15 +292,19 @@ class DiffParser(object):
                if copied_nodes:
                    self._copy_count += 1
-                    from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
+                    to = self._nodes_tree.parsed_until_line
                    to = self._nodes_stack.parsed_until_line
-                    LOG.debug('diff actually copy %s to %s', from_, to)
+                    LOG.debug('copy old[%s:%s] new[%s:%s]',
                              copied_nodes[0].start_pos[0],
                              copied_nodes[-1].end_pos[0] - 1, from_, to)
                else:
                    # We have copied as much as possible (but definitely not too
                    # much). Therefore we just parse a bit more.
                    self._parse(self._nodes_tree.parsed_until_line + 1)
            # Since there are potential bugs that might loop here endlessly, we
            # just stop here.
-            assert last_until_line != self._nodes_stack.parsed_until_line \
+            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
-                or not copied_nodes, last_until_line
+            last_until_line = self._nodes_tree.parsed_until_line
            last_until_line = self._nodes_stack.parsed_until_line
    def _get_old_line_stmt(self, old_line):
        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
@@ -221,46 +315,36 @@ class DiffParser(object):
            node = leaf
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent
            # Make sure that if only the `else:` line of an if statement is
            # copied that not the whole thing is going to be copied.
            if node.start_pos[0] >= old_line:
                return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None
    def _get_before_insertion_node(self):
        if self._nodes_stack.is_empty():
            return None
        line = self._nodes_stack.parsed_until_line + 1
        node = self._new_module.get_last_leaf()
        while True:
            parent = node.parent
            if parent.type in ('suite', 'file_input'):
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0 or '\n' in self._prefix
                return node
            node = parent
    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        last_until_line = 0
-        while until_line > self._nodes_stack.parsed_until_line:
+        while until_line > self._nodes_tree.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = node.children
-            self._nodes_stack.add_parsed_nodes(nodes)
+            self._nodes_tree.add_parsed_nodes(nodes)
            LOG.debug(
                'parse_part from %s to %s (to %s in part parser)',
                nodes[0].get_start_pos_of_prefix()[0],
-                self._nodes_stack.parsed_until_line,
+                self._nodes_tree.parsed_until_line,
                node.end_pos[0] - 1
            )
            # Since the tokenizer sometimes has bugs, we cannot be sure that
            # this loop terminates. Therefore assert that there's always a
            # change.
-            assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
+            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
-            last_until_line = self._nodes_stack.parsed_until_line
+            last_until_line = self._nodes_tree.parsed_until_line
    def _try_parse_part(self, until_line):
        """
@@ -271,9 +355,8 @@ class DiffParser(object):
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
-        parsed_until_line = self._nodes_stack.parsed_until_line
+        parsed_until_line = self._nodes_tree.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        #print('parse_content', parsed_until_line, lines_after, until_line)
        tokens = self._diff_tokenize(
            lines_after,
            until_line,
@@ -290,10 +373,10 @@ class DiffParser(object):
        omitted_first_indent = False
        indents = []
        tokens = self._tokenizer(lines, (1, 0))
-        stack = self._active_parser.pgen_parser.stack
+        stack = self._active_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
@@ -306,29 +389,36 @@ class DiffParser(object):
            # In case of omitted_first_indent, it might not be dedented fully.
            # However this is a sign for us that a dedent happened.
-            if typ == DEDENT \
+            if typ == PythonTokenTypes.DEDENT \
-                    or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
+                    or typ == PythonTokenTypes.ERROR_DEDENT \
                    and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
-                    if '\n' in prefix:
+                    if '\n' in prefix or '\r' in prefix:
-                        prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
+                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
-                    yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    yield PythonToken(
                        PythonTokenTypes.ENDMARKER, '',
                        (start_pos[0] + line_offset, 0),
                        prefix
                    )
                    break
-            elif typ == NEWLINE and start_pos[0] >= until_line:
+            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                yield PythonToken(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
-                if suite_or_file_input_is_valid(self._pgen_grammar, stack):
+                if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
-                        yield PythonToken(DEDENT, '', start_pos, '')
+                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
-                    yield PythonToken(ENDMARKER, '', start_pos, '')
+                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue
@@ -336,17 +426,23 @@ class DiffParser(object):
            yield PythonToken(typ, string, start_pos, prefix)
-class _NodesStackNode(object):
+class _NodesTreeNode(object):
-    ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')
+    _ChildrenGroup = namedtuple('_ChildrenGroup', 'prefix children line_offset last_line_offset_leaf')
    def __init__(self, tree_node, parent=None):
        self.tree_node = tree_node
-        self.children_groups = []
+        self._children_groups = []
        self.parent = parent
        self._node_children = []
-    def close(self):
+    def finish(self):
        children = []
-        for children_part, line_offset, last_line_offset_leaf in self.children_groups:
+        for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
            first_leaf = _get_next_leaf_if_indentation(
                children_part[0].get_first_leaf()
            )
            first_leaf.prefix = prefix + first_leaf.prefix
            if line_offset != 0:
                try:
                    _update_positions(
@@ -359,59 +455,61 @@ class _NodesStackNode(object):
        for node in children:
            node.parent = self.tree_node
-    def add(self, children, line_offset=0, last_line_offset_leaf=None):
+        for node_child in self._node_children:
-        group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
+            node_child.finish()
-        self.children_groups.append(group)
+
    def add_child_node(self, child_node):
        """Appends ``child_node`` to the list of nested nodes of this node."""
        nested_nodes = self._node_children
        nested_nodes.append(child_node)
    def add_tree_nodes(self, prefix, children, line_offset=0, last_line_offset_leaf=None):
        """
        Stores ``children`` together with ``prefix`` and ``line_offset`` as a
        new group at the end of ``self._children_groups``.

        If ``last_line_offset_leaf`` is not given, the last leaf of the last
        child is used instead.
        """
        offset_leaf = last_line_offset_leaf
        if offset_leaf is None:
            offset_leaf = children[-1].get_last_leaf()
        new_group = self._ChildrenGroup(prefix, children, line_offset, offset_leaf)
        self._children_groups.append(new_group)
    def get_last_line(self, suffix):
        line = 0
-        if self.children_groups:
+        if self._children_groups:
-            children_group = self.children_groups[-1]
+            children_group = self._children_groups[-1]
-            last_leaf = children_group.children[-1].get_last_leaf()
+            last_leaf = _get_previous_leaf_if_indentation(
-            line = last_leaf.end_pos[0]
+                children_group.last_line_offset_leaf
            )
-            # Calculate the line offsets
+            line = last_leaf.end_pos[0] + children_group.line_offset
            offset = children_group.line_offset
            if offset:
                # In case the line_offset is not applied to this specific leaf,
                # just ignore it.
                if last_leaf.line <= children_group.last_line_offset_leaf.line:
                    line += children_group.line_offset
            # Newlines end on the next line, which means that they would cover
            # the next line. That line is not fully parsed at this point.
            if _ends_with_newline(last_leaf, suffix):
                line -= 1
-        line += suffix.count('\n')
+        line += len(split_lines(suffix)) - 1
-        if suffix and not suffix.endswith('\n'):
+
        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
            # This is the end of a file (that doesn't end with a newline).
            line += 1
        if self._node_children:
            return max(line, self._node_children[-1].get_last_line(suffix))
        return line
-class _NodesStack(object):
+class _NodesTree(object):
    endmarker_type = 'endmarker'
    def __init__(self, module):
-        # Top of stack
+        self._base_node = _NodesTreeNode(module)
-        self._tos = self._base_node = _NodesStackNode(module)
+        self._working_stack = [self._base_node]
        self._module = module
-        self._last_prefix = ''
+        self._prefix_remainder = ''
        self.prefix = ''
    def is_empty(self):
        return not self._base_node.children
    @property
    def parsed_until_line(self):
-        return self._tos.get_last_line(self.prefix)
+        return self._working_stack[-1].get_last_line(self.prefix)
    def _get_insertion_node(self, indentation_node):
        indentation = indentation_node.start_pos[1]
        # find insertion node
        node = self._tos
        while True:
            node = self._working_stack[-1]
            tree_node = node.tree_node
            if tree_node.type == 'suite':
                # A suite starts with NEWLINE, ...
@@ -426,53 +524,57 @@ class _NodesStack(object):
            elif tree_node.type == 'file_input':
                return node
-            node = self._close_tos()
+            self._working_stack.pop()
    def _close_tos(self):
        self._tos.close()
        self._tos = self._tos.parent
        return self._tos
    def add_parsed_nodes(self, tree_nodes):
        old_prefix = self.prefix
        tree_nodes = self._remove_endmarker(tree_nodes)
        if not tree_nodes:
            self.prefix = old_prefix + self.prefix
            return
        assert tree_nodes[0].type != 'newline'
        node = self._get_insertion_node(tree_nodes[0])
        assert node.tree_node.type in ('suite', 'file_input')
-        node.add(tree_nodes)
+        node.add_tree_nodes(old_prefix, tree_nodes)
        # tos = Top of stack
        self._update_tos(tree_nodes[-1])
    def _update_tos(self, tree_node):
        """
        Pushes a new working stack entry for ``tree_node`` and for every
        suite found by repeatedly following the last child.

        A ``suite``/``file_input`` node gets its own ``_NodesTreeNode`` that
        holds a copy of its children list, is registered as a child of the
        current top of the working stack and then becomes the new top. A
        function or class with a suite is only descended into. The walk stops
        at the first node that is neither.
        """
        current = tree_node
        while True:
            if current.type in ('suite', 'file_input'):
                entry = _NodesTreeNode(current)
                entry.add_tree_nodes('', list(current.children))
                self._working_stack[-1].add_child_node(entry)
                self._working_stack.append(entry)
            elif not _func_or_class_has_suite(current):
                return
            # Continue with the last child, where a nested suite would be.
            current = current.children[-1]
    def _remove_endmarker(self, tree_nodes):
        """
        Helps cleaning up the tree nodes that get inserted.
        """
        last_leaf = tree_nodes[-1].get_last_leaf()
-        is_endmarker = last_leaf.type == self.endmarker_type
+        is_endmarker = last_leaf.type == 'endmarker'
-        self._last_prefix = ''
+        self._prefix_remainder = ''
        if is_endmarker:
-            try:
+            separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
-                separation = last_leaf.prefix.rindex('\n')
+            if separation > -1:
            except ValueError:
                pass
            else:
                # Remove the whitespace part of the prefix after a newline.
                # That is not relevant if parentheses were opened. Always parse
                # until the end of a line.
-                last_leaf.prefix, self._last_prefix = \
+                last_leaf.prefix, self._prefix_remainder = \
                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
        first_leaf = tree_nodes[0].get_first_leaf()
        first_leaf.prefix = self.prefix + first_leaf.prefix
        self.prefix = ''
        if is_endmarker:
            self.prefix = last_leaf.prefix
            tree_nodes = tree_nodes[:-1]
        return tree_nodes
    def copy_nodes(self, tree_nodes, until_line, line_offset):
@@ -481,55 +583,76 @@ class _NodesStack(object):
        Returns the number of tree nodes that were copied.
        """
-        tos = self._get_insertion_node(tree_nodes[0])
+        if tree_nodes[0].type in ('error_leaf', 'error_node'):
            # Avoid copying errors in the beginning. Can lead to a lot of
            # issues.
            return []
-        new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
+        self._get_insertion_node(tree_nodes[0])
        new_nodes, self._working_stack, self.prefix = self._copy_nodes(
            list(self._working_stack),
            tree_nodes,
            until_line,
            line_offset,
            self.prefix,
        )
        return new_nodes
-    def _copy_nodes(self, tos, nodes, until_line, line_offset):
+    def _copy_nodes(self, working_stack, nodes, until_line, line_offset, prefix=''):
        new_nodes = []
-        new_tos = tos
+        new_prefix = ''
        for node in nodes:
-            if node.type == 'endmarker':
+            if node.start_pos[0] > until_line:
                # Endmarkers just distort all the checks below. Remove them.
                break
-            if node.start_pos[0] > until_line:
+            if node.type == 'endmarker':
                break
            if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
                break
            # TODO this check might take a bit of time for large files. We
            # might want to change this to do more intelligent guessing or
            # binary search.
            if _get_last_line(node) > until_line:
                # We can split up functions and classes later.
-                if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite':
+                if _func_or_class_has_suite(node):
                    new_nodes.append(node)
                break
            new_nodes.append(node)
        if not new_nodes:
-            return [], tos
+            return [], working_stack, prefix
        tos = working_stack[-1]
        last_node = new_nodes[-1]
-        line_offset_index = -1
+        had_valid_suite_last = False
-        if last_node.type in ('classdef', 'funcdef'):
+        if _func_or_class_has_suite(last_node):
-            suite = last_node.children[-1]
+            suite = last_node
-            if suite.type == 'suite':
+            while suite.type != 'suite':
-                suite_tos = _NodesStackNode(suite)
+                suite = suite.children[-1]
            suite_tos = _NodesTreeNode(suite)
            # Don't need to pass line_offset here, it's already done by the
            # parent.
-                suite_nodes, recursive_tos = self._copy_nodes(
+            suite_nodes, new_working_stack, new_prefix = self._copy_nodes(
-                    suite_tos, suite.children, until_line, line_offset)
+                working_stack + [suite_tos], suite.children, until_line, line_offset
            )
            if len(suite_nodes) < 2:
                # A suite only with newline is not valid.
                new_nodes.pop()
                new_prefix = ''
            else:
-                    suite_tos.parent = tos
+                assert new_nodes
-                    new_tos = recursive_tos
+                tos.add_child_node(suite_tos)
-                    line_offset_index = -2
+                working_stack = new_working_stack
                had_valid_suite_last = True
-        elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
+        if new_nodes:
            last_node = new_nodes[-1]
            if (last_node.type in ('error_leaf', 'error_node') or
                    _is_flow_node(new_nodes[-1])):
                # Error leafs/nodes don't have a defined start/end. Error
                # nodes might not end with a newline (e.g. if there's an
@@ -538,6 +661,7 @@ class _NodesStack(object):
                # If we copy flows at the end, they might be continued
                # after the copy limit (in the new parser).
                # In this while loop we try to remove until we find a newline.
                new_prefix = ''
                new_nodes.pop()
                while new_nodes:
                    last_node = new_nodes[-1]
@@ -546,34 +670,41 @@ class _NodesStack(object):
                    new_nodes.pop()
        if new_nodes:
-            try:
+            if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
-                last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
+                p = new_nodes[-1].get_next_leaf().prefix
-            except IndexError:
+                # We are not allowed to remove the newline at the end of the
-                line_offset = 0
+                # line, otherwise it's going to be missing. This happens e.g.
-                # In this case we don't have to calculate an offset, because
+                # if a bracket is around before that moves newlines to
-                # there's no children to be managed.
+                # prefixes.
-                last_line_offset_leaf = None
+                new_prefix = split_lines(p, keepends=True)[0]
            tos.add(new_nodes, line_offset, last_line_offset_leaf)
        return new_nodes, new_tos
-    def _update_tos(self, tree_node):
+            if had_valid_suite_last:
-        if tree_node.type in ('suite', 'file_input'):
+                last = new_nodes[-1]
-            self._tos = _NodesStackNode(tree_node, self._tos)
+                if last.type == 'decorated':
-            self._tos.add(list(tree_node.children))
+                    last = last.children[-1]
-            self._update_tos(tree_node.children[-1])
+                if last.type in ('async_funcdef', 'async_stmt'):
-        elif tree_node.type in ('classdef', 'funcdef'):
+                    last = last.children[-1]
-            self._update_tos(tree_node.children[-1])
+                last_line_offset_leaf = last.children[-2].get_last_leaf()
                assert last_line_offset_leaf == ':'
            else:
                last_line_offset_leaf = new_nodes[-1].get_last_leaf()
            tos.add_tree_nodes(prefix, new_nodes, line_offset, last_line_offset_leaf)
            prefix = new_prefix
            self._prefix_remainder = ''
        return new_nodes, working_stack, prefix
    def close(self):
-        while self._tos is not None:
+        self._base_node.finish()
            self._close_tos()
        # Add an endmarker.
        try:
            last_leaf = self._module.get_last_leaf()
            end_pos = list(last_leaf.end_pos)
        except IndexError:
            end_pos = [1, 0]
        else:
            last_leaf = _skip_dedent_error_leaves(last_leaf)
            end_pos = list(last_leaf.end_pos)
        lines = split_lines(self.prefix)
        assert len(lines) > 0
        if len(lines) == 1:
@@ -582,6 +713,6 @@ class _NodesStack(object):
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])
-        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
+        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
        endmarker.parent = self._module
        self._module.children.append(endmarker)
--- a/parso/python/errors.py
+++ b/parso/python/errors.py
@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):
    def visit_leaf(self, leaf):
        if leaf.type == 'error_leaf':
-            if leaf.original_type in ('indent', 'error_dedent'):
+            if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
                # Indents/Dedents themselves never have a prefix. They are just
                # "pseudo" tokens that get removed by the syntax tree later.
                # Therefore in case of an error we also have to check for this.
                spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
-                if leaf.original_type == 'indent':
+                if leaf.token_type == 'INDENT':
                    message = 'unexpected indent'
                else:
                    message = 'unindent does not match any outer indentation level'
@@ -563,17 +563,21 @@ class _ReturnAndYieldChecks(SyntaxRule):
                    and self._normalizer.version == (3, 5):
                self.add_issue(self.get_node(leaf), message=self.message_async_yield)
-@ErrorFinder.register_rule(type='atom')
+
@ErrorFinder.register_rule(type='strings')
 class _BytesAndStringMix(SyntaxRule):
    # e.g. 's' b''
    message = "cannot mix bytes and nonbytes literals"
    def _is_bytes_literal(self, string):
        if string.type == 'fstring':
            return False
        return 'b' in string.string_prefix.lower()
    def is_issue(self, node):
        first = node.children[0]
-        if first.type == 'string' and self._normalizer.version >= (3, 0):
+        # In Python 2 it's allowed to mix bytes and unicode.
        if self._normalizer.version >= (3, 0):
            first_is_bytes = self._is_bytes_literal(first)
            for string in node.children[1:]:
                if first_is_bytes != self._is_bytes_literal(string):
@@ -744,7 +748,12 @@ class _NonlocalModuleLevelRule(SyntaxRule):
@ErrorFinder.register_rule(type='arglist')
 class _ArglistRule(SyntaxRule):
-    message = "Generator expression must be parenthesized if not sole argument"
+    @property
    def message(self):
        if self._normalizer.version < (3, 7):
            return "Generator expression must be parenthesized if not sole argument"
        else:
            return "Generator expression must be parenthesized"
    def is_issue(self, node):
        first_arg = node.children[0]
@@ -837,101 +846,36 @@ class _TryStmtRule(SyntaxRule):
                self.add_issue(default_except, message=self.message)
-@ErrorFinder.register_rule(type='string')
+@ErrorFinder.register_rule(type='fstring')
 class _FStringRule(SyntaxRule):
    _fstring_grammar = None
    message_empty = "f-string: empty expression not allowed"  # f'{}'
    message_single_closing = "f-string: single '}' is not allowed"  # f'}'
    message_nested = "f-string: expressions nested too deeply"
    message_backslash = "f-string expression part cannot include a backslash"  # f'{"\"}' or f'{"\\"}'
    message_comment = "f-string expression part cannot include '#'"  # f'{#}'
    message_unterminated_string = "f-string: unterminated string"  # f'{"}'
    message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
    message_incomplete = "f-string: expecting '}'"  # f'{'
    message_syntax = "invalid syntax"
-    @classmethod
+    def _check_format_spec(self, format_spec, depth):
-    def _load_grammar(cls):
+        self._check_fstring_contents(format_spec.children[1:], depth)
        import parso
-        if cls._fstring_grammar is None:
+    def _check_fstring_expr(self, fstring_expr, depth):
-            cls._fstring_grammar = parso.load_grammar(language='python-f-string')
+        if depth >= 2:
-        return cls._fstring_grammar
+            self.add_issue(fstring_expr, message=self.message_nested)
        conversion = fstring_expr.children[2]
        if conversion.type == 'fstring_conversion':
            name = conversion.children[1]
            if name.value not in ('s', 'r', 'a'):
                self.add_issue(name, message=self.message_conversion)
        format_spec = fstring_expr.children[-2]
        if format_spec.type == 'fstring_format_spec':
            self._check_format_spec(format_spec, depth + 1)
    def is_issue(self, fstring):
-        if 'f' not in fstring.string_prefix.lower():
+        self._check_fstring_contents(fstring.children[1:-1])
            return
-        parsed = self._load_grammar().parse_leaf(fstring)
+    def _check_fstring_contents(self, children, depth=0):
-        for child in parsed.children:
+        for fstring_content in children:
-            if child.type == 'expression':
+            if fstring_content.type == 'fstring_expr':
-                self._check_expression(child)
+                self._check_fstring_expr(fstring_content, depth)
            elif child.type == 'error_node':
                next_ = child.get_next_leaf()
                if next_.type == 'error_leaf' and next_.original_type == 'unterminated_string':
                    self.add_issue(next_, message=self.message_unterminated_string)
                    # At this point nothing more is coming except the error
                    # leaf that we've already checked here.
                    break
                self.add_issue(child, message=self.message_incomplete)
            elif child.type == 'error_leaf':
                self.add_issue(child, message=self.message_single_closing)
    def _check_python_expr(self, python_expr):
        value = python_expr.value
        if '\\' in value:
            self.add_issue(python_expr, message=self.message_backslash)
            return
        if '#' in value:
            self.add_issue(python_expr, message=self.message_comment)
            return
        if re.match('\s*$', value) is not None:
            self.add_issue(python_expr, message=self.message_empty)
            return
        # This is now nested parsing. We parsed the fstring and now
        # we're parsing Python again.
        try:
            # CPython has a somewhat special way of parsing Python code within
            # f-strings. It wraps the code in brackets to make sure that
            # whitespace doesn't cause problems (indentation/newlines).
            # Just use that algorithm as well here and adapt start positions.
            start_pos = python_expr.start_pos
            start_pos = start_pos[0], start_pos[1] - 1
            eval_input = self._normalizer.grammar._parse(
                '(%s)' % value,
                start_symbol='eval_input',
                start_pos=start_pos,
                error_recovery=False
            )
        except ParserSyntaxError as e:
            self.add_issue(e.error_leaf, message=self.message_syntax)
            return
        issues = self._normalizer.grammar.iter_errors(eval_input)
        self._normalizer.issues += issues
    def _check_format_spec(self, format_spec):
        for expression in format_spec.children[1:]:
            nested_format_spec = expression.children[-2]
            if nested_format_spec.type == 'format_spec':
                if len(nested_format_spec.children) > 1:
                    self.add_issue(
                        nested_format_spec.children[1],
                        message=self.message_nested
                    )
            self._check_expression(expression)
    def _check_expression(self, expression):
        for c in expression.children:
            if c.type == 'python_expr':
                self._check_python_expr(c)
            elif c.type == 'conversion':
                if c.value not in ('s', 'r', 'a'):
                    self.add_issue(c, message=self.message_conversion)
            elif c.type == 'format_spec':
                self._check_format_spec(c)
 class _CheckAssignmentRule(SyntaxRule):
@@ -944,7 +888,7 @@ class _CheckAssignmentRule(SyntaxRule):
            first, second = node.children[:2]
            error = _get_comprehension_type(node)
            if error is None:
-                if second.type in ('dictorsetmaker', 'string'):
+                if second.type == 'dictorsetmaker':
                    error = 'literal'
                elif first in ('(', '['):
                    if second.type == 'yield_expr':
@@ -963,7 +907,7 @@ class _CheckAssignmentRule(SyntaxRule):
                error = 'Ellipsis'
        elif type_ == 'comparison':
            error = 'comparison'
-        elif type_ in ('string', 'number'):
+        elif type_ in ('string', 'number', 'strings'):
            error = 'literal'
        elif type_ == 'yield_expr':
            # This one seems to be a slightly different warning in Python.
--- a/parso/python/fstring.py
+++ b/parso/python/fstring.py
@@ -1,211 +0,0 @@
 import re
 from itertools import count
 from parso.utils import PythonVersionInfo
 from parso.utils import split_lines
 from parso.python.tokenize import Token
 from parso import parser
 from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
 version36 = PythonVersionInfo(3, 6)
 class TokenNamespace:
    """
    Namespace of integer token ids used by the f-string tokenizer.

    Every public class attribute defined before ``token_map`` is a distinct
    integer drawn from a shared counter; ``token_map`` is the reverse
    lookup from that integer back to the attribute name.
    """
    # Private counter that hands out consecutive ids, starting at 0.
    _c = count()
    LBRACE = next(_c)
    RBRACE = next(_c)
    ENDMARKER = next(_c)
    COLON = next(_c)
    CONVERSION = next(_c)
    PYTHON_EXPR = next(_c)
    EXCLAMATION_MARK = next(_c)
    UNTERMINATED_STRING = next(_c)
    # Reverse mapping id -> name. It is built from the class-body namespace as
    # it exists at this point, so only the names defined above are included;
    # names starting with an underscore (e.g. ``_c``) are filtered out.
    token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
    @classmethod
    def generate_token_id(cls, string):
        """
        Return the token id for ``string``.

        The four operator characters ``{``, ``}``, ``!`` and ``:`` map to
        their dedicated ids. Any other string is looked up as an attribute
        name on the class, which raises ``AttributeError`` if no attribute
        of that name exists.
        """
        if string == '{':
            return cls.LBRACE
        elif string == '}':
            return cls.RBRACE
        elif string == '!':
            return cls.EXCLAMATION_MARK
        elif string == ':':
            return cls.COLON
        return getattr(cls, string)
 GRAMMAR = """
 fstring: expression* ENDMARKER
 format_spec: ':' expression*
 expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
 """
 _prefix = r'((?:[^{}]+)*)'
 _expr = _prefix + r'(\{|\}|$)'
 _in_expr = r'([^{}\[\]:"\'!]*)(.?)'
 # There's only one conversion character allowed. But the rules have to be
 # checked later anyway, so allow more here. This makes error recovery nicer.
 _conversion = r'([^={}:]*)(.?)'
 _compiled_expr = re.compile(_expr)
 _compiled_in_expr = re.compile(_in_expr)
 _compiled_conversion = re.compile(_conversion)
 def tokenize(code, start_pos=(1, 0)):
    """
    Generate ``Token`` objects for the f-string contents in ``code``.

    Literal text between expressions is never emitted as a token of its own;
    it is carried as the ``prefix`` of the next token. The generator always
    finishes with an ``ENDMARKER`` token whose prefix is the trailing literal
    text.

    :param code: The text to tokenize.
    :param start_pos: ``(line, column)`` of the first character of ``code``.
    """
    # Advance the running position (the ``start_pos`` list created below) past
    # ``string``. If ``string`` spans several lines, the line number grows and
    # the column restarts at the length of its last line.
    def add_to_pos(string):
        lines = split_lines(string)
        l = len(lines[-1])
        if len(lines) > 1:
            start_pos[0] += len(lines) - 1
            start_pos[1] = l
        else:
            start_pos[1] += l
    # Build a token located after ``prefix`` and move the running position
    # past both the prefix and the value. Without an explicit ``type`` the id
    # is derived from ``value`` via ``TokenNamespace.generate_token_id``.
    def tok(value, type=None, prefix=''):
        if type is None:
            type = TokenNamespace.generate_token_id(value)
        add_to_pos(prefix)
        token = Token(type, value, tuple(start_pos), prefix)
        add_to_pos(value)
        return token
    # ``start`` is the current index into ``code``.
    start = 0
    recursion_level = 0
    # Literal text (including escaped braces) collected so far that still has
    # to be attached to the next token as part of its prefix.
    added_prefix = ''
    # Rebind as a list so that the nested helpers can mutate it in place.
    start_pos = list(start_pos)
    while True:
        # Consume literal text up to the next brace or the end of the input.
        match = _compiled_expr.match(code, start)
        prefix = added_prefix + match.group(1)
        found = match.group(2)
        start = match.end()
        if not found:
            # We're at the end.
            break
        if found == '}':
            if recursion_level == 0 and len(code) > start  and code[start] == '}':
                # This is a }} escape.
                added_prefix = prefix + '}}'
                start += 1
                continue
            # Clamp at zero: a stray '}' must not make the level negative.
            recursion_level = max(0, recursion_level - 1)
            yield tok(found, prefix=prefix)
            added_prefix = ''
        else:
            assert found == '{'
            if recursion_level == 0 and len(code) > start and code[start] == '{':
                # This is a {{ escape.
                added_prefix = prefix + '{{'
                start += 1
                continue
            recursion_level += 1
            yield tok(found, prefix=prefix)
            added_prefix = ''
            # Collect the Python expression that follows the opening brace.
            # The bracket counters make sure that a ':' inside nested square
            # or curly brackets stays part of the expression.
            expression = ''
            squared_count = 0
            curly_count = 0
            while True:
                expr_match = _compiled_in_expr.match(code, start)
                expression += expr_match.group(1)
                found = expr_match.group(2)
                start = expr_match.end()
                if found == '{':
                    curly_count += 1
                    expression += found
                elif found == '}' and curly_count > 0:
                    curly_count -= 1
                    expression += found
                elif found == '[':
                    squared_count += 1
                    expression += found
                elif found == ']':
                    # Use a max function here, because the Python code might
                    # just have syntax errors.
                    squared_count = max(0, squared_count - 1)
                    expression += found
                elif found == ':' and (squared_count or curly_count):
                    expression += found
                elif found in ('"', "'"):
                    # A string literal starts here; skip to its closing quote
                    # so that special characters inside it are not interpreted.
                    search = found
                    if len(code) > start + 1 and  \
                            code[start] == found == code[start+1]:
                        # Triple-quoted string: search for the triple quote.
                        # NOTE(review): the two extra opening quote characters
                        # skipped here do not appear to be appended to
                        # `expression`, and only one character of the closing
                        # quote is consumed below -- confirm this is intended.
                        search *= 3
                        start += 2
                    index = code.find(search, start)
                    if index == -1:
                        # No closing quote: emit what was collected so far and
                        # put the rest of the input into one error-ish token.
                        yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
                        yield tok(
                            found + code[start:],
                            type=TokenNamespace.UNTERMINATED_STRING,
                        )
                        start = len(code)
                        break
                    expression += found + code[start:index+1]
                    start = index + 1
                elif found == '!' and len(code) > start and code[start] == '=':
                    # This is a python `!=` and not a conversion.
                    expression += found
                else:
                    # Any other terminator (or the end of the input) finishes
                    # the expression part.
                    yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
                    if found:
                        yield tok(found)
                    break
            if found == '!':
                # Everything up to the next terminator is the conversion.
                conversion_match = _compiled_conversion.match(code, start)
                found = conversion_match.group(2)
                start = conversion_match.end()
                yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
                if found:
                    yield tok(found)
            if found == '}':
                recursion_level -= 1
            # We don't need to handle everything after ':', because that is
            # basically new tokens.
    yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
 class Parser(parser.BaseParser):
    """
    Parser for the f-string token stream, built on ``parser.BaseParser``.

    It turns token ids into ``TypedLeaf`` objects named after the lower-cased
    ``TokenNamespace`` entries and adds its own error recovery.
    """
    def parse(self, tokens):
        """
        Parse ``tokens`` and return the result of the base parser; a result
        that is a bare leaf is wrapped in an ``fstring`` node first.
        """
        node = super(Parser, self).parse(tokens)
        if isinstance(node, self.default_leaf):  # Is an endmarker.
            # If there are no curly braces we get back a non-module. We always
            # want an fstring.
            node = self.default_node('fstring', [node])
        return node
    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
        """
        Create the leaf for a token; its type is the lower-cased name that
        ``TokenNamespace.token_map`` holds for the token id ``type``.
        """
        # TODO this is so ugly.
        leaf_type = TokenNamespace.token_map[type].lower()
        return TypedLeaf(leaf_type, value, start_pos, prefix)
    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                       add_token_callback):
        """
        Handle a token the grammar cannot accept at the current position.

        With error recovery disabled this defers to the base class. Otherwise
        the problem is recorded in the node list of the bottom stack entry,
        either as an ``ErrorLeaf`` or as an ``ErrorNode``.
        """
        if not self._error_recovery:
            return super(Parser, self).error_recovery(
                pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                add_token_callback
            )
        token_type = TokenNamespace.token_map[typ].lower()
        if len(stack) == 1:
            # Only the bottom entry is on the stack: store the offending token
            # itself as an error leaf in that entry's node list.
            error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
            stack[0][2][1].append(error_leaf)
        else:
            # Wrap the nodes collected by the second stack entry in an error
            # node, drop everything above the bottom entry and then feed the
            # current token to the parser again.
            dfa, state, (type_, nodes) = stack[1]
            stack[0][2][1].append(ErrorNode(nodes))
            stack[1:] = []
            add_token_callback(typ, value, start_pos, prefix)
--- a/parso/python/grammar26.txt
+++ b/parso/python/grammar26.txt
@@ -119,7 +119,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [listmaker] ']' |
       '{' [dictorsetmaker] '}' |
       '`' testlist1 '`' |
-       NAME | NUMBER | STRING+)
+       NAME | NUMBER | strings)
 strings: STRING+
 listmaker: test ( list_for | (',' test)* [','] )
 # Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
 #       default. It's more consistent like this.
--- a/parso/python/grammar27.txt
+++ b/parso/python/grammar27.txt
@@ -104,7 +104,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [listmaker] ']' |
       '{' [dictorsetmaker] '}' |
       '`' testlist1 '`' |
-       NAME | NUMBER | STRING+)
+       NAME | NUMBER | strings)
 strings: STRING+
 listmaker: test ( list_for | (',' test)* [','] )
 testlist_comp: test ( comp_for | (',' test)* [','] )
 lambdef: 'lambda' [varargslist] ':' test
--- a/parso/python/grammar33.txt
+++ b/parso/python/grammar33.txt
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
 atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 strings: STRING+
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
--- a/parso/python/grammar34.txt
+++ b/parso/python/grammar34.txt
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
 atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 strings: STRING+
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
--- a/parso/python/grammar35.txt
+++ b/parso/python/grammar35.txt
@@ -110,7 +110,8 @@ atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 strings: STRING+
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
--- a/parso/python/grammar36.txt
+++ b/parso/python/grammar36.txt
@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
@@ -148,3 +148,10 @@ encoding_decl: NAME
 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist
 strings: (STRING | fstring)+
 fstring: FSTRING_START fstring_content* FSTRING_END
 fstring_content: FSTRING_STRING | fstring_expr
 fstring_conversion: '!' NAME
 fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
 fstring_format_spec: ':' fstring_content*
--- a/parso/python/grammar37.txt
+++ b/parso/python/grammar37.txt
@@ -15,8 +15,6 @@ decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+
 decorated: decorators (classdef | funcdef | async_funcdef)
 # NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
 # skipping python3.5+ compatibility, in favour of 3.7 solution
 async_funcdef: 'async' funcdef
 funcdef: 'def' NAME parameters ['->' test] ':' suite
@@ -108,7 +106,7 @@ atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
@@ -148,3 +146,10 @@ encoding_decl: NAME
 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist
 strings: (STRING | fstring)+
 fstring: FSTRING_START fstring_content* FSTRING_END
 fstring_content: FSTRING_STRING | fstring_expr
 fstring_conversion: '!' NAME
 fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
 fstring_format_spec: ':' fstring_content*
--- a/parso/python/grammar38.txt
+++ b/parso/python/grammar38.txt
@@ -0,0 +1,157 @@
 # Grammar for Python
 # NOTE WELL: You should also follow all the steps listed at
 # https://devguide.python.org/grammar/
 # Start symbols for the grammar:
 #       single_input is a single interactive statement;
 #       file_input is a module or sequence of commands read from an input file;
 #       eval_input is the input for the eval() functions.
 # NB: compound_stmt in single_input is followed by extra NEWLINE!
 single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
 file_input: (NEWLINE | stmt)* ENDMARKER
 eval_input: testlist NEWLINE* ENDMARKER
 decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+
 decorated: decorators (classdef | funcdef | async_funcdef)
 async_funcdef: 'async' funcdef
 funcdef: 'def' NAME parameters ['->' test] ':' suite
 parameters: '(' [typedargslist] ')'
 typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
        '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
      | '**' tfpdef [',']]]
  | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
  | '**' tfpdef [','])
 tfpdef: NAME [':' test]
 varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
        '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
      | '**' vfpdef [',']]]
  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
  | '**' vfpdef [',']
 )
 vfpdef: NAME
 stmt: simple_stmt | compound_stmt
 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
 small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
 expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
                     ('=' (yield_expr|testlist_star_expr))*)
 annassign: ':' test ['=' test]
 testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
 augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
 # For normal and annotated assignments, additional restrictions enforced by the interpreter
 del_stmt: 'del' exprlist
 pass_stmt: 'pass'
 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
 break_stmt: 'break'
 continue_stmt: 'continue'
 return_stmt: 'return' [testlist_star_expr]
 yield_stmt: yield_expr
 raise_stmt: 'raise' [test ['from' test]]
 import_stmt: import_name | import_from
 import_name: 'import' dotted_as_names
 # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
 import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
 import_as_name: NAME ['as' NAME]
 dotted_as_name: dotted_name ['as' NAME]
 import_as_names: import_as_name (',' import_as_name)* [',']
 dotted_as_names: dotted_as_name (',' dotted_as_name)*
 dotted_name: NAME ('.' NAME)*
 global_stmt: 'global' NAME (',' NAME)*
 nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
 assert_stmt: 'assert' test [',' test]
 compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
 async_stmt: 'async' (funcdef | with_stmt | for_stmt)
 if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
 while_stmt: 'while' test ':' suite ['else' ':' suite]
 for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
 try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
           'finally' ':' suite))
 with_stmt: 'with' with_item (',' with_item)*  ':' suite
 with_item: test ['as' expr]
 # NB compile.c makes sure that the default except clause is last
 except_clause: 'except' [test ['as' NAME]]
 suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
 test: or_test ['if' or_test 'else' test] | lambdef
 test_nocond: or_test | lambdef_nocond
 lambdef: 'lambda' [varargslist] ':' test
 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
 or_test: and_test ('or' and_test)*
 and_test: not_test ('and' not_test)*
 not_test: 'not' not_test | comparison
 comparison: expr (comp_op expr)*
 # <> isn't actually a valid comparison operator in Python. It's here for the
 # sake of a __future__ import described in PEP 401 (which really works :-)
 comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
 star_expr: '*' expr
 expr: xor_expr ('|' xor_expr)*
 xor_expr: and_expr ('^' and_expr)*
 and_expr: shift_expr ('&' shift_expr)*
 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
 arith_expr: term (('+'|'-') term)*
 term: factor (('*'|'@'|'/'|'%'|'//') factor)*
 factor: ('+'|'-'|'~') factor | power
 power: atom_expr ['**' factor]
 atom_expr: ['await'] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
 subscript: test | [test] ':' [test] [sliceop]
 sliceop: ':' [test]
 exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
 testlist: test (',' test)* [',']
 dictorsetmaker: ( ((test ':' test | '**' expr)
                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
                  ((test | star_expr)
                   (comp_for | (',' (test | star_expr))* [','])) )
 classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
 arglist: argument (',' argument)*  [',']
 # The reason that keywords are test nodes instead of NAME is that using NAME
 # results in an ambiguity. ast.c makes sure it's a NAME.
 # "test '=' test" is really "keyword '=' test", but we have no such token.
 # These need to be in a single rule to avoid grammar that is ambiguous
 # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
 # we explicitly match '*' here, too, to give it proper precedence.
 # Illegal combinations and orderings are blocked in ast.c:
 # multiple (test comp_for) arguments are blocked; keyword unpackings
 # that precede iterable unpackings are blocked; etc.
 argument: ( test [comp_for] |
            test '=' test |
            '**' test |
            '*' test )
 comp_iter: comp_for | comp_if
 sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
 comp_for: ['async'] sync_comp_for
 comp_if: 'if' test_nocond [comp_iter]
 # not used in grammar, but may appear in "node" passed from Parser to Compiler
 encoding_decl: NAME
 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist_star_expr
 strings: (STRING | fstring)+
 fstring: FSTRING_START fstring_content* FSTRING_END
 fstring_content: FSTRING_STRING | fstring_expr
 fstring_conversion: '!' NAME
 fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
 fstring_format_spec: ':' fstring_content*
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -1,8 +1,11 @@
 from parso.python import tree
-from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
+from parso.python.token import PythonTokenTypes
                                STRING, tok_name, NAME)
 from parso.parser import BaseParser
-from parso.pgen2.parse import token_to_ilabel
+
 NAME = PythonTokenTypes.NAME
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 class Parser(BaseParser):
@@ -50,44 +53,35 @@ class Parser(BaseParser):
    }
    default_node = tree.PythonNode
-    def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
+    # Names/Keywords are handled separately
-        super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
+    _leaf_map = {
        PythonTokenTypes.STRING: tree.String,
        PythonTokenTypes.NUMBER: tree.Number,
        PythonTokenTypes.NEWLINE: tree.Newline,
        PythonTokenTypes.ENDMARKER: tree.EndMarker,
        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
        PythonTokenTypes.FSTRING_START: tree.FStringStart,
        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
    }
    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
        super(Parser, self).__init__(pgen_grammar, start_nonterminal,
                                     error_recovery=error_recovery)
        self.syntax_errors = []
        self._omit_dedent_list = []
        self._indent_counter = 0
        # TODO do print absolute import detection here.
        # try:
        #     del python_grammar_no_print_statement.keywords["print"]
        # except KeyError:
        #     pass  # Doesn't exist in the Python 3 grammar.
        # if self.options["print_function"]:
        #     python_grammar = pygram.python_grammar_no_print_statement
        # else:
    def parse(self, tokens):
        if self._error_recovery:
-            if self._start_symbol != 'file_input':
+            if self._start_nonterminal != 'file_input':
                raise NotImplementedError
            tokens = self._recovery_tokenize(tokens)
-        node = super(Parser, self).parse(tokens)
+        return super(Parser, self).parse(tokens)
-        if self._start_symbol == 'file_input' != node.type:
+    def convert_node(self, nonterminal, children):
            # If there's only one statement, we get back a non-module. That's
            # not what we want, we want a module, so we add it here:
            node = self.convert_node(
                self._pgen_grammar,
                self._pgen_grammar.symbol2number['file_input'],
                [node]
            )
        return node
    def convert_node(self, pgen_grammar, type, children):
        """
        Convert raw node information to a PythonBaseNode instance.
@@ -95,158 +89,121 @@ class Parser(BaseParser):
        grammar rule produces a new complete node, so that the tree is build
        strictly bottom-up.
        """
        # TODO REMOVE symbol, we don't want type here.
        symbol = pgen_grammar.number2symbol[type]
        try:
-            return self.node_map[symbol](children)
+            node = self.node_map[nonterminal](children)
        except KeyError:
-            if symbol == 'suite':
+            if nonterminal == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not real
                # ones and therefore have pseudo start/end positions and no
                # prefixes. Just ignore them.
                children = [children[0]] + children[2:-1]
-            elif symbol == 'list_if':
+            elif nonterminal == 'list_if':
                # Make transitioning from 2 to 3 easier.
-                symbol = 'comp_if'
+                nonterminal = 'comp_if'
-            elif symbol == 'listmaker':
+            elif nonterminal == 'listmaker':
                # Same as list_if above.
-                symbol = 'testlist_comp'
+                nonterminal = 'testlist_comp'
-            return self.default_node(symbol, children)
+            node = self.default_node(nonterminal, children)
        for c in children:
            c.parent = node
        return node
-    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
+    def convert_leaf(self, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
        if type == NAME:
-            if value in pgen_grammar.keywords:
+            if value in self._pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            else:
                return tree.Name(value, start_pos, prefix)
        elif type == STRING:
            return tree.String(value, start_pos, prefix)
        elif type == NUMBER:
            return tree.Number(value, start_pos, prefix)
        elif type == NEWLINE:
            return tree.Newline(value, start_pos, prefix)
        elif type == ENDMARKER:
            return tree.EndMarker(value, start_pos, prefix)
        else:
            return tree.Operator(value, start_pos, prefix)
-    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
+        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
                       add_token_callback):
        def get_symbol_and_nodes(stack):
            for dfa, state, (type_, nodes) in stack:
                symbol = pgen_grammar.number2symbol[type_]
                yield symbol, nodes
-        tos_nodes = stack.get_tos_nodes()
+    def error_recovery(self, token):
        tos_nodes = self.stack[-1].nodes
        if tos_nodes:
            last_leaf = tos_nodes[-1].get_last_leaf()
        else:
            last_leaf = None
-        if self._start_symbol == 'file_input' and \
+        if self._start_nonterminal == 'file_input' and \
-                (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
+                (token.type == PythonTokenTypes.ENDMARKER
-            def reduce_stack(states, newstate):
+                 or token.type == DEDENT and '\n' not in last_leaf.value
-                # reduce
+                 and '\r' not in last_leaf.value):
                state = newstate
                while states[state] == [(0, state)]:
                    self.pgen_parser._pop()
                    dfa, state, (type_, nodes) = stack[-1]
                    states, first = dfa
            # In Python statements need to end with a newline. But since it's
            # possible (and valid in Python ) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
-            #print('x', pprint.pprint(stack))
+            if self.stack[-1].dfa.from_rule == 'simple_stmt':
-            ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
+                try:
-
+                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
-            dfa, state, (type_, nodes) = stack[-1]
+                except KeyError:
-            symbol = pgen_grammar.number2symbol[type_]
+                    pass
-            states, first = dfa
+                else:
-            arcs = states[state]
+                    if plan.next_dfa.is_final and not plan.dfa_pushes:
-            # Look for a state with this label
+                        # We are ignoring here that the newline would be
-            for i, newstate in arcs:
+                        # required for a simple_stmt.
-                if ilabel == i:
+                        self.stack[-1].dfa = plan.next_dfa
-                    if symbol == 'simple_stmt':
+                        self._add_token(token)
                        # This is basically shifting
                        stack[-1] = (dfa, newstate, (type_, nodes))
                        reduce_stack(states, newstate)
                        add_token_callback(typ, value, start_pos, prefix)
                        return
                    # Check if we're at the right point
                    #for symbol, nodes in get_symbol_and_nodes(stack):
                    #        self.pgen_parser._pop()
                            #break
                    break
            #symbol = pgen_grammar.number2symbol[type_]
        if not self._error_recovery:
-            return super(Parser, self).error_recovery(
+            return super(Parser, self).error_recovery(token)
                pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                add_token_callback)
        def current_suite(stack):
            # For now just discard everything that is not a suite or
            # file_input, if we detect an error.
-            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
+            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
-                if symbol == 'file_input':
+                if stack_node.nonterminal == 'file_input':
                    break
-                elif symbol == 'suite' and len(nodes) > 1:
+                elif stack_node.nonterminal == 'suite':
-                    # suites without an indent in them get discarded.
+                    # In the case where we just have a newline we don't want to
                    # do error recovery here. In all other cases, we want to do
                    # error recovery.
                    if len(stack_node.nodes) != 1:
                        break
-            return index, symbol, nodes
+            return until_index
-        index, symbol, nodes = current_suite(stack)
+        until_index = current_suite(self.stack)
-        # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
+        if self._stack_removal(until_index + 1):
-        if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
+            self._add_token(token)
            add_token_callback(typ, value, start_pos, prefix)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)
-            error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
+            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
-            stack[-1][2][1].append(error_leaf)
+            self.stack[-1].nodes.append(error_leaf)
-        if symbol == 'suite':
+        tos = self.stack[-1]
-            dfa, state, node = stack[-1]
+        if tos.nonterminal == 'suite':
-            states, first = dfa
+            # Need at least one statement in the suite. This happend with the
-            arcs = states[state]
+            # error recovery above.
-            intended_label = pgen_grammar.symbol2label['stmt']
+            try:
-            # Introduce a proper state transition. We're basically allowing
+                tos.dfa = tos.dfa.arcs['stmt']
-            # there to be no valid statements inside a suite.
+            except KeyError:
-            if [x[0] for x in arcs] == [intended_label]:
+                # We're already in a final state.
-                new_state = arcs[0][1]
+                pass
                stack[-1] = dfa, new_state, node
-    def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
+    def _stack_removal(self, start_index):
-        failed_stack = False
+        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
        found = False
        all_nodes = []
        for dfa, state, (type_, nodes) in stack[start_index:]:
            if nodes:
                found = True
            if found:
                failed_stack = True
                all_nodes += nodes
        if failed_stack:
            stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
-        stack[start_index:] = []
+        if all_nodes:
-        return failed_stack
+            node = tree.PythonErrorNode(all_nodes)
            for n in all_nodes:
                n.parent = node
            self.stack[start_index - 1].nodes.append(node)
        self.stack[start_index:] = []
        return bool(all_nodes)
    def _recovery_tokenize(self, tokens):
-        for typ, value, start_pos, prefix in tokens:
+        for token in tokens:
-            # print(tok_name[typ], repr(value), start_pos, repr(prefix))
+            typ = token[0]
            if typ == DEDENT:
                # We need to count indents, because if we just omit any DEDENT,
                # we might omit them in the wrong place.
@@ -258,4 +215,4 @@ class Parser(BaseParser):
                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1
-            yield typ, value, start_pos, prefix
+            yield token
--- a/parso/python/pep8.py
+++ b/parso/python/pep8.py
@@ -391,11 +391,11 @@ class PEP8Normalizer(ErrorFinder):
                if value.lstrip('#'):
                    self.add_issue(part, 266, "Too many leading '#' for block comment.")
            elif self._on_newline:
-                if not re.match('#:? ', value) and not value == '#' \
+                if not re.match(r'#:? ', value) and not value == '#' \
                        and not (value.startswith('#!') and part.start_pos == (1, 0)):
                    self.add_issue(part, 265, "Block comment should start with '# '")
            else:
-                if not re.match('#:? [^ ]', value):
+                if not re.match(r'#:? [^ ]', value):
                    self.add_issue(part, 262, "Inline comment should start with '# '")
            self._reset_newlines(spacing, leaf, is_comment=True)
@@ -677,7 +677,7 @@ class PEP8Normalizer(ErrorFinder):
        elif typ == 'string':
            # Checking multiline strings
            for i, line in enumerate(leaf.value.splitlines()[1:]):
-                indentation = re.match('[ \t]*', line).group(0)
+                indentation = re.match(r'[ \t]*', line).group(0)
                start_pos = leaf.line + i, len(indentation)
                # TODO check multiline indentation.
        elif typ == 'endmarker':
--- a/parso/python/token.py
+++ b/parso/python/token.py
@@ -1,104 +1,27 @@
 from __future__ import absolute_import
 from itertools import count
 from token import *
 from parso._compatibility import py_version
-_counter = count(N_TOKENS)
+class TokenType(object):
-# Never want to see this thing again.
+    def __init__(self, name, contains_syntax=False):
-del N_TOKENS
+        self.name = name
        self.contains_syntax = contains_syntax
-COMMENT = next(_counter)
+    def __repr__(self):
-tok_name[COMMENT] = 'COMMENT'
+        return '%s(%s)' % (self.__class__.__name__, self.name)
 NL = next(_counter)
 tok_name[NL] = 'NL'
 # Sets the attributes that don't exist in these tok_name versions.
 if py_version >= 30:
    BACKQUOTE = next(_counter)
    tok_name[BACKQUOTE] = 'BACKQUOTE'
 else:
    RARROW = next(_counter)
    tok_name[RARROW] = 'RARROW'
    ELLIPSIS = next(_counter)
    tok_name[ELLIPSIS] = 'ELLIPSIS'
 if py_version < 35:
    ATEQUAL = next(_counter)
    tok_name[ATEQUAL] = 'ATEQUAL'
 ERROR_DEDENT = next(_counter)
 tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
-# Map from operator to number (since tokenize doesn't do this)
+class TokenTypes(object):
 opmap_raw = """\
 ( LPAR
 ) RPAR
 [ LSQB
 ] RSQB
 : COLON
 , COMMA
 ; SEMI
 + PLUS
 - MINUS
 * STAR
 / SLASH
 | VBAR
 & AMPER
 < LESS
 > GREATER
 = EQUAL
 . DOT
 % PERCENT
 ` BACKQUOTE
 { LBRACE
 } RBRACE
@ AT
 == EQEQUAL
 != NOTEQUAL
 <> NOTEQUAL
 <= LESSEQUAL
 >= GREATEREQUAL
 ~ TILDE
 ^ CIRCUMFLEX
 << LEFTSHIFT
 >> RIGHTSHIFT
 ** DOUBLESTAR
 += PLUSEQUAL
 -= MINEQUAL
 *= STAREQUAL
 /= SLASHEQUAL
 %= PERCENTEQUAL
 &= AMPEREQUAL
 |= VBAREQUAL
@= ATEQUAL
 ^= CIRCUMFLEXEQUAL
 <<= LEFTSHIFTEQUAL
 >>= RIGHTSHIFTEQUAL
 **= DOUBLESTAREQUAL
 // DOUBLESLASH
 //= DOUBLESLASHEQUAL
 -> RARROW
 ... ELLIPSIS
 """
 opmap = {}
 for line in opmap_raw.splitlines():
    op, name = line.split()
    opmap[op] = globals()[name]
 def generate_token_id(string):
    """
-    Uses a token in the grammar (e.g. `'+'` or `'and'`returns the corresponding
+    Basically an enum, but Python 2 doesn't have enums in the standard library.
    ID for it. The strings are part of the grammar file.
    """
-    try:
+    def __init__(self, names, contains_syntax):
-        return opmap[string]
+        for name in names:
-    except KeyError:
+            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
-        pass
+
-    return globals()[string]
+
 PythonTokenTypes = TokenTypes((
    'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
    'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
    'ENDMARKER'),
    contains_syntax=('NAME', 'OP'),
 )
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -18,16 +18,29 @@ from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8
-from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
+from parso.python.token import PythonTokenTypes
                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
                                ERROR_DEDENT)
 from parso._compatibility import py_version
 from parso.utils import split_lines
 STRING = PythonTokenTypes.STRING
 NAME = PythonTokenTypes.NAME
 NUMBER = PythonTokenTypes.NUMBER
 OP = PythonTokenTypes.OP
 NEWLINE = PythonTokenTypes.NEWLINE
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 ENDMARKER = PythonTokenTypes.ENDMARKER
 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
 ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
 FSTRING_START = PythonTokenTypes.FSTRING_START
 FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
 FSTRING_END = PythonTokenTypes.FSTRING_END
 TokenCollection = namedtuple(
    'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
+    'pseudo_token single_quoted triple_quoted endpats whitespace '
    'fstring_pattern_map always_break_tokens',
 )
 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
@@ -52,32 +65,35 @@ def group(*choices, **kwargs):
    return start + '|'.join(choices) + ')'
 def any(*choices):
    # Regex helper: the alternation of ``choices`` (built by ``group``),
    # followed by ``*`` so that it may repeat zero or more times.
    # NOTE(review): this module-level name shadows the builtin ``any``. The
    # call ``any(not f.allow_multiline() for f in fstring_stack)`` inside
    # ``tokenize_lines`` looks like it expects the builtin - confirm.
    alternation = group(*choices)
    return alternation + '*'
 def maybe(*choices):
    # Regex helper: the alternation of ``choices`` (built by ``group``),
    # followed by ``?`` so that the whole group is optional.
    alternation = group(*choices)
    return alternation + '?'
 # Return the empty string, plus all of the valid string prefixes.
-def _all_string_prefixes(version_info):
+def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
    def different_case_versions(prefix):
        for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
            yield ''.join(s)
    # The valid string prefixes. Only contain the lower case versions,
    #  and don't contain any permuations (include 'fr', but not
    #  'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u']
+    valid_string_prefixes = ['b', 'r', 'u']
    if version_info >= (3, 0):
-        _valid_string_prefixes.append('br')
+        valid_string_prefixes.append('br')
-    if version_info >= (3, 6):
+    result = set([''])
-        _valid_string_prefixes += ['f', 'fr']
+    if version_info >= (3, 6) and include_fstring:
        f = ['f', 'fr']
        if only_fstring:
            valid_string_prefixes = f
            result = set()
        else:
            valid_string_prefixes += f
    elif only_fstring:
        return set()
    # if we add binary f-strings, add: ['fb', 'fbr']
-    result = set([''])
+    for prefix in valid_string_prefixes:
    for prefix in _valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            #  character
@@ -102,10 +118,15 @@ def _get_token_collection(version_info):
        return result
 fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
 fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
 def _create_token_collection(version_info):
    # Note: we use unicode matching for names ("\w") but ascii matching for
    # number literals.
    Whitespace = r'[ \f\t]*'
    whitespace = _compile(Whitespace)
    Comment = r'#[^\r\n]*'
    Name = r'\w+'
@@ -141,28 +162,31 @@ def _create_token_collection(version_info):
    #  StringPrefix can be the empty string (making it optional).
    possible_prefixes = _all_string_prefixes(version_info)
    StringPrefix = group(*possible_prefixes)
    StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
    fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
    FStringStart = group(*fstring_prefixes)
    # Tail end of ' string.
-    Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+    Single = r"(?:\\.|[^'\\])*'"
    # Tail end of " string.
-    Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+    Double = r'(?:\\.|[^"\\])*"'
    # Tail end of ''' string.
-    Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+    Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
    # Tail end of """ string.
-    Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+    Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
-    Triple = group(StringPrefix + "'''", StringPrefix + '"""')
+    Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
    # Because of leftmost-then-longest match semantics, be sure to put the
    # longest operators first (e.g., if = came before ==, == would get
    # recognized as two instances of =).
-    Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
+    Operator = group(r"\*\*=?", r">>=?", r"<<=?",
                     r"//=?", r"->",
-                     r"[+\-*/%&@`|^=<>]=?",
+                     r"[+\-*/%&@`|^!=<>]=?",
                     r"~")
    Bracket = '[][(){}]'
-    special_args = [r'\r?\n', r'[:;.,@]']
+    special_args = [r'\r\n?', r'\n', r'[:;.,@]']
    if version_info >= (3, 0):
        special_args.insert(0, r'\.\.\.')
    Special = group(*special_args)
@@ -170,11 +194,16 @@ def _create_token_collection(version_info):
    Funny = group(Operator, Bracket, Special)
    # First (or only) line of ' or " string.
-    ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
-                    group("'", r'\\\r?\n'),
+                    group("'", r'\\(?:\r\n?|\n)'),
-                    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
-                    group('"', r'\\\r?\n'))
+                    group('"', r'\\(?:\r\n?|\n)'))
-    PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+    pseudo_extra_pool = [Comment, Triple]
    all_quotes = '"', "'", '"""', "'''"
    if fstring_prefixes:
        pseudo_extra_pool.append(FStringStart + group(*all_quotes))
    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
    PseudoToken = group(Whitespace, capture=True) + \
        group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
@@ -192,18 +221,24 @@ def _create_token_collection(version_info):
    #  including the opening quotes.
    single_quoted = set()
    triple_quoted = set()
    fstring_pattern_map = {}
    for t in possible_prefixes:
-        for p in (t + '"', t + "'"):
+        for quote in '"', "'":
-            single_quoted.add(p)
+            single_quoted.add(t + quote)
-        for p in (t + '"""', t + "'''"):
+
-            triple_quoted.add(p)
+        for quote in '"""', "'''":
            triple_quoted.add(t + quote)
    for t in fstring_prefixes:
        for quote in all_quotes:
            fstring_pattern_map[t + quote] = quote
    ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                           'finally', 'while', 'with', 'return')
    pseudo_token_compiled = _compile(PseudoToken)
    return TokenCollection(
        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
-        ALWAYS_BREAK_TOKENS
+        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
    )
@@ -218,12 +253,81 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
 class PythonToken(Token):
    def _get_type_name(self, exact=True):
        return tok_name[self.type]
    def __repr__(self):
-        return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
+        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
-                self._replace(type=self._get_type_name()))
+                self._replace(type=self.type.name))
 class FStringNode(object):
    """
    State of one f-string literal that is currently open.

    Stores the quote that opened the literal together with counters for
    open brackets and format specs. ``is_in_expr`` compares the two
    counters to tell whether the position is inside an expression.
    """
    def __init__(self, quote):
        # In the syntax there can be multiple format_spec's nested:
        # {x:{y:3}}
        self.format_spec_count = 0
        self.parentheses_count = 0
        # String content carried over from earlier lines; starts out empty.
        self.previous_lines = ''
        # Not known until some string content has been seen.
        self.last_string_start_pos = None
        self.quote = quote
    def open_parentheses(self, character):
        # ``character`` is accepted but not inspected; only the depth counts.
        self.parentheses_count = self.parentheses_count + 1
    def close_parentheses(self, character):
        # ``character`` is accepted but not inspected; only the depth counts.
        self.parentheses_count = self.parentheses_count - 1
        if not self.parentheses_count:
            # No parentheses means that the format spec is also finished.
            self.format_spec_count = 0
    def allow_multiline(self):
        # Only a quote of length three (a triple quote) allows multiple lines.
        quote_length = len(self.quote)
        return quote_length == 3
    def is_in_expr(self):
        # True while more brackets are open than format specs.
        return self.parentheses_count > self.format_spec_count
 def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_prefix):
    for fstring_stack_index, node in enumerate(fstring_stack):
        if string.startswith(node.quote):
            token = PythonToken(
                FSTRING_END,
                node.quote,
                start_pos,
                prefix=additional_prefix,
            )
            additional_prefix = ''
            assert not node.previous_lines
            del fstring_stack[fstring_stack_index:]
            return token, '', len(node.quote)
    return None, additional_prefix, 0
 def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
    """
    Match literal f-string content in ``line`` starting at ``pos``.

    The innermost f-string (last stack entry) decides whether the
    multi-line or the single-line pattern is used. The match is then cut
    short by the end pattern of every f-string on the stack that matches it.

    Returns ``(string, new_pos)``. If nothing matches, the pending
    ``previous_lines`` and the unchanged ``pos`` are returned. If a
    multi-line chunk ends in a line break, it is stored on the innermost
    node and an empty string is returned instead.
    """
    tos = fstring_stack[-1]
    multiline = tos.allow_multiline()
    pattern = fstring_string_multi_line if multiline else fstring_string_single_line
    match = pattern.match(line, pos)
    if match is None:
        return tos.previous_lines, pos
    if not tos.previous_lines:
        # Nothing pending yet, so the string content starts here.
        tos.last_string_start_pos = (lnum, pos)
    string = match.group(0)
    for node in fstring_stack:
        quote = node.quote
        end_match = endpats[quote].match(string)
        if end_match is not None:
            # Keep only the part before the closing quote.
            string = end_match.group(0)[:-len(quote)]
    new_pos = pos + len(string)
    if multiline and string.endswith(('\n', '\r')):
        # The content continues on the next line; keep it for later.
        tos.previous_lines += string
        return '', new_pos
    return tos.previous_lines + string, new_pos
 def tokenize(code, version_info, start_pos=(1, 0)):
@@ -232,6 +336,18 @@ def tokenize(code, version_info, start_pos=(1, 0)):
    return tokenize_lines(lines, version_info, start_pos=start_pos)
 def _print_tokens(func):
    """
    A small helper function to help debug the tokenize_lines function.
    """
    def wrapper(*args, **kwargs):
        for token in func(*args, **kwargs):
            yield token
    return wrapper
 # @_print_tokens
 def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    """
    A heavily modified Python standard library tokenizer.
@@ -240,7 +356,16 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    token. This idea comes from lib2to3. The prefix contains all information
    that is irrelevant for the parser like newlines in parentheses or comments.
    """
-    pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
+    def dedent_if_necessary(start):
        while start < indents[-1]:
            if start > indents[-2]:
                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
                break
            yield PythonToken(DEDENT, '', spos, '')
            indents.pop()
    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
        fstring_pattern_map, always_break_tokens, = \
        _get_token_collection(version_info)
    paren_level = 0  # count parentheses
    indents = [0]
@@ -257,6 +382,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    additional_prefix = ''
    first = True
    lnum = start_pos[0] - 1
    fstring_stack = []
    for line in lines:  # loop over lines in stream
        lnum += 1
        pos = 0
@@ -278,7 +404,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
-                yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
+                yield PythonToken(
                    STRING, contstr + line[:pos],
                    contstr_start, prefix)
                contstr = ''
                contline = None
            else:
@@ -287,14 +415,50 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                continue
        while pos < max:
            if fstring_stack:
                tos = fstring_stack[-1]
                if not tos.is_in_expr():
                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
                    if pos == max:
                        break
                    if string:
                        yield PythonToken(
                            FSTRING_STRING, string,
                            tos.last_string_start_pos,
                            # Never has a prefix because it can start anywhere and
                            # include whitespace.
                            prefix=''
                        )
                        tos.previous_lines = ''
                        continue
                rest = line[pos:]
                fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
                    fstring_stack,
                    rest,
                    (lnum, pos),
                    additional_prefix,
                )
                pos += quote_length
                if fstring_end_token is not None:
                    yield fstring_end_token
                    continue
            pseudomatch = pseudo_token.match(line, pos)
            if not pseudomatch:                             # scan for tokens
-                txt = line[pos:]
+                match = whitespace.match(line, pos)
-                if txt.endswith('\n'):
+                if pos == 0:
-                    new_line = True
+                    for t in dedent_if_necessary(match.end()):
-                yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                        yield t
                pos = match.end()
                new_line = False
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
                    additional_prefix + match.group(0)
                )
                additional_prefix = ''
-                break
+                pos += 1
                continue
            prefix = additional_prefix + pseudomatch.group(1)
            additional_prefix = ''
@@ -309,28 +473,31 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                break
            initial = token[0]
-            if new_line and initial not in '\r\n#':
+            if new_line and initial not in '\r\n\\#':
                new_line = False
-                if paren_level == 0:
+                if paren_level == 0 and not fstring_stack:
                    i = 0
                    indent_start = start
                    while line[i] == '\f':
                        i += 1
-                        start -= 1
+                        # TODO don't we need to change spos as well?
-                    if start > indents[-1]:
+                        indent_start -= 1
                    if indent_start > indents[-1]:
                        yield PythonToken(INDENT, '', spos, '')
-                        indents.append(start)
+                        indents.append(indent_start)
-                    while start < indents[-1]:
+                    for t in dedent_if_necessary(indent_start):
-                        if start > indents[-2]:
+                        yield t
                            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
                            break
                        yield PythonToken(DEDENT, '', spos, '')
                        indents.pop()
            if (initial in numchars or                      # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                yield PythonToken(NUMBER, token, spos, prefix)
            elif initial in '\r\n':
-                if not new_line and paren_level == 0:
+                if any(not f.allow_multiline() for f in fstring_stack):
                    # Would use fstring_stack.clear, but that's not available
                    # in Python 2.
                    fstring_stack[:] = []
                if not new_line and paren_level == 0 and not fstring_stack:
                    yield PythonToken(NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
@@ -350,10 +517,23 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    contstr = line[start:]
                    contline = line
                    break
            # Check up to the first 3 chars of the token to see if
            #  they're in the single_quoted set. If so, they start
            #  a string.
            # We're using the first 3, because we're looking for
            #  "rb'" (for example) at the start of the token. If
            #  we switch to longer prefixes, this needs to be
            #  adjusted.
            # Note that initial == token[:1].
            # Also note that single quote checking must come after
            #  triple quote checking (above).
            elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
-                if token[-1] == '\n':                       # continued string
+                if token[-1] in '\r\n':                       # continued string
                    # This means that a single quoted string ends with a
                    # backslash and is continued.
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
@@ -362,9 +542,15 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    break
                else:                                       # ordinary string
                    yield PythonToken(STRING, token, spos, prefix)
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
                yield PythonToken(FSTRING_START, token, spos, prefix)
            elif is_identifier(initial):                      # ordinary name
                if token in always_break_tokens:
                    fstring_stack[:] = []
                    paren_level = 0
                    # We only want to dedent if the token is on a new line.
                    if re.match(r'[ \f\t]*$', line[:start]):
                        while True:
                            indent = indents.pop()
                            if indent > start:
@@ -373,26 +559,30 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                                indents.append(indent)
                                break
                yield PythonToken(NAME, token, spos, prefix)
-            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
            else:
                if token in '([{':
                    if fstring_stack:
                        fstring_stack[-1].open_parentheses(token)
                    else:
                        paren_level += 1
                elif token in ')]}':
                    if fstring_stack:
                        fstring_stack[-1].close_parentheses(token)
                    else:
                        if paren_level:
                            paren_level -= 1
                elif token == ':' and fstring_stack \
                        and fstring_stack[-1].parentheses_count == 1:
                    fstring_stack[-1].format_spec_count += 1
-                try:
+                yield PythonToken(OP, token, spos, prefix)
                    # This check is needed in any case to check if it's a valid
                    # operator or just some random unicode character.
                    typ = opmap[token]
                except KeyError:
                    typ = ERRORTOKEN
                yield PythonToken(typ, token, spos, prefix)
    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
-        if contstr.endswith('\n'):
+        if contstr.endswith('\n') or contstr.endswith('\r'):
            new_line = True
    end_pos = lnum, max
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
@@ -48,6 +48,7 @@ from parso._compatibility import utf8_repr, unicode
 from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
    search_ancestor
 from parso.python.prefix import split_prefix
 from parso.utils import split_lines
 _FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
                        'with_stmt', 'async_stmt', 'suite'])
@@ -60,7 +61,6 @@ _GET_DEFINITION_TYPES = set([
 _IMPORTS = set(['import_name', 'import_from'])
 class DocstringMixin(object):
    __slots__ = ()
@@ -125,15 +125,16 @@ class PythonLeaf(PythonMixin, Leaf):
        #   indent error leafs somehow? No idea how, though.
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
-                and previous_leaf.original_type in ('indent', 'error_dedent'):
+                and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'):
            previous_leaf = previous_leaf.get_previous_leaf()
-        if previous_leaf is None:
+        if previous_leaf is None:  # It's the first leaf.
-            return self.line - self.prefix.count('\n'), 0  # It's the first leaf.
+            lines = split_lines(self.prefix)
            # + 1 is needed because split_lines always returns at least [''].
            return self.line - len(lines) + 1, 0  # It's the first leaf.
        return previous_leaf.end_pos
 class _LeafWithoutNewlines(PythonLeaf):
    """
    Simply here to optimize performance.
@@ -166,6 +167,12 @@ class EndMarker(_LeafWithoutNewlines):
    __slots__ = ()
    type = 'endmarker'
    @utf8_repr
    def __repr__(self):
        # Renders the concrete class name together with the repr of the prefix
        # and the end position of this leaf.
        return "<%s: prefix=%s end_pos=%s>" % (
            type(self).__name__, repr(self.prefix), self.end_pos
        )
 class Newline(PythonLeaf):
    """Contains NEWLINE and ENDMARKER tokens."""
@@ -235,7 +242,6 @@ class Name(_LeafWithoutNewlines):
        return None
 class Literal(PythonLeaf):
    __slots__ = ()
@@ -251,7 +257,7 @@ class String(Literal):
    @property
    def string_prefix(self):
-        return re.match('\w*(?=[\'"])', self.value).group(0)
+        return re.match(r'\w*(?=[\'"])', self.value).group(0)
    def _get_payload(self):
        match = re.search(
@@ -262,6 +268,33 @@ class String(Literal):
        return match.group(2)[:-len(match.group(1))]
 class FStringString(PythonLeaf):
    """
    Leaf of type ``fstring_string``.
    f-strings contain f-string expressions and normal python strings. These are
    the string parts of f-strings.
    """
    type = 'fstring_string'
    # Declares no additional per-instance attributes.
    __slots__ = ()
 class FStringStart(PythonLeaf):
    """
    Leaf of type ``fstring_start``.
    f-strings contain f-string expressions and normal python strings. This leaf
    is presumably the opening part of an f-string (string prefix plus quote,
    e.g. ``f"``) -- TODO confirm against the tokenizer's FSTRING_START token.
    """
    type = 'fstring_start'
    # Declares no additional per-instance attributes.
    __slots__ = ()
 class FStringEnd(PythonLeaf):
    """
    Leaf of type ``fstring_end``.
    f-strings contain f-string expressions and normal python strings. This leaf
    is presumably the closing quote of an f-string -- TODO confirm against the
    tokenizer.
    """
    type = 'fstring_end'
    # Declares no additional per-instance attributes.
    __slots__ = ()
 class _StringComparisonMixin(object):
    def __eq__(self, other):
        """
@@ -509,7 +542,9 @@ def _create_params(parent, argslist_list):
            if child is None or child == ',':
                param_children = children[start:end]
                if param_children:  # Could as well be comma and then end.
-                    if param_children[0] == '*' and param_children[1] == ',' \
+                    if param_children[0] == '*' \
                            and (len(param_children) == 1
                                 or param_children[1] == ',') \
                            or check_python2_nested_param(param_children[0]):
                        for p in param_children:
                            p.parent = parent
@@ -626,6 +661,7 @@ class Function(ClassOrFunc):
        except IndexError:
            return None
 class Lambda(Function):
    """
    Lambdas are basically trimmed functions, so give it the same interface.
@@ -933,7 +969,7 @@ class ImportName(Import):
 class KeywordStatement(PythonBaseNode):
    """
    For the following statements: `assert`, `del`, `global`, `nonlocal`,
-    `raise`, `return`, `yield`, `return`, `yield`.
+    `raise`, `return`, `yield`.
    `pass`, `continue` and `break` are not in there, because they are just
    simple keywords and the parser reduces it to a keyword.
--- a/parso/tree.py
+++ b/parso/tree.py
@@ -1,5 +1,7 @@
 from abc import abstractmethod, abstractproperty
 from parso._compatibility import utf8_repr, encoding, py_version
 from parso.utils import split_lines
 def search_ancestor(node, *node_types):
@@ -55,7 +57,6 @@ class NodeOrLeaf(object):
        Returns the node immediately preceding this node in this parent's
        children list. If this node does not have a previous sibling, it is
        None.
        None.
        """
        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
@@ -194,7 +195,9 @@ class Leaf(NodeOrLeaf):
    def get_start_pos_of_prefix(self):
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is None:
-            return self.line - self.prefix.count('\n'), 0  # It's the first leaf.
+            lines = split_lines(self.prefix)
            # + 1 is needed because split_lines always returns at least [''].
            return self.line - len(lines) + 1, 0  # It's the first leaf.
        return previous_leaf.end_pos
    def get_first_leaf(self):
@@ -211,7 +214,7 @@ class Leaf(NodeOrLeaf):
    @property
    def end_pos(self):
-        lines = self.value.split('\n')
+        lines = split_lines(self.value)
        end_pos_line = self.line + len(lines) - 1
        # Check for multiline token
        if self.line == end_pos_line:
@@ -230,6 +233,7 @@ class Leaf(NodeOrLeaf):
 class TypedLeaf(Leaf):
    """
    A leaf that stores its ``type`` per instance (in a slot) instead of
    defining it on the class.
    """
    __slots__ = ('type',)
    def __init__(self, type, value, start_pos, prefix=''):
        # value, start_pos and prefix are handled by Leaf; only the type is
        # kept here.
        super(TypedLeaf, self).__init__(value, start_pos, prefix)
        self.type = type
@@ -244,8 +248,6 @@ class BaseNode(NodeOrLeaf):
    type = None
    def __init__(self, children):
        for c in children:
            c.parent = self
        self.children = children
        """
        A list of :class:`NodeOrLeaf` child nodes.
@@ -318,7 +320,7 @@ class BaseNode(NodeOrLeaf):
    @utf8_repr
    def __repr__(self):
-        code = self.get_code().replace('\n', ' ').strip()
+        code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
        if not py_version >= 30:
            code = code.encode(encoding, 'replace')
        return "<%s: %s@%s,%s>" % \
@@ -339,7 +341,7 @@ class Node(BaseNode):
 class ErrorNode(BaseNode):
    """
-    A node that containes valid nodes/leaves that we're follow by a token that
+    A node that contains valid nodes/leaves that were followed by a token that
    was invalid. This basically means that the leaf after this node is where
    Python would mark a syntax error.
    """
@@ -352,13 +354,13 @@ class ErrorLeaf(Leaf):
    A leaf that is either completely invalid in a language (like `$` in Python)
    or is invalid at that position. Like the star in `1 +* 1`.
    """
-    __slots__ = ('original_type',)
+    __slots__ = ('token_type',)
    type = 'error_leaf'
-    def __init__(self, original_type, value, start_pos, prefix=''):
+    def __init__(self, token_type, value, start_pos, prefix=''):
        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
-        self.original_type = original_type
+        self.token_type = token_type
    def __repr__(self):
        return "<%s: %s:%s, %s>" % \
-            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)
+            (type(self).__name__, self.token_type, repr(self.value), self.start_pos)
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -5,6 +5,20 @@ from ast import literal_eval
 from parso._compatibility import unicode, total_ordering
 # The following characters are treated as line breaks by str.splitlines, but
 # not by the Python language itself. In Python source code only \r (Carriage
 # Return, 0xD) and \n (Line Feed, 0xA) are allowed to split lines.
 _NON_LINE_BREAKS = (
    u'\v',  # Vertical Tabulation 0xB
    u'\f',  # Form Feed 0xC
    u'\x1C',  # File Separator
    u'\x1D',  # Group Separator
    u'\x1E',  # Record Separator
    u'\x85',  # Next Line (NEL - Equivalent to CR+LF.
              # Used to mark end-of-line on some IBM mainframes.)
    u'\u2028',  # Line Separator
    u'\u2029',  # Paragraph Separator
 )
 Version = namedtuple('Version', 'major, minor, micro')
@@ -26,7 +40,12 @@ def split_lines(string, keepends=False):
        # We have to merge lines that were broken by form feed characters.
        merge = []
        for i, line in enumerate(lst):
-            if line.endswith('\f'):
+            try:
                last_chr = line[-1]
            except IndexError:
                pass
            else:
                if last_chr in _NON_LINE_BREAKS:
                    merge.append(i)
        for index in reversed(merge):
@@ -41,11 +60,11 @@ def split_lines(string, keepends=False):
        # The stdlib's implementation of the end is inconsistent when calling
        # it with/without keepends. One time there's an empty string in the
        # end, one time there's none.
-        if string.endswith('\n') or string == '':
+        if string.endswith('\n') or string.endswith('\r') or string == '':
            lst.append('')
        return lst
    else:
-        return re.split('\n|\r\n', string)
+        return re.split(r'\n|\r\n|\r', string)
 def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
--- a/setup.py
+++ b/setup.py
@@ -40,8 +40,16 @@ setup(name='parso',
          'Programming Language :: Python :: 3.3',
          'Programming Language :: Python :: 3.4',
          'Programming Language :: Python :: 3.5',
          'Programming Language :: Python :: 3.6',
          'Programming Language :: Python :: 3.7',
          'Topic :: Software Development :: Libraries :: Python Modules',
          'Topic :: Text Editors :: Integrated Development Environments (IDE)',
          'Topic :: Utilities',
      ],
      extras_require={
          'testing': [
              'pytest>=3.0.7',
              'docopt',
          ],
      },
      )
--- a/test/failing_examples.py
+++ b/test/failing_examples.py
@@ -141,7 +141,7 @@ FAILING_EXAMPLES = [
    # f-strings
    'f"{}"',
-    'f"{\\}"',
+    r'f"{\}"',
    'f"{\'\\\'}"',
    'f"{#}"',
    "f'{1!b}'",
@@ -285,6 +285,14 @@ if sys.version_info >= (3,):
        'b"ä"',
        # combining strings and unicode is allowed in Python 2.
        '"s" b""',
        '"s" b"" ""',
        'b"" "" b"" ""',
    ]
 if sys.version_info >= (3, 6):
    FAILING_EXAMPLES += [
        # Same as above, but for f-strings.
        'f"s" b""',
        'b"s" f""',
    ]
 if sys.version_info >= (2, 7):
    # This is something that raises a different error in 2.6 than in the other
--- a/test/fuzz_diff_parser.py
+++ b/test/fuzz_diff_parser.py
@@ -0,0 +1,290 @@
 """
 A script to find bugs in the diff parser.
 This script is extremely useful if changes are made to the diff parser. By
 running a few thousand iterations, we can assure that the diff parser is in
 good shape.
 Usage:
  fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>]
  fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p]
  fuzz_diff_parser.py -h | --help
 Options:
  -h --help              Show this screen
  -n, --maxtries=<nr>    Maximum of random tries [default: 1000]
  -x, --changes=<nr>     Amount of changes to be done to a file per try [default: 5]
  -l, --logging          Prints all the logs
  -o, --only-last=<nr>   Only runs the last n iterations; Defaults to running all
  -p, --print-code      Print all test diffs
  --pdb                  Launch pdb when error is raised
  --ipdb                 Launch ipdb when error is raised
 """
 from __future__ import print_function
 import logging
 import sys
 import os
 import random
 import pickle
 import parso
 from parso.utils import split_lines
 from test.test_diff_parser import _check_error_leaves_nodes
 # Grammar for version 3.8; it is only used below to collect its reserved
 # syntax strings.
 _latest_grammar = parso.load_grammar(version='3.8')
 # All reserved syntax strings of that grammar. Purely alphabetic ones get a
 # trailing space appended.
 _python_reserved_strings = tuple(
    # Keywords are usually only interesting in combination with spaces after
    # them. We don't put a space before keywords, to avoid indentation errors.
    s + (' ' if s.isalpha() else '')
    for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
 )
 # The pool of snippets the fuzzer picks from: the reserved strings above plus
 # whitespace, string/f-string openers, quotes and a few other fragments.
 _random_python_fragments = _python_reserved_strings + (
    ' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
    "'''", ';', ' some_random_word ', '\\', '#',
 )
 def find_python_files_in_tree(file_path):
    """
    Yield the paths of the Python files found below `file_path`.
    If `file_path` is not a directory, it is yielded unchanged (whatever its
    extension). Otherwise the tree is walked with os.walk and every file whose
    name ends with '.py' is yielded, joined with its directory.
    """
    if os.path.isdir(file_path):
        for directory, _subdirs, file_names in os.walk(file_path):
            for file_name in file_names:
                if file_name.endswith('.py'):
                    yield os.path.join(directory, file_name)
    else:
        yield file_path
 def _print_copyable_lines(lines):
    """
    Print `lines` in a form that can be pasted into a Python string literal.
    Every line is shown as its repr without the surrounding quotes; an escaped
    newline at the end is turned back into a real line break.
    """
    for raw_line in lines:
        escaped = repr(raw_line)[1:-1]
        if escaped.endswith(r'\n'):
            # Drop the two characters of the escaped newline and break the
            # line for real.
            escaped = escaped[:-2] + '\n'
        print(escaped, end='')
 def _get_first_error_start_pos_or_none(module):
    """
    Return the start position of the error leaf/node that
    _check_error_leaves_nodes finds in `module`, or None if it finds none.
    """
    first_error = _check_error_leaves_nodes(module)
    if first_error is None:
        return None
    return first_error.start_pos
 class LineReplacement:
    """A modification that overwrites one line with a new one."""
    def __init__(self, line_nr, new_line):
        self._new_line = new_line
        self._line_nr = line_nr
    def apply(self, code_lines):
        """Replace the entry at the stored index of `code_lines` in place."""
        index, replacement = self._line_nr, self._new_line
        code_lines[index] = replacement
 class LineDeletion:
    """A modification that removes one line."""
    def __init__(self, line_nr):
        self.line_nr = line_nr
    def apply(self, code_lines):
        """Delete the entry at index `line_nr` from `code_lines` in place."""
        index = self.line_nr
        del code_lines[index]
 class LineCopy:
    """A modification that duplicates an existing line at another position."""
    def __init__(self, copy_line, insertion_line):
        self._insertion_line = insertion_line
        self._copy_line = copy_line
    def apply(self, code_lines):
        """Insert the line at the copy index before the insertion index."""
        # Use some line from the file. This doesn't feel totally
        # random, but for the diff parser it will feel like it.
        duplicated = code_lines[self._copy_line]
        code_lines.insert(self._insertion_line, duplicated)
 class FileModification:
    """
    A list of line modifications that is applied to the lines of a file, after
    which the result is parsed with the diff cache enabled.
    """
    @classmethod
    def generate(cls, code_lines, change_count):
        """
        Create an instance from randomly generated line modifications.
        `check_original` is enabled whenever random.random() exceeds 0.8.
        """
        return cls(
            list(cls._generate_line_modifications(code_lines, change_count)),
            # work with changed trees more than with normal ones.
            check_original=random.random() > 0.8,
        )
    @staticmethod
    def _generate_line_modifications(lines, change_count):
        """
        Yield up to `change_count` random LineDeletion, LineCopy or
        LineReplacement objects.
        The generator works on its own copy of `lines`. Every modification is
        applied to that copy before it is yielded, so the line numbers of
        later modifications refer to the already changed lines.
        """
        # Random index into `lines`; with include_end the upper bound is
        # len(lines) instead of len(lines) - 1 (randint is inclusive).
        def random_line(include_end=False):
            return random.randint(0, len(lines) - (not include_end))
        lines = list(lines)
        for _ in range(change_count):
            # 1 deletes a line, 2 copies a line, 3 and 4 rewrite a line.
            rand = random.randint(1, 4)
            if rand == 1:
                if len(lines) == 1:
                    # We cannot delete every line, that doesn't make sense to
                    # fuzz and it would be annoying to rewrite everything here.
                    continue
                l = LineDeletion(random_line())
            elif rand == 2:
                # Copy / Insertion
                # Make it possible to insert into the first and the last line
                l = LineCopy(random_line(), random_line(include_end=True))
            elif rand in (3, 4):
                # Modify a line in some weird random ways.
                line_nr = random_line()
                line = lines[line_nr]
                column = random.randint(0, len(line))
                random_string = ''
                # Build the inserted text out of one to three random pieces.
                for _ in range(random.randint(1, 3)):
                    if random.random() > 0.8:
                        # The lower characters cause way more issues.
                        unicode_range = 0x1f if random.randint(0, 1) else 0x3000
                        random_string += chr(random.randint(0, unicode_range))
                    else:
                        # These insertions let us understand how random
                        # keyword/operator insertions work. Theoretically this
                        # could also be done with unicode insertions, but the
                        # fuzzer is just way more effective here.
                        random_string += random.choice(_random_python_fragments)
                if random.random() > 0.5:
                    # In this case we insert at a very random place that
                    # probably breaks syntax.
                    line = line[:column] + random_string + line[column:]
                else:
                    # Here we have better chances to not break syntax, because
                    # we really replace the line with something that has
                    # indentation.
                    line = ' ' * random.randint(0, 12) + random_string + '\n'
                l = LineReplacement(line_nr, line)
            l.apply(lines)
            yield l
    def __init__(self, modification_list, check_original):
        # modification_list: objects with an apply(code_lines) method.
        # check_original: whether run() also parses the unmodified code.
        self._modification_list = modification_list
        self._check_original = check_original
    def _apply(self, code_lines):
        """Return a copy of `code_lines` with all modifications applied in order."""
        changed_lines = list(code_lines)
        for modification in self._modification_list:
            modification.apply(changed_lines)
        return changed_lines
    def run(self, grammar, code_lines, print_code):
        """
        Parse the modified code with `grammar`, using the diff cache.
        If `check_original` is set, the original code is parsed before and
        after the modified code, and the start position of the first error
        must be the same both times (AssertionError otherwise). With
        `print_code` the modified lines (and, when it is checked, the
        original ones) are printed first.
        """
        code = ''.join(code_lines)
        modified_lines = self._apply(code_lines)
        modified_code = ''.join(modified_lines)
        if print_code:
            if self._check_original:
                print('Original:')
                _print_copyable_lines(code_lines)
            print('\nModified:')
            _print_copyable_lines(modified_lines)
            print()
        if self._check_original:
            m = grammar.parse(code, diff_cache=True)
            start1 = _get_first_error_start_pos_or_none(m)
        grammar.parse(modified_code, diff_cache=True)
        if self._check_original:
            # Also check if it's possible to "revert" the changes.
            m = grammar.parse(code, diff_cache=True)
            start2 = _get_first_error_start_pos_or_none(m)
            assert start1 == start2, (start1, start2)
 class FileTests:
    """
    A series of fuzzing runs against the lines of a single file.
    Every generated FileModification is remembered on the instance, so that
    the same modifications can be replayed later with `redo`.
    """
    def __init__(self, file_path, test_count, change_count):
        """
        Read `file_path` and split it into lines (line endings are kept).
        `test_count` is the number of FileModification objects `run` creates,
        `change_count` the number of line changes per modification.
        """
        self._path = file_path
        # NOTE(review): the file is opened in text mode with the platform's
        # default encoding.
        with open(file_path) as f:
            code = f.read()
        self._code_lines = split_lines(code, keepends=True)
        self._test_count = test_count
        self._change_count = change_count
        self._file_modifications = []
    def _run(self, grammar, file_modifications, debugger, print_code=False):
        """
        Run every modification against the code lines, printing a dot each.
        On an exception the file path is printed, the post-mortem of the
        module named by `debugger` (if any) is started, and the exception is
        raised again.
        """
        try:
            for fm in file_modifications:
                fm.run(grammar, self._code_lines, print_code=print_code)
                print('.', end='')
                sys.stdout.flush()
            print()
        except Exception:
            print("Issue in file: %s" % self._path)
            if debugger:
                einfo = sys.exc_info()
                pdb = __import__(debugger)
                pdb.post_mortem(einfo[2])
            raise
    def redo(self, grammar, debugger, only_last, print_code):
        """
        Replay the remembered modifications.
        If `only_last` is not None, only the last `only_last` modifications
        are replayed; 0 replays nothing.
        """
        mods = self._file_modifications
        if only_last is not None:
            # A plain mods[-only_last:] would select *all* entries for 0,
            # because -0 == 0.
            mods = mods[-only_last:] if only_last else []
        self._run(grammar, mods, debugger, print_code=print_code)
    def run(self, grammar, debugger):
        """Generate `test_count` random modifications and run each of them."""
        def iterate():
            # Generated lazily, so that a modification is remembered right
            # before it is run.
            for _ in range(self._test_count):
                fm = FileModification.generate(self._code_lines, self._change_count)
                self._file_modifications.append(fm)
                yield fm
        self._run(grammar, iterate(), debugger)
 def main(arguments):
    """
    Run the fuzzer as described by the docopt `arguments` mapping.
    With 'redo', the FileTests object pickled by a previous failing run is
    loaded and replayed. With 'random', random files below '<path>' (default
    '.') are fuzzed until '--maxtries' tries were done; on an exception the
    current FileTests object is pickled for a later 'redo' and the exception
    is raised again. Raises NotImplementedError for any other command.
    """
    debugger = 'pdb' if arguments['--pdb'] else \
               'ipdb' if arguments['--ipdb'] else None
    redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle')
    if arguments['--logging']:
        # Send all log records (DEBUG and above) to stdout.
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        root.addHandler(ch)
    grammar = parso.load_grammar()
    parso.python.diff.DEBUG_DIFF_PARSER = True
    if arguments['redo']:
        # NOTE: unpickling can execute arbitrary code. This is only acceptable
        # because the redo file is written by this script itself.
        with open(redo_file, 'rb') as f:
            file_tests_obj = pickle.load(f)
        only_last = arguments['--only-last'] and int(arguments['--only-last'])
        file_tests_obj.redo(
            grammar,
            debugger,
            only_last=only_last,
            print_code=arguments['--print-code']
        )
    elif arguments['random']:
        # A random file is used to do diff parser checks if no file is given.
        # This helps us to find errors in a lot of different files.
        file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.'))
        max_tries = int(arguments['--maxtries'])
        tries = 0
        # Stays None as long as there is no FileTests object that could be
        # saved, so the handler below never hits an unbound name.
        file_tests_obj = None
        try:
            while tries < max_tries:
                path = random.choice(file_paths)
                print("Checking %s: %s tries" % (path, tries))
                now_tries = min(1000, max_tries - tries)
                # Reset first: if creating the FileTests object fails, the
                # object of the previous file must not be saved for a redo.
                file_tests_obj = None
                file_tests_obj = FileTests(path, now_tries, int(arguments['--changes']))
                file_tests_obj.run(grammar, debugger)
                tries += now_tries
        except Exception:
            if file_tests_obj is not None:
                with open(redo_file, 'wb') as f:
                    pickle.dump(file_tests_obj, f)
            raise
    else:
        raise NotImplementedError('Command is not implemented')
 if __name__ == '__main__':
    # docopt is imported here, so it is only needed when this file is executed
    # as a script.
    from docopt import docopt
    # The command line interface is parsed from the module docstring.
    arguments = docopt(__doc__)
    main(arguments)
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -1,14 +1,18 @@
 # -*- coding: utf-8 -*-
 from textwrap import dedent
 import logging
 import sys
 import pytest
 from parso.utils import split_lines
 from parso import cache
 from parso import load_grammar
-from parso.python.diff import DiffParser
+from parso.python.diff import DiffParser, _assert_valid_graph
 from parso import parse
 ANY = object()
 def test_simple():
    """
@@ -21,7 +25,7 @@ def test_simple():
 def _check_error_leaves_nodes(node):
    if node.type in ('error_leaf', 'error_node'):
-        return True
+        return node
    try:
        children = node.children
@@ -29,23 +33,10 @@ def _check_error_leaves_nodes(node):
        pass
    else:
        for child in children:
-            if _check_error_leaves_nodes(child):
+            x_node = _check_error_leaves_nodes(child)
-                return True
+            if x_node is not None:
-    return False
+                return x_node
-
+    return None
 def _assert_valid_graph(node):
    """
    Recursively asserts that every child's parent is the node that lists it
    in its children. Objects without a `children` attribute are not checked.
    """
    try:
        kids = node.children
    except AttributeError:
        pass
    else:
        for kid in kids:
            assert kid.parent == node
            _assert_valid_graph(kid)
 class Differ(object):
@@ -60,6 +51,8 @@ class Differ(object):
        self.lines = split_lines(code, keepends=True)
        self.module = parse(code, diff_cache=True, cache=True)
        assert code == self.module.get_code()
        _assert_valid_graph(self.module)
        return self.module
    def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
@@ -73,11 +66,15 @@ class Differ(object):
        new_module = diff_parser.update(self.lines, lines)
        self.lines = lines
        assert code == new_module.get_code()
        assert diff_parser._copy_count == copies
        #assert diff_parser._parser_count == parsers
        assert expect_error_leaves == _check_error_leaves_nodes(new_module)
        _assert_valid_graph(new_module)
        error_node = _check_error_leaves_nodes(new_module)
        assert expect_error_leaves == (error_node is not None), error_node
        if parsers is not ANY:
            assert diff_parser._parser_count == parsers
        if copies is not ANY:
            assert diff_parser._copy_count == copies
        return new_module
@@ -122,7 +119,7 @@ def test_positions(differ):
    m = differ.parse('a\n\n', parsers=1)
    assert m.end_pos == (3, 0)
-    m = differ.parse('a\n\n ', copies=1, parsers=1)
+    m = differ.parse('a\n\n ', copies=1, parsers=2)
    assert m.end_pos == (3, 1)
    m = differ.parse('a ', parsers=1)
    assert m.end_pos == (1, 2)
@@ -138,7 +135,7 @@ def test_if_simple(differ):
    differ.initialize(src + 'a')
    differ.parse(src + else_ + "a", copies=0, parsers=1)
-    differ.parse(else_, parsers=1, expect_error_leaves=True)
+    differ.parse(else_, parsers=1, copies=1, expect_error_leaves=True)
    differ.parse(src + else_, parsers=1)
@@ -208,7 +205,7 @@ def test_open_parentheses(differ):
    differ.parse(new_code, parsers=1, expect_error_leaves=True)
    new_code = 'a = 1\n' + new_code
-    differ.parse(new_code, copies=1, parsers=1, expect_error_leaves=True)
+    differ.parse(new_code, parsers=2, expect_error_leaves=True)
    func += 'def other_func():\n pass\n'
    differ.initialize('isinstance(\n' + func)
@@ -222,6 +219,7 @@ def test_open_parentheses_at_end(differ):
    differ.initialize(code)
    differ.parse(code, parsers=1, expect_error_leaves=True)
 def test_backslash(differ):
    src = dedent(r"""
    a = 1\
@@ -255,7 +253,7 @@ def test_backslash(differ):
 def test_full_copy(differ):
    code = 'def foo(bar, baz):\n pass\n bar'
    differ.initialize(code)
-    differ.parse(code, copies=1, parsers=1)
+    differ.parse(code, copies=1)
 def test_wrong_whitespace(differ):
@@ -263,10 +261,10 @@ def test_wrong_whitespace(differ):
    hello
    '''
    differ.initialize(code)
-    differ.parse(code + 'bar\n    ', parsers=1)
+    differ.parse(code + 'bar\n    ', parsers=3)
    code += """abc(\npass\n    """
-    differ.parse(code, parsers=1, copies=1, expect_error_leaves=True)
+    differ.parse(code, parsers=2, copies=1, expect_error_leaves=True)
 def test_issues_with_error_leaves(differ):
@@ -367,7 +365,7 @@ def test_totally_wrong_whitespace(differ):
    '''
    differ.initialize(code1)
-    differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
+    differ.parse(code2, parsers=4, copies=0, expect_error_leaves=True)
 def test_node_insertion(differ):
@@ -466,6 +464,9 @@ def test_in_parentheses_newlines(differ):
    b = 2""")
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=1)
 def test_indentation_issue(differ):
    code1 = dedent("""
@@ -483,4 +484,803 @@ def test_indentation_issue(differ):
    """)
    differ.initialize(code1)
-    differ.parse(code2, parsers=2)
+    differ.parse(code2, parsers=1)
 def test_endmarker_newline(differ):
    code1 = dedent('''\
        docu = None
        # some comment
        result = codet
        incomplete_dctassign = {
            "module"
        if "a":
            x = 3 # asdf
    ''')
    code2 = code1.replace('codet', 'coded')
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
 def test_newlines_at_end(differ):
    differ.initialize('a\n\n')
    differ.parse('a\n', copies=1)
 def test_end_newline_with_decorator(differ):
    """
    Re-parse a decorated function after only a trailing newline was appended
    and check that the ``decorated`` node, its suite and both statements of
    the suite (now ending in a newline) are present.
    """
    code = dedent('''\
        @staticmethod
        def spam():
            import json
            json.l''')
    differ.initialize(code)
    module = differ.parse(code + '\n', copies=1, parsers=1)
    decorated, endmarker = module.children
    assert decorated.type == 'decorated'
    decorator, func = decorated.children
    # The suite is the last child of the function definition.
    suite = func.children[-1]
    assert suite.type == 'suite'
    newline, first_stmt, second_stmt = suite.children
    assert first_stmt.get_code() == '    import json\n'
    assert second_stmt.get_code() == '    json.l\n'
 def test_invalid_to_valid_nodes(differ):
    code1 = dedent('''\
    def a():
        foo = 3
        def b():
            la = 3
            else:
                la
            return
        foo
    base
    ''')
    code2 = dedent('''\
    def a():
        foo = 3
        def b():
            la = 3
            if foo:
                latte = 3
            else:
                la
            return
        foo
    base
    ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=3)
 def test_if_removal_and_reappearence(differ):
    code1 = dedent('''\
        la = 3
        if foo:
            latte = 3
        else:
            la
        pass
    ''')
    code2 = dedent('''\
        la = 3
            latte = 3
        else:
            la
        pass
    ''')
    code3 = dedent('''\
        la = 3
        if foo:
            latte = 3
        else:
            la
    ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=4, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)
    differ.parse(code3, parsers=1, copies=1)
 def test_add_error_indentation(differ):
    code = 'if x:\n 1\n'
    differ.initialize(code)
    differ.parse(code + '  2\n', parsers=1, copies=0, expect_error_leaves=True)
 def test_differing_docstrings(differ):
    code1 = dedent('''\
        def foobar(x, y):
            1
            return x
        def bazbiz():
            foobar()
        lala
        ''')
    code2 = dedent('''\
        def foobar(x, y):
            2
            return x + y
        def bazbiz():
            z = foobar()
        lala
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=3, copies=1)
    differ.parse(code1, parsers=3, copies=1)
 def test_one_call_in_function_change(differ):
    code1 = dedent('''\
        def f(self):
            mro = [self]
            for a in something:
                yield a
        def g(self):
            return C(
                a=str,
                b=self,
            )
        ''')
    code2 = dedent('''\
        def f(self):
            mro = [self]
        def g(self):
            return C(
                a=str,
                t
                b=self,
            )
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
    differ.parse(code1, parsers=2, copies=1)
 def test_function_deletion(differ):
    code1 = dedent('''\
        class C(list):
            def f(self):
                def iterate():
                    for x in b:
                        break
                return list(iterate())
        ''')
    code2 = dedent('''\
        class C():
            def f(self):
                    for x in b:
                        break
                return list(iterate())
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=0)
 def test_docstring_removal(differ):
    code1 = dedent('''\
        class E(Exception):
            """
            1
            2
            3
            """
        class S(object):
            @property
            def f(self):
                return cmd
            def __repr__(self):
                return cmd2
        ''')
    code2 = dedent('''\
        class E(Exception):
            """
            1
            3
            """
        class S(object):
            @property
            def f(self):
                return cmd
                return cmd2
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=2)
    differ.parse(code1, parsers=2, copies=1)
 def test_paren_in_strange_position(differ):
    code1 = dedent('''\
        class C:
            """ ha """
            def __init__(self, message):
                self.message = message
        ''')
    code2 = dedent('''\
        class C:
            """ ha """
                    )
            def __init__(self, message):
                self.message = message
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True)
    differ.parse(code1, parsers=0, copies=2)
 def insert_line_into_code(code, index, line):
    """
    Return ``code`` with ``line`` inserted at position ``index`` of the list
    obtained from ``split_lines(code, keepends=True)``.
    """
    code_lines = split_lines(code, keepends=True)
    # Assigning to an empty slice is the documented equivalent of insert().
    code_lines[index:index] = [line]
    return ''.join(code_lines)
 def test_paren_before_docstring(differ):
    code1 = dedent('''\
        # comment
        """
        The
        """
        from parso import tree
        from parso import python
        ''')
    code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
    differ.parse(code1, parsers=2, copies=1)
 def test_parentheses_before_method(differ):
    code1 = dedent('''\
        class A:
            def a(self):
                pass
        class B:
            def b(self):
                if 1:
                    pass
        ''')
    code2 = dedent('''\
        class A:
            def a(self):
                pass
                Exception.__init__(self, "x" %
            def b(self):
                if 1:
                    pass
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)
 def test_indentation_issues(differ):
    code1 = dedent('''\
        class C:
            def f():
                1
                if 2:
                    return 3
            def g():
                to_be_removed
                pass
        ''')
    code2 = dedent('''\
        class C:
            def f():
                1
        ``something``, very ``weird``).
                if 2:
                    return 3
            def g():
                to_be_removed
                pass
        ''')
    code3 = dedent('''\
        class C:
            def f():
                1
                if 2:
                    return 3
            def g():
                pass
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
    differ.parse(code1, copies=2)
    differ.parse(code3, parsers=2, copies=1)
    differ.parse(code1, parsers=1, copies=2)
 def test_error_dedent_issues(differ):
    code1 = dedent('''\
        while True:
            try:
                1
            except KeyError:
                if 2:
                    3
            except IndexError:
                4
        5
        ''')
    code2 = dedent('''\
        while True:
            try:
        except KeyError:
                1
            except KeyError:
                if 2:
                    3
            except IndexError:
                4
                    something_inserted
        5
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=6, copies=2, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=0)
 def test_random_text_insertion(differ):
    code1 = dedent('''\
 class C:
    def f():
        return node
    def g():
        try:
            1
        except KeyError:
            2
        ''')
    code2 = dedent('''\
 class C:
    def f():
        return node
 Some'random text: yeah
        for push in plan.dfa_pushes:
    def g():
        try:
            1
        except KeyError:
            2
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)
 def test_many_nested_ifs(differ):
    code1 = dedent('''\
        class C:
            def f(self):
                def iterate():
                    if 1:
                        yield t
                    else:
                        yield
                return
        def g():
            3
        ''')
    code2 = dedent('''\
            def f(self):
                def iterate():
                    if 1:
                        yield t
        hahahaha
                        if 2:
                            else:
                                yield
                return
        def g():
            3
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)
@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5")
@pytest.mark.parametrize('prefix', ['', 'async '])
 def test_with_and_funcdef_in_call(differ, prefix):
    code1 = prefix + dedent('''\
        with x:
            la = C(
                a=1,
                b=2,
                c=3,
            )
        ''')
    code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
    differ.initialize(code1)
    differ.parse(code2, parsers=3, expect_error_leaves=True)
    differ.parse(code1, parsers=1)
 def test_wrong_backslash(differ):
    code1 = dedent('''\
        def y():
            1
            for x in y:
                continue
        ''')
    code2 = insert_line_into_code(code1, 3, '\\.whl$\n')
    differ.initialize(code1)
    differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
    differ.parse(code1, parsers=1, copies=1)
 def test_comment_change(differ):
    # NOTE(review): as visible here the test only initializes the differ with
    # empty code; no differ.parse() call or assertion follows -- confirm that
    # the rest of the test body was not lost.
    differ.initialize('')
 def test_random_unicode_characters(differ):
    """
    Those issues were all found with the fuzzer.
    """
    differ.initialize('')
    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
    differ.parse(u'\r\r', parsers=1)
    differ.parse(u"˟Ę\x05À\r   rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1)
    s = '        if not (self, "_fi\x02\x0e\x08\n\nle"):'
    differ.parse(s, parsers=1, expect_error_leaves=True)
    differ.parse('')
    differ.parse(s + '\n', parsers=1, expect_error_leaves=True)
    differ.parse(u'   result = (\r\f\x17\t\x11res)', parsers=2, expect_error_leaves=True)
    differ.parse('')
    differ.parse('   a( # xx\ndef', parsers=2, expect_error_leaves=True)
@pytest.mark.skipif(sys.version_info < (2, 7), reason="No set literals in Python 2.6")
 def test_dedent_end_positions(differ):
    code1 = dedent('''\
        if 1:
            if b:
                2
                c = {
                     5}
        ''')
    code2 = dedent('''\
        if 1:
            if ⌟ഒᜈྡྷṭb:
                2
                 'l': ''}
                c = {
                     5}
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, expect_error_leaves=True)
    differ.parse(code1, parsers=1)
 def test_special_no_newline_ending(differ):
    code1 = dedent('''\
        1
        ''')
    code2 = dedent('''\
        1
         is ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=0)
 def test_random_character_insertion(differ):
    code1 = dedent('''\
        def create(self):
            1
            if self.path is not None:
                return
            # 3
            # 4
        ''')
    code2 = dedent('''\
        def create(self):
            1
            if 2:
         x       return
            # 3
            # 4
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=3, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=1)
 def test_import_opening_bracket(differ):
    code1 = dedent('''\
        1
        2
        from bubu import (X,
        ''')
    code2 = dedent('''\
        11
        2
        from bubu import (X,
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=2, expect_error_leaves=True)
 def test_opening_bracket_at_end(differ):
    code1 = dedent('''\
        class C:
            1
            [
        ''')
    code2 = dedent('''\
        3
        class C:
            1
            [
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
 def test_all_sorts_of_indentation(differ):
    code1 = dedent('''\
        class C:
            1
            def f():
                    'same'
                    if foo:
                        a = b
                end
        ''')
    code2 = dedent('''\
        class C:
            1
            def f(yield await %|(
                    'same'
          \x02\x06\x0f\x1c\x11
                    if foo:
                        a = b
                end
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=4, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=3)
    code3 = dedent('''\
            if 1:
                a
                 b
                  c
                   d
        \x00
        ''')
    differ.parse(code3, parsers=2, expect_error_leaves=True)
    differ.parse('')
 def test_dont_copy_dedents_in_beginning(differ):
    code1 = dedent('''\
        a
        4
        ''')
    code2 = dedent('''\
        1
         2
          3
        4
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
    differ.parse(code1, parsers=2)
 def test_dont_copy_error_leaves(differ):
    code1 = dedent('''\
        def f(n):
            x
            if 2:
                3
        ''')
    code2 = dedent('''\
        def f(n):
        def if 1:
                indent
            x
            if 2:
                3
        ''')
    differ.initialize(code1)
    differ.parse(code2, parsers=1, expect_error_leaves=True)
    differ.parse(code1, parsers=2)
 def test_error_dedent_in_between(differ):
    code1 = dedent('''\
        class C:
            def f():
                a
                if something:
                    x
            z
        ''')
    code2 = dedent('''\
        class C:
            def f():
                a
        dedent
                if other_thing:
                    b
                if something:
                    x
            z
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=2)
 def test_some_other_indentation_issues(differ):
    code1 = dedent('''\
        class C:
            x
            def f():
                ""
                copied
        a
        ''')
    code2 = dedent('''\
        try:
            de
                a
                    b
                c
                    d
            def f():
                ""
                copied
        a
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=2, parsers=1, expect_error_leaves=True)
    differ.parse(code1, copies=2, parsers=2)
 def test_open_bracket_case1(differ):
    code1 = dedent('''\
        class C:
            1
            2 # ha
        ''')
    code2 = insert_line_into_code(code1, 2, '    [str\n')
    code3 = insert_line_into_code(code2, 4, '    str\n')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
    differ.parse(code3, copies=1, parsers=1, expect_error_leaves=True)
    differ.parse(code1, copies=1, parsers=1)
 def test_open_bracket_case2(differ):
    code1 = dedent('''\
        class C:
            def f(self):
                (
                b
                c
            def g(self):
                d
        ''')
    code2 = dedent('''\
        class C:
            def f(self):
                (
                b
                c
                self.
            def g(self):
                d
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
    differ.parse(code1, copies=2, parsers=0, expect_error_leaves=True)
 def test_some_weird_removals(differ):
    code1 = dedent('''\
        class C:
            1
        ''')
    code2 = dedent('''\
        class C:
            1
            @property
                A
                    return
            # x
            omega
        ''')
    code3 = dedent('''\
        class C:
            1
        ;
            omega
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
    differ.parse(code3, copies=1, parsers=2, expect_error_leaves=True)
    differ.parse(code1, copies=1)
@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5")
 def test_async_copy(differ):
    code1 = dedent('''\
        async def main():
            x = 3
            print(
        ''')
    code2 = dedent('''\
        async def main():
            x = 3
            print()
        ''')
    differ.initialize(code1)
    differ.parse(code2, copies=1, parsers=1)
    differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
--- a/test/test_error_recovery.py
+++ b/test/test_error_recovery.py
@@ -0,0 +1,85 @@
 from parso import parse, load_grammar
 def test_with_stmt():
    """
    An incomplete one-line body (``f.``) of a ``with`` statement becomes an
    ``error_node`` inside the ``with_stmt``; the name on the following line is
    still found as a separate module child.
    """
    module = parse('with x: f.\na')
    assert module.children[0].type == 'with_stmt'
    w, with_item, colon, f = module.children[0].children
    assert f.type == 'error_node'
    assert f.get_code(include_prefix=False) == 'f.'
    assert module.children[2].type == 'name'
 def test_one_line_function(each_version):
    """
    A one-line function with the incomplete body ``f.`` still yields a
    ``funcdef`` whose last child is an ``error_node``. An unfinished
    signature (``def x(a:``) makes the first module child an ``error_node``.
    """
    module = parse('def x(): f.', version=each_version)
    assert module.children[0].type == 'funcdef'
    def_, name, parameters, colon, f = module.children[0].children
    assert f.type == 'error_node'
    module = parse('def x(a:', version=each_version)
    func = module.children[0]
    assert func.type == 'error_node'
    if each_version.startswith('2'):
        # presumably the `:` is not accepted after a parameter in Python 2
        # grammars, so the error node ends at `a` -- TODO confirm.
        assert func.children[-1].value == 'a'
    else:
        assert func.children[-1] == ':'
 def test_if_else():
    """
    Incomplete statements inside the suites of an ``if``/``else`` do not break
    the statement apart: the first module child still unpacks into seven
    children, and the broken code (``f.`` and ``g(``) is found in the second
    child of each suite.
    """
    module = parse('if x:\n f.\nelse:\n g(')
    if_stmt = module.children[0]
    if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children
    f = suite1.children[1]
    assert f.type == 'error_node'
    assert f.children[0].value == 'f'
    assert f.children[1].value == '.'
    g = suite2.children[1]
    assert g.children[0].value == 'g'
    assert g.children[1].value == '('
 def test_if_stmt():
    """
    With a one-line ``if`` whose body is incomplete, the ``if_stmt`` ends with
    an ``error_node``. The ``else`` and ``:`` on the next line end up as error
    leaves directly in the module, followed by an ``error_node`` for ``g(``.
    """
    module = parse('if x: f.\nelse: g(')
    if_stmt = module.children[0]
    assert if_stmt.type == 'if_stmt'
    if_, test, colon, f = if_stmt.children
    assert f.type == 'error_node'
    assert f.children[0].value == 'f'
    assert f.children[1].value == '.'
    assert module.children[1].type == 'newline'
    assert module.children[1].value == '\n'
    assert module.children[2].type == 'error_leaf'
    assert module.children[2].value == 'else'
    assert module.children[3].type == 'error_leaf'
    assert module.children[3].value == ':'
    in_else_stmt = module.children[4]
    assert in_else_stmt.type == 'error_node'
    assert in_else_stmt.children[0].value == 'g'
    assert in_else_stmt.children[1].value == '('
 def test_invalid_token():
    """
    An invalid token (``?``) inside an expression becomes an ``error_leaf``.
    The code before it (``a +``) is kept as one node and the code after it
    (`` + b``) is parsed as a ``factor``.
    """
    module = parse('a + ? + b')
    error_node, q, plus_b, endmarker = module.children
    assert error_node.get_code() == 'a +'
    assert q.value == '?'
    assert q.type == 'error_leaf'
    assert plus_b.type == 'factor'
    assert plus_b.get_code() == ' + b'
 def test_invalid_token_in_fstr():
    """
    Same as the plain invalid token case, but inside an f-string parsed with
    the 3.6 grammar: ``?`` is an ``error_leaf``, the remaining `` + b`` is an
    ``error_node`` and the closing brace and quote are error leaves as well.
    """
    module = load_grammar(version='3.6').parse('f"{a + ? + b}"')
    error_node, q, plus_b, error1, error2, endmarker = module.children
    assert error_node.get_code() == 'f"{a +'
    assert q.value == '?'
    assert q.type == 'error_leaf'
    assert plus_b.type == 'error_node'
    assert plus_b.get_code() == ' + b'
    assert error1.value == '}'
    assert error1.type == 'error_leaf'
    assert error2.value == '"'
    assert error2.type == 'error_leaf'
--- a/test/test_fstring.py
+++ b/test/test_fstring.py
@@ -1,17 +1,19 @@
 import pytest
 from textwrap import dedent
 from parso import load_grammar, ParserSyntaxError
-from parso.python.fstring import tokenize
+from parso.python.tokenize import tokenize
@pytest.fixture
 def grammar():
-    return load_grammar(language="python-f-string")
+    return load_grammar(version='3.6')
@pytest.mark.parametrize(
    'code', [
        '{1}',
        '{1:}',
        '',
        '{1!a}',
        '{1!a:1}',
@@ -26,22 +28,12 @@ def grammar():
        '{{{1}',
        '1{{2{{3',
        '}}',
        '{:}}}',
        # Invalid, but will be checked, later.
        '{}',
        '{1:}',
        '{:}',
        '{:1}',
        '{!:}',
        '{!}',
        '{!a}',
        '{1:{}}',
        '{1:{:}}',
    ]
 )
 def test_valid(code, grammar):
-    fstring = grammar.parse(code, error_recovery=False)
+    code = 'f"""%s"""' % code
    module = grammar.parse(code, error_recovery=False)
    fstring = module.children[0]
    assert fstring.type == 'fstring'
    assert fstring.get_code() == code
@@ -52,24 +44,46 @@ def test_valid(code, grammar):
        '{',
        '{1!{a}}',
        '{!{a}}',
        '{}',
        '{:}',
        '{:}}}',
        '{:1}',
        '{!:}',
        '{!}',
        '{!a}',
        '{1:{}}',
        '{1:{:}}',
    ]
 )
 def test_invalid(code, grammar):
    code = 'f"""%s"""' % code
    with pytest.raises(ParserSyntaxError):
        grammar.parse(code, error_recovery=False)
    # It should work with error recovery.
-    #grammar.parse(code, error_recovery=True)
+    grammar.parse(code, error_recovery=True)
@pytest.mark.parametrize(
-    ('code', 'start_pos', 'positions'), [
+    ('code', 'positions'), [
        # 2 times 2, 5 because python expr and endmarker.
-        ('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
+        ('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
-        (' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
+        ('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
-        ('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
+                           (1, 10), (1, 11), (1, 12), (1, 13)]),
        ('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
                                  (4, 2), (4, 5)]),
    ]
 )
-def test_tokenize_start_pos(code, start_pos, positions):
+def test_tokenize_start_pos(code, positions):
-    tokens = tokenize(code, start_pos)
+    tokens = list(tokenize(code, version_info=(3, 6)))
    assert positions == [p.start_pos for p in tokens]
 def test_roundtrip(grammar):
    code = dedent("""\
        f'''s{
           str.uppe
        '''
        """)
    tree = grammar.parse(code)
    assert tree.get_code() == code
--- a/test/test_get_code.py
+++ b/test/test_get_code.py
@@ -106,14 +106,15 @@ def test_end_newlines():
@pytest.mark.parametrize(('code', 'types'), [
-    ('\r', ['error_leaf', 'endmarker']),
+    ('\r', ['endmarker']),
-    ('\n\r', ['error_leaf', 'endmarker'])
+    ('\n\r', ['endmarker'])
 ])
 def test_carriage_return_at_end(code, types):
    """
-    By adding an artificial newline this creates weird side effects for
+    By adding an artificial newline this created weird side effects for
-    \r at the end of files that would normally be error leafs.
+    \r at the end of files.
    """
    tree = parse(code)
    assert tree.get_code() == code
    assert [c.type for c in tree.children] == types
    assert tree.end_pos == (len(code) + 1, 0)
--- a/test/test_param_splitting.py
+++ b/test/test_param_splitting.py
@@ -32,3 +32,16 @@ def test_split_params_with_stars():
    assert_params(u'x, *args', x=None, args=None)
    assert_params(u'**kwargs', kwargs=None)
    assert_params(u'*args, **kwargs', args=None, kwargs=None)
 def test_kw_only_no_kw(works_ge_py3):
    """
    Parsing this should be working. In CPython the parser also parses this and
    in a later step the AST complains.

    If a module is returned, the parameter nodes must consist of the opening
    parenthesis, the ``arg,`` parameter, the bare ``*`` and the closing
    parenthesis.
    """
    module = works_ge_py3.parse('def test(arg, *):\n    pass')
    if module is not None:
        func = module.children[0]
        open_, p1, asterisk, close = func._get_param_nodes()
        # Compare the code explicitly. Passing 'arg,' as an argument to
        # get_code() would only assert that the returned code is truthy.
        assert p1.get_code() == 'arg,'
        assert asterisk.value == '*'
--- a/test/test_pgen2.py
+++ b/test/test_pgen2.py
@@ -12,6 +12,8 @@ import pytest
 from parso import load_grammar
 from parso import ParserSyntaxError
 from parso.pgen2 import generate_grammar
 from parso.python import tokenize
 def _parse(code, version=None):
@@ -270,3 +272,19 @@ def py_br(each_version):
 def test_py3_rb(works_ge_py3):
    works_ge_py3.parse("rb'1'")
    works_ge_py3.parse("RB'1'")
 def test_left_recursion():
    """
    A rule that refers to itself as its first item (``foo: foo NAME``) must be
    rejected with a ``ValueError`` mentioning left recursion.
    """
    with pytest.raises(ValueError, match='left recursion'):
        generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)
 def test_ambiguities():
    """
    Each of these grammars has a rule with two alternatives that start with
    the same token; generating them must raise a ``ValueError`` whose message
    mentions that the grammar is ambiguous.
    """
    ambiguous_grammars = (
        # Both alternatives start with a NAME token.
        'foo: bar | baz\nbar: NAME\nbaz: NAME\n',
        # Same keyword, only written with different quote characters.
        '''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''',
        # The keyword is reachable directly and through another rule.
        '''foo: bar | 'x'\nbar: 'x'\n''',
    )
    for bnf_text in ambiguous_grammars:
        with pytest.raises(ValueError, match='ambiguous'):
            generate_grammar(bnf_text, tokenize.PythonTokenTypes)
--- a/test/test_python_errors.py
+++ b/test/test_python_errors.py
@@ -114,6 +114,22 @@ def _get_actual_exception(code):
        # Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
        # certain places. But in others this error makes sense.
        return [wanted, "SyntaxError: can't use starred expression here"], line_nr
    elif wanted == 'SyntaxError: f-string: unterminated string':
        wanted = 'SyntaxError: EOL while scanning string literal'
    elif wanted == 'SyntaxError: f-string expression part cannot include a backslash':
        return [
            wanted,
            "SyntaxError: EOL while scanning string literal",
            "SyntaxError: unexpected character after line continuation character",
        ], line_nr
    elif wanted == "SyntaxError: f-string: expecting '}'":
        wanted = 'SyntaxError: EOL while scanning string literal'
    elif wanted == 'SyntaxError: f-string: empty expression not allowed':
        wanted = 'SyntaxError: invalid syntax'
    elif wanted == "SyntaxError: f-string expression part cannot include '#'":
        wanted = 'SyntaxError: invalid syntax'
    elif wanted == "SyntaxError: f-string: single '}' is not allowed":
        wanted = 'SyntaxError: invalid syntax'
    return [wanted], line_nr
@@ -242,6 +258,11 @@ def test_too_many_levels_of_indentation():
@pytest.mark.parametrize(
    'code', [
        "f'{*args,}'",
        r'f"\""',
        r'f"\\\""',
        r'fr"\""',
        r'fr"\\\""',
        r"print(f'Some {x:.2f} and some {y}')",
    ]
 )
 def test_valid_fstrings(code):
@@ -251,6 +272,8 @@ def test_valid_fstrings(code):
@pytest.mark.parametrize(
    ('code', 'message'), [
        ("f'{1+}'", ('invalid syntax')),
        (r'f"\"', ('invalid syntax')),
        (r'fr"\"', ('invalid syntax')),
    ]
 )
 def test_invalid_fstrings(code, message):
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -6,16 +6,30 @@ import pytest
 from parso._compatibility import py_version
 from parso.utils import split_lines, parse_version_string
-from parso.python.token import (
+from parso.python.token import PythonTokenTypes
    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import PythonToken
-def _get_token_list(string):
+# To make it easier to access some of the token types, just put them here.
 NAME = PythonTokenTypes.NAME
 NEWLINE = PythonTokenTypes.NEWLINE
 STRING = PythonTokenTypes.STRING
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
 OP = PythonTokenTypes.OP
 ENDMARKER = PythonTokenTypes.ENDMARKER
 ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
 FSTRING_START = PythonTokenTypes.FSTRING_START
 FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
 FSTRING_END = PythonTokenTypes.FSTRING_END
 def _get_token_list(string, version=None):
    # Load the current version.
-    version_info = parse_version_string()
+    version_info = parse_version_string(version)
    return list(tokenize.tokenize(string, version_info))
@@ -126,7 +140,7 @@ def test_identifier_contains_unicode():
    else:
        # Unicode tokens in Python 2 seem to be identified as operators.
        # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == tokenize.ERRORTOKEN
+        assert unicode_token[0] == OP
 def test_quoted_strings():
@@ -162,6 +176,7 @@ def test_ur_literals():
        token_list = _get_token_list(literal)
        typ, result_literal, _, _ = token_list[0]
        if is_literal:
            if typ != FSTRING_START:
                assert typ == STRING
                assert result_literal == literal
        else:
@@ -175,6 +190,7 @@ def test_ur_literals():
    # Starting with Python 3.3 this ordering is also possible.
    if py_version >= 33:
        check('Rb""')
    # Starting with Python 3.6 format strings were introduced.
    check('fr""', is_literal=py_version >= 36)
    check('rF""', is_literal=py_version >= 36)
@@ -183,18 +199,18 @@ def test_ur_literals():
 def test_error_literal():
-    error_token, endmarker = _get_token_list('"\n')
+    error_token, newline, endmarker = _get_token_list('"\n')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
-    assert endmarker.prefix == ''
+    assert error_token.string == '"'
-    assert error_token.string == '"\n'
+    assert newline.type == NEWLINE
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
    assert endmarker.prefix == ''
    bracket, error_token, endmarker = _get_token_list('( """')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
    assert error_token.prefix == ' '
    assert error_token.string == '"""'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
    assert endmarker.prefix == ''
@@ -224,3 +240,105 @@ def test_endmarker_end_pos():
 def test_indentation(code, types):
    actual_types = [t.type for t in _get_token_list(code)]
    assert actual_types == types + [ENDMARKER]
 def test_error_string():
    """An unterminated one-line string yields an error token, then a newline."""
    t1, newline, endmarker = _get_token_list(' "\n')
    # The lone quote is the error token; the leading space ends up in its
    # prefix rather than in the token text.
    assert t1.type == ERRORTOKEN
    assert t1.prefix == ' '
    assert t1.string == '"'
    assert newline.type == NEWLINE
    # The final token carries neither a prefix nor any text.
    assert endmarker.prefix == ''
    assert endmarker.string == ''
 def test_indent_error_recovery():
    """
    Check the token types produced for an indented, unclosed ``str(`` that is
    followed by less-indented ``from x import a`` and ``def`` lines.
    """
    code = dedent("""\
                        str(
        from x import a
        def
        """)
    lst = _get_token_list(code)
    expected = [
        # `str(`
        INDENT, NAME, OP,
        # `from x`
        NAME, NAME,
        # `import a` on the same line as the previous `from x`
        NAME, NAME, NEWLINE,
        # Dedent happens, because there's an import now and the import
        # statement "breaks" out of the opening paren on the first line.
        DEDENT,
        # `def`
        NAME, NEWLINE, ENDMARKER]
    assert [t.type for t in lst] == expected
 def test_error_token_after_dedent():
    """
    An invalid character (``$``) on the first line after an indented block:
    the DEDENT is expected before the ERRORTOKEN, and the name that follows
    the ``$`` is still expected as a regular NAME.
    """
    code = dedent("""\
        class C:
            pass
        $foo
        """)
    lst = _get_token_list(code)
    expected = [
        # `class C:\n`, then the indented `pass\n`, then the dedent.
        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
        # $foo\n
        ERRORTOKEN, NAME, NEWLINE, ENDMARKER
    ]
    assert [t.type for t in lst] == expected
 def test_brackets_no_indentation():
    """
    There used to be an issue where the parentheses count would go below
    zero. This should not happen.

    The input starts with an unmatched closing brace, followed by an opening
    brace whose closing brace sits on the next, indented line.
    """
    code = dedent("""\
        }
        {
          }
        """)
    lst = _get_token_list(code)
    # Expected: `}` NEWLINE, then `{` and the indented `}` back to back (no
    # NEWLINE, INDENT or DEDENT between them), then NEWLINE and ENDMARKER.
    assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER]
 def test_form_feed():
    """A form feed before an unterminated triple quote lands in the prefix."""
    # The tokenized text is a form feed followed by three double quotes and
    # nothing else, so exactly two tokens are unpacked.
    error_token, endmarker = _get_token_list(dedent('''\
        \f"""'''))
    # The form feed is not part of the token text, only of its prefix.
    assert error_token.prefix == '\f'
    assert error_token.string == '"""'
    assert endmarker.prefix == ''
 def test_carriage_return():
    """Check the token types for a backslash directly followed by a bare ``\\r``."""
    # Input: space, `=`, backslash, carriage return, `class`.
    lst = _get_token_list(' =\\\rclass')
    # No NEWLINE token is expected between the operator and the DEDENT.
    assert [t.type for t in lst] == [INDENT, OP, DEDENT, NAME, ENDMARKER]
 def test_backslash():
    """A backslash line continuation followed only by a comment line."""
    code = '\\\n# 1 \n'
    # Unpacking into a single name asserts that exactly one token is produced.
    endmarker, = _get_token_list(code)
    # The entire input ends up in the prefix of that one token.
    assert endmarker.prefix == code
# Each case pairs a code snippet with the token types expected for it; the
# trailing ENDMARKER is appended in the test body instead of being listed here.
@pytest.mark.parametrize(
    ('code', 'types'), [
        # Unterminated f-string: only the start token is expected.
        ('f"', [FSTRING_START]),
        ('f""', [FSTRING_START, FSTRING_END]),
        ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
        # Braces after the closing quote are expected as plain operators.
        ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
        # NOTE(review): the next two cases are identical; one of them is
        # redundant (possibly a different escape variant was intended).
        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
        (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
                                 FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
        (r'print(f"Some {x:.2f}a{y}")', [
            NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
            FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
        ]),
    ]
 )
 def test_fstring(code, types, version_ge_py36):
    """
    Tokenize ``code`` and compare the token types against ``types``.

    ``version_ge_py36`` is a fixture defined outside this view; it is passed
    to ``_get_token_list`` as the version (presumably only versions >= 3.6;
    verify against the fixture definition).
    """
    actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
    assert types + [ENDMARKER] == actual_types
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -3,21 +3,42 @@ from codecs import BOM_UTF8
 from parso.utils import split_lines, python_bytes_to_unicode
 import parso
-
+import pytest
 def test_split_lines_no_keepends():
    assert split_lines('asd\r\n') == ['asd', '']
    assert split_lines('asd\r\n\f') == ['asd', '\f']
    assert split_lines('\fasd\r\n') == ['\fasd', '']
    assert split_lines('') == ['']
    assert split_lines('\n') == ['', '']
-def test_split_lines_keepends():
+@pytest.mark.parametrize(
-    assert split_lines('asd\r\n', keepends=True) == ['asd\r\n', '']
+    ('string', 'expected_result', 'keepends'), [
-    assert split_lines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
+        ('asd\r\n', ['asd', ''], False),
-    assert split_lines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
+        ('asd\r\n', ['asd\r\n', ''], True),
-    assert split_lines('', keepends=True) == ['']
+        ('asd\r', ['asd', ''], False),
-    assert split_lines('\n', keepends=True) == ['\n', '']
+        ('asd\r', ['asd\r', ''], True),
        ('asd\n', ['asd', ''], False),
        ('asd\n', ['asd\n', ''], True),
        ('asd\r\n\f', ['asd', '\f'], False),
        ('asd\r\n\f', ['asd\r\n', '\f'], True),
        ('\fasd\r\n', ['\fasd', ''], False),
        ('\fasd\r\n', ['\fasd\r\n', ''], True),
        ('', [''], False),
        ('', [''], True),
        ('\n', ['', ''], False),
        ('\n', ['\n', ''], True),
        ('\r', ['', ''], False),
        ('\r', ['\r', ''], True),
        # Invalid line breaks
        ('a\vb', ['a\vb'], False),
        ('a\vb', ['a\vb'], True),
        ('\x1C', ['\x1C'], False),
        ('\x1C', ['\x1C'], True),
    ]
 )
 def test_split_lines(string, expected_result, keepends):
    assert split_lines(string, keepends=keepends) == expected_result
 def test_python_bytes_to_unicode_unicode_text():
--- a/tox.ini
+++ b/tox.ini
@@ -1,10 +1,10 @@
 [tox]
-envlist = py26, py27, py33, py34, py35, py36
+envlist = py27, py33, py34, py35, py36, py37, pypy
 [testenv]
 extras = testing
 deps =
-    pytest>=3.0.7
+    py26,py33: pytest>=3.0.7,<3.3
-# For --lf and --ff.
+    py26,py33: setuptools<37
    pytest-cache
 setenv =
 # https://github.com/tomchristie/django-rest-framework/issues/1957
 # tox corrupts __pycache__, solution from here:
@@ -14,7 +14,6 @@ commands =
 [testenv:cov]
 deps =
    coverage
    {[testenv]deps}
 commands =
    coverage run --source parso -m pytest
    coverage report