386 Commits

Author SHA1 Message Date
Dave Halter
fc6202ffb3 Rename _get_definition to get_definition. 2017-09-03 00:42:06 +02:00
Dave Halter
0e201810fa Remove the old get_definition function. 2017-09-03 00:41:39 +02:00
Dave Halter
7a9739c5d6 Add an option to the new get_definition. 2017-09-02 23:45:50 +02:00
Dave Halter
1bf9ca94bb A small docstring. 2017-09-02 22:41:15 +02:00
Dave Halter
a4d28d2eda Rework is_definition. 2017-09-02 17:26:29 +02:00
Dave Halter
36ddbaddf4 Remove a note about not parsing f-strings. 2017-09-02 14:13:47 +02:00
Dave Halter
b944fb9145 Fix a test around rb/br string literals. 2017-09-02 14:09:15 +02:00
Dave Halter
7a85409da7 Get rid of one more isinstance. 2017-09-02 13:59:21 +02:00
Dave Halter
e79c0755eb iter_return_stmts should also return return statements without a value. 2017-09-02 12:32:32 +02:00
Dave Halter
9ab5937a3c Fix yield scanning. 2017-09-01 18:03:47 +02:00
Dave Halter
d3e58955a9 iter_yield_expr should now work correctly. 2017-09-01 09:34:01 +02:00
Dave Halter
a21ec2c0ad Better yield/yield from support. 2017-09-01 01:06:07 +02:00
Dave Halter
910a660c6f Fix some todos. 2017-09-01 00:35:22 +02:00
Dave Halter
68fa70b959 future_import_names is not public, at the moment. 2017-09-01 00:25:32 +02:00
Dave Halter
fa0bf4951c Fix string prefixes for Python2. 2017-09-01 00:20:24 +02:00
Dave Halter
ba2c0ad41a Bump version. 2017-08-31 22:44:15 +02:00
Dave Halter
4b32408001 Finally get a release out with this script. 2017-08-31 22:41:28 +02:00
Dave Halter
5c3304f7c2 Move start symbols a bit. 2017-08-31 19:39:27 +02:00
Dave Halter
92f2761398 Moved deploy.sh to deploy-master.sh 2017-08-31 19:32:05 +02:00
Dave Halter
6a6da7ca58 Modify the deployment script a bit more. 2017-08-31 19:27:23 +02:00
Dave Halter
676828fd4f Change deploy script to better handle tags that already exist and move to a separate folder to avoid deploying files that make no sense. 2017-08-31 09:00:47 +02:00
Dave Halter
9ae1b000de Small whitespace change. 2017-08-30 16:10:36 +02:00
Dave Halter
20f3fd101a Fix py3.3 and remove a print. 2017-08-28 21:47:00 +02:00
Dave Halter
45c434799f Rename indent -> column. 2017-08-28 21:34:17 +02:00
Dave Halter
826a6b4453 Add some notes to CONTRIBUTING.md. 2017-08-28 18:34:35 +02:00
Dave Halter
db1079a7fe Actual support for backticks. 2017-08-28 18:32:48 +02:00
Dave Halter
b921e280b0 Some last fstring fixes. 2017-08-28 18:10:30 +02:00
Dave Halter
cba82773d4 Small test refactoring, because not all fstrings errors from parso match the CPython equivalents. 2017-08-26 19:18:16 +02:00
Dave Halter
66606403c7 Most fstrings issues should be fixed now. 2017-08-26 19:15:09 +02:00
Dave Halter
5b7a01ba62 Finally add all fstring errors except the nested invalid syntax stuff that occurs in nested Python. 2017-08-26 12:16:06 +02:00
Dave Halter
68cc383d02 Do proper error recovery for fstrings and fix another issue there. 2017-08-26 12:02:10 +02:00
Dave Halter
1893f77e15 Add issue "f-string: expecting }". 2017-08-26 11:53:59 +02:00
Dave Halter
296ecc6728 Add 'f-string: expressions nested too deeply' issue. 2017-08-26 11:10:04 +02:00
Dave Halter
7d4aa755a5 Add conversion issues. 2017-08-26 03:13:59 +02:00
Dave Halter
8a448303d1 Fix an issue that created an endless loop. 2017-08-26 03:08:10 +02:00
Dave Halter
ede8a2139f Add some first f-string issues. 2017-08-25 22:09:58 +02:00
Dave Halter
609ab1ffa9 Hack around the fact that the tokenizers are not really integrated with parsers. 2017-08-25 21:21:57 +02:00
Dave Halter
09b05422a6 Fix an issue with continue error detection. 2017-08-25 21:02:56 +02:00
Dave Halter
f3db064d7d Try to correctly calculate start positions. 2017-08-25 20:58:28 +02:00
Dave Halter
8bc54f5a29 Fix all the issues with f-string escapes. 2017-08-25 10:22:41 +02:00
Dave Halter
204e750dd5 Add more f-string tests. 2017-08-25 09:47:24 +02:00
Dave Halter
0c0a4b5a5d Add some fstring tests. 2017-08-24 01:39:28 +02:00
Dave Halter
3c3e7f5317 Make some parsing work. 2017-08-24 01:39:17 +02:00
Dave Halter
033e880408 Add fstrings to the grammar. 2017-08-24 00:01:42 +02:00
Dave Halter
2ba107872d A first iteration of an fstring tokenizer/grammar. 2017-08-23 23:49:11 +02:00
Dave Halter
929593701a Remove opmap from pgen. 2017-08-22 08:45:10 +02:00
Dave Halter
d7f2051f8a Remove some use cases from token.tok_name in pgen. 2017-08-21 20:33:29 +02:00
Dave Halter
6630542847 Make a method in the tokenizer private. 2017-08-21 18:31:10 +02:00
Dave Halter
c0d3734e81 Remove exact_type from PythonToken, because the support for this differentiation of exact_type and type was never really there. 2017-08-21 18:30:14 +02:00
Dave Halter
88c3c0567a Rename TokenInfo to PythonToken. 2017-08-21 09:33:41 +02:00
Dave Halter
1912551296 Move some star_expr checking around. 2017-08-19 12:59:49 +02:00
Dave Halter
d5d7518ca4 Allow multiple values to be used when registering rules. 2017-08-19 12:51:10 +02:00
Dave Halter
820e94e03a Move the error node checking to a rule. 2017-08-19 12:43:47 +02:00
Dave Halter
5ff33c3736 Refactor the error signatures. 2017-08-19 10:54:29 +02:00
Dave Halter
5916b676de Move the TryStmtRule. 2017-08-18 18:53:11 +02:00
Dave Halter
30acb69caa Move all of the check_assignment stuff into classes. 2017-08-18 18:47:49 +02:00
Dave Halter
84e5adb364 Move the parameter rules. 2017-08-18 18:37:19 +02:00
Dave Halter
ceb7121dc6 Move the big arglist rule. 2017-08-18 18:32:24 +02:00
Dave Halter
8ba7001b44 Protect some classes that don't need to be public. 2017-08-18 18:21:18 +02:00
Dave Halter
76578218d9 Move argument and nonlocal rule. 2017-08-18 18:20:37 +02:00
Dave Halter
904f293830 Move annassign and starexprs. 2017-08-18 18:15:24 +02:00
Dave Halter
a53477321b Move some import rule stuff around. 2017-08-17 20:49:27 +02:00
Dave Halter
e8196c0c06 Move the first node (atom) to rules. 2017-08-17 20:42:32 +02:00
Dave Halter
33ad290512 More rules, yay. 2017-08-17 20:39:34 +02:00
Dave Halter
eb2c0ab93b Move string checks to a rule. 2017-08-17 20:25:23 +02:00
Dave Halter
0fd2e169bd More rule moves. 2017-08-17 20:22:32 +02:00
Dave Halter
657baabb0b Move continue checks to a rule. 2017-08-17 20:12:23 +02:00
Dave Halter
f055ba198f Move more stuff and fix tests. 2017-08-17 10:38:45 +02:00
Dave Halter
06d7da3f3e Move the first issue to the new rule engine. 2017-08-17 10:25:11 +02:00
Dave Halter
8d20f3d469 Finish refactoring rules. 2017-08-17 09:33:46 +02:00
Dave Halter
e37e2e1ff6 Switch the add_issue signature around. 2017-08-16 22:44:33 +02:00
Dave Halter
fc80cebfb8 More normalizer Rule refactoring. 2017-08-16 22:13:11 +02:00
Dave Halter
412da07893 Refactor the Rule class a bit. 2017-08-16 18:17:51 +02:00
Dave Halter
51f2de28c6 source_to_unicode -> python_bytes_to_unicode. 2017-08-15 20:07:24 +02:00
Dave Halter
ab027885c7 Refactor splitlines -> split_lines. 2017-08-15 19:54:21 +02:00
Dave Halter
476d5fb0d1 Some Python2.7 fixes. 2017-08-15 19:34:47 +02:00
Dave Halter
86aa185136 Use source_to_unicode by default with errors='strict'. 2017-08-15 19:27:27 +02:00
Dave Halter
a305a911f1 Create some examples for fstring fails. 2017-08-14 18:54:28 +02:00
Dave Halter
13cc39c3f0 Add pypy to the tests on travis. 2017-08-06 18:25:40 +02:00
Dave Halter
2322b3aed3 Don't use pickle in pypy. It's really slow. (Armin Rigo told me a while ago). 2017-08-06 18:08:29 +02:00
Dave Halter
e2c97e9675 Skip tests in PyPy that don't make sense to test. 2017-08-06 18:04:31 +02:00
Dave Halter
c23ae8a69b Add the syntax/indentation error list that can be found in CPython (mostly 3.6). 2017-08-06 17:38:17 +02:00
Dave Halter
3c961e2b70 Rename .params to .get_params(). 2017-08-06 17:29:29 +02:00
Dave Halter
8c3373a0d9 Rename normalizer -> errors. 2017-08-06 17:12:22 +02:00
Dave Halter
860fe2cd1e Refactor the initialization of contexts a bit so a variety of nodes can be taken as input. 2017-08-06 16:44:53 +02:00
Dave Halter
524f332e7d Small refactoring. 2017-08-06 16:11:31 +02:00
Dave Halter
1a30afdd9e Move a few imports out of functions. 2017-08-06 15:34:47 +02:00
Dave Halter
90edb2d0cf Move the iter_errors and normalizer methods to grammar.py. 2017-08-06 15:32:50 +02:00
Dave Halter
3e39a04bb1 Fix opening contexts at the right position. 2017-08-06 14:39:26 +02:00
Dave Halter
31675e74a2 Add a python3.3 grammar, this is just cleaner IMO. 2017-08-06 03:00:14 +02:00
Dave Halter
8382f338ca Fix some small issues around expr_stmt. 2017-08-06 02:52:52 +02:00
Dave Halter
f4b51d91ee Add python3.4/3.3 issue 'can use starred expression only as assignment target'. 2017-08-06 01:38:05 +02:00
Dave Halter
97525958b6 Move failing examples to a separate file. 2017-08-06 00:46:05 +02:00
Dave Halter
d33e45f4cd Rename a test function. 2017-08-06 00:38:14 +02:00
Dave Halter
0e3e393f37 Self-test on parso's files to check that there are no syntax errors. 2017-08-05 23:39:44 +02:00
Dave Halter
db3c635fcd English. 2017-08-05 23:13:44 +02:00
Dave Halter
065081f227 Fix two more TODOs about bytes in Python 2. 2017-08-05 23:12:33 +02:00
Dave Halter
0cf5a36652 Fix a todo about __future__. 2017-08-05 23:07:43 +02:00
Dave Halter
a2d6336028 General cleanup in syntax error detection. Adding a few comments. 2017-08-05 23:03:39 +02:00
Dave Halter
c3aab1b148 Finally able to delete a function because all the tests were generalized. 2017-08-05 22:55:12 +02:00
Dave Halter
f8314f0dea Another small refactoring. 2017-08-05 22:47:16 +02:00
Dave Halter
c07ad1dfe3 Move more errors to be used by all versions. 2017-08-05 22:45:53 +02:00
Dave Halter
aae38e2c73 Move more tests. 2017-08-05 22:39:17 +02:00
Dave Halter
63e30843dc Add "'yield' inside async function" for Python 3.5. 2017-08-05 22:33:11 +02:00
Dave Halter
94570acef7 Move another test to be used by all versions. 2017-08-05 21:38:09 +02:00
Dave Halter
1b235e16e1 Fixing a warning and moving some tests around. 2017-08-05 21:31:13 +02:00
Dave Halter
172a534baa Move some tests around. 2017-08-05 21:22:21 +02:00
Dave Halter
0513b14225 Write a few more tests. 2017-08-05 21:12:25 +02:00
Dave Halter
5f73b42067 Fix issues with error leafs and error nodes in a certain order. 2017-08-05 21:00:25 +02:00
Dave Halter
57f7c465ce Add 'too many levels of indentation' issue. 2017-08-05 14:45:47 +02:00
Dave Halter
44edbd56e7 Some better messages for tokenizing failures. 2017-08-05 14:14:47 +02:00
Dave Halter
5fd6469b6f A small improvement to python -m parso.python.tokenize 2017-08-05 01:16:30 +02:00
Dave Halter
e62a88b190 Use a bit better warnings for tokenizer errors. 2017-08-03 21:48:04 +02:00
Dave Halter
8cebfc3e8a Add some QA tests to avoid not realizing that escape decoding in literals has been changed. 2017-08-02 11:10:01 +02:00
Dave Halter
d6b6354e17 Get escaping issues working in all versions. 2017-08-02 10:39:02 +02:00
Dave Halter
3ccbf4326c Added the unicode/bytes literal escaping issues. 2017-08-02 10:17:15 +02:00
Dave Halter
d6c624bd34 Finally fix Python 2.6.
Also added a syntax for 2.6. There are some things that just don't work in 2.6.
2017-07-31 21:33:59 +02:00
Dave Halter
ddd16124ac Fix Python 2.7 tests. 2017-07-31 12:45:39 +02:00
Dave Halter
58c32591d0 Some preparations to get Python 2 tests working. 2017-07-31 12:33:47 +02:00
Dave Halter
5fbbb225dd Refactor a bit for Python 2. 2017-07-31 10:27:57 +02:00
Dave Halter
84ed1af1e4 Get Python3.3+ passing. 2017-07-31 09:28:07 +02:00
Dave Halter
3f836c5841 Python 3.4 fixes. 2017-07-31 00:25:15 +02:00
Dave Halter
7f6bef19e1 Fix a few things to get Python3.5 working. 2017-07-30 22:24:22 +02:00
Dave Halter
0906c0b634 Add 'no binding found for nonlocal' issue. 2017-07-30 21:33:26 +02:00
Dave Halter
aa0225f63f Move some code around in tests. 2017-07-30 19:47:10 +02:00
Dave Halter
45b965063e Fix an issue with starred arguments and if they are defined or used. 2017-07-30 18:42:59 +02:00
Dave Halter
94ce899a86 Add issue 'too many expressions in star-unpacking assignment' 2017-07-30 15:13:21 +02:00
Dave Halter
cc24ba61ef Move some tests a bit around. 2017-07-30 14:21:42 +02:00
Dave Halter
d7b7548f8d Add the issue 'name %s is nonlocal and global'. 2017-07-30 13:12:27 +02:00
Dave Halter
443cb1ce08 Fix another small issue with global/nonlocal names. 2017-07-30 12:55:58 +02:00
Dave Halter
2420f57a5c Better checking of annotations/params and nonlocals/globals. 2017-07-30 12:23:17 +02:00
Dave Halter
b7726d05cf Fix some nonlocal/global issues. 2017-07-30 01:02:36 +02:00
Dave Halter
4ff25e8c92 A bit of a better overview in the README of what the current issues are. 2017-07-29 16:11:04 +02:00
Dave Halter
436105e11a Note in README that f-strings are not parsed. 2017-07-29 15:35:27 +02:00
Dave Halter
c139ef4107 Fix an issue with assigning to __debug__, which is essentially the same as assigning to a keyword. 2017-07-29 15:30:54 +02:00
Dave Halter
637e557486 Last issue around expr_stmt. 2017-07-27 00:09:38 +02:00
Dave Halter
8071580a4c Some more tests. 2017-07-26 23:57:38 +02:00
Dave Halter
0f73f1c9c4 Fix the final issue with expr_list. 2017-07-26 23:56:30 +02:00
Dave Halter
7e8961969d Fixed all cases that are called "can't assign/delete %s". 2017-07-26 23:38:41 +02:00
Dave Halter
e776e2a13d Add issues if users want to assign to operators. 2017-07-26 23:08:36 +02:00
Dave Halter
6bdccd61cc Handle nested statements. 2017-07-26 22:19:02 +02:00
Dave Halter
d48e927224 Add issues for literal assignments. 2017-07-26 22:08:57 +02:00
Dave Halter
e3828d6fb8 Check for some lambda and comprehension errors when assigning to them. 2017-07-26 21:38:17 +02:00
Dave Halter
186160b9ff Add issue 'nonlocal declaration not allowed at module level' 2017-07-25 22:29:09 +02:00
Dave Halter
87ce5fa9a4 Add issue 'import * only allowed at module level' 2017-07-25 22:23:38 +02:00
Dave Halter
d6f5fcf439 Add issue 'duplicate argument '%s' in function definition' 2017-07-25 22:17:15 +02:00
Dave Halter
6a92f5ec2d Better __future__ import handling for those that don't exist. 2017-07-25 20:21:38 +02:00
Dave Halter
d012353637 Add the remaining issues for arguments. 2017-07-25 02:25:06 +02:00
Dave Halter
babd7fca92 Add issue 'positional argument follows keyword argument unpacking'. 2017-07-25 01:54:25 +02:00
Dave Halter
d54d7752d4 Some more issues regarding keyword arguments. 2017-07-25 01:19:07 +02:00
Dave Halter
00bc1a5b94 Add issue 'keyword can't be an expression' 2017-07-25 00:58:08 +02:00
Dave Halter
6085c91df1 Some issues regarding annotations. 2017-07-25 00:38:27 +02:00
Dave Halter
319a0cc56f Add issue: 'bytes can only contain ASCII literal characters.' 2017-07-24 18:28:14 +02:00
Dave Halter
4e1513ae72 Add issue 'non-default argument follows default argument' 2017-07-24 18:14:54 +02:00
Dave Halter
d1eb1d61ec Add two more issues one about strings and one about an import. 2017-07-24 00:34:14 +02:00
Dave Halter
c2ab9a9e25 Add issue 'dict unpacking cannot be used in dict comprehension' 2017-07-23 23:47:57 +02:00
Dave Halter
915b00dab7 Added issue: iterable unpacking cannot be used in comprehension 2017-07-23 23:22:05 +02:00
Dave Halter
33769c6243 Add issue: Generator expression must be parenthesized if not sole argument 2017-07-23 22:56:19 +02:00
Dave Halter
42842a6949 Remove some useless code. 2017-07-23 21:50:53 +02:00
Dave Halter
a13f1a18a9 A dict unpacking test. 2017-07-23 19:55:05 +02:00
Dave Halter
0fe872f87a A few more tests. 2017-07-22 19:21:56 +02:00
Dave Halter
3669a81e2f Small improvements. 2017-07-22 19:07:55 +02:00
Dave Halter
d76eee2a96 Add 'named arguments must follow bare *' issue and a few conftest tweaks. 2017-07-22 19:00:35 +02:00
Dave Halter
1b66fa4d44 Correct a parent issue in param rewriting. 2017-07-22 18:46:57 +02:00
Dave Halter
58fbd61898 Small refactoring. 2017-07-22 17:32:31 +02:00
Dave Halter
89999949e2 Move some 2.6 stuff around. 2017-07-22 16:00:28 +02:00
Dave Halter
1ed3cac7c0 Move some things around to allow checking for different versions. 2017-07-22 15:47:04 +02:00
Dave Halter
9e5d6db24a More small fixes regarding docstrings. 2017-07-21 19:56:04 +02:00
Dave Halter
97b98a1da3 Fix an issue where the future import should be first. 2017-07-21 19:43:49 +02:00
Dave Halter
1df06025c2 Handling of star_expr and some async issues. 2017-07-21 17:23:25 +02:00
Dave Halter
82d69ca211 Add a comment about using async funcdefs. 2017-07-20 17:38:34 +02:00
Dave Halter
05f86b9c87 Fix an issue with async funcdefs in different forms. 2017-07-20 17:37:57 +02:00
Dave Halter
fa30872d26 Add and fix some async/await issues. 2017-07-20 17:28:43 +02:00
Dave Halter
9296fe3def Skip 2 tests in Python 2.6. 2017-07-20 17:11:10 +02:00
Dave Halter
26c71a9586 Add yield issues when outside function. 2017-07-20 09:51:35 +02:00
Dave Halter
fc1c16b966 await checks. 2017-07-20 09:44:10 +02:00
Dave Halter
7c9877cd2e Check for future imports in the beginning. 2017-07-20 09:21:28 +02:00
Dave Halter
03526cd84e Add default 'except:' must be last. 2017-07-20 01:33:59 +02:00
Dave Halter
9c4bf8cec4 Add error code for return outside function. 2017-07-20 00:20:22 +02:00
Dave Halter
d21504e2ca break should be in loop. 2017-07-20 00:11:15 +02:00
Dave Halter
4a86571b21 Add 'continue' not properly in loop 2017-07-19 23:52:41 +02:00
Dave Halter
4f7d78716a Remove the strange return values from visit_leaf. 2017-07-19 23:40:31 +02:00
Dave Halter
9b8418db3f Forgot to check in the allowed_syntax file. 2017-07-19 23:35:16 +02:00
Dave Halter
ff04517895 Remove a skipped test. 2017-07-19 23:33:34 +02:00
Dave Halter
4919eccf00 Add 'continue' not supported inside 'finally' clause. 2017-07-19 23:33:16 +02:00
Dave Halter
944fe5a896 Fix the last IndentationError. 2017-07-19 22:15:20 +02:00
Dave Halter
d52c8651d7 Fix a 2.7 error. 2017-07-19 21:58:38 +02:00
Dave Halter
bf5190c7bf Add SyntaxError: too many statically nested blocks. 2017-07-19 21:56:06 +02:00
Dave Halter
65df539e97 Better matching syntax errors. 2017-07-19 20:56:31 +02:00
Dave Halter
756587ae4a Add tests to check if python returns the same errors as the parser. 2017-07-19 20:54:23 +02:00
Dave Halter
6876e1b7c0 Use ERROR_DEDENT instead of ERRORTOKEN for wrong dedents. 2017-07-19 20:15:06 +02:00
Dave Halter
569cb99ca7 Start scanning for indentation errors. 2017-07-19 18:45:58 +02:00
Dave Halter
5e65bd2aaf Remove xfail tests that didn't make any sense. 2017-07-19 18:15:23 +02:00
Dave Halter
78c371f73a Refactor the version info to use a tuple, always. 2017-07-19 09:09:33 +02:00
Dave Halter
dc3b3158eb Move some code from one compatibility file to another. 2017-07-18 18:00:51 +02:00
Dave Halter
790bc459ed Make a proper recovery for only error statements in a suite. 2017-07-18 17:53:04 +02:00
Dave Halter
6ee986aa45 Remove some code that wasn't doing anything. 2017-07-17 08:49:02 +02:00
Dave Halter
837e9ca388 A paren wrongly used in __repr__. 2017-07-17 08:48:19 +02:00
Dave Halter
1a4539420d addtoken -> add_token. 2017-07-17 08:44:27 +02:00
Dave Halter
e3f7427c99 Restructure the Python 2 grammar to be closer to the original Python 2 grammar in case of list comprehensions. 2017-07-16 22:14:15 +02:00
Dave Halter
688dfaad24 Make ellipsis work for the differences between 2 and 3. 2017-07-16 18:49:19 +02:00
Dave Halter
33344d3ce4 Docstring changes. 2017-07-14 19:30:35 +02:00
Dave Halter
f6b3b1cc24 Make version a keyword only argument in load_grammar. 2017-07-14 18:28:03 +02:00
Dave Halter
1d0e982d89 Fix some stuff for Jedi. 2017-07-14 17:30:33 +02:00
Dave Halter
d32a84e181 Fix some default/annotation stuff. 2017-07-14 02:20:26 +02:00
Dave Halter
9d10ceeff1 Give jedi a better API for tokenizing. 2017-07-13 17:25:35 +02:00
Dave Halter
fcc6541f08 3.4 adaptations; now everything in Python 3 is similar. 2017-07-13 08:59:40 +02:00
Dave Halter
e47667efb5 Make the 3.5 grammar more similar to its original. 2017-07-13 08:57:31 +02:00
Dave Halter
18d46878a2 Make the 3.6 grammar more similar to the original grammar. 2017-07-13 08:53:58 +02:00
Dave Halter
c6f18e42db Fix a py27 issue that was caused by changing grammars. 2017-07-13 08:46:52 +02:00
Dave Halter
d1d019b3c0 Some small grammar changes for 2.7. 2017-07-13 08:44:25 +02:00
Dave Halter
a647ee1f44 Finally fixed pretty much all the tests. 2017-07-12 23:11:04 +02:00
Dave Halter
b1dabc8596 Add the each_version fixture to a lot more parser tests. 2017-07-12 23:09:21 +02:00
Dave Halter
161c51b050 Fix some issues. 2017-07-12 23:00:37 +02:00
Dave Halter
4ffac4727c The parse shortcut now supports a version argument. 2017-07-12 22:56:17 +02:00
Dave Halter
b618c99b08 Octal fixes. 2017-07-12 22:28:54 +02:00
Dave Halter
70de06ed87 Fix annotations in grammar2.7. 2017-07-12 21:38:11 +02:00
Dave Halter
d7d1d67828 Refactor some tests to make sure cross parsing versions works. 2017-07-12 21:32:16 +02:00
Dave Halter
abc4852bd4 Write an abstraction to test if source code is valid for a certain python version or not. 2017-07-12 18:58:43 +02:00
Dave Halter
d3115acb33 Refactor a parse call. 2017-07-12 10:03:48 +02:00
Dave Halter
75d41c09cd Trying to run more tests in all versions against all versions. 2017-07-12 09:37:47 +02:00
Dave Halter
9772d64360 Remove strange features in grammar27 that are not part of the official grammar. 2017-07-12 09:09:01 +02:00
Dave Halter
e731eecdd8 Make the tokenizer version independent. 2017-07-11 23:29:44 +02:00
Dave Halter
b6022c7a80 Add total_ordering for python 2.6. 2017-07-11 00:33:26 +02:00
Dave Halter
c93546bfd9 Get tox -e py27 working. 2017-07-11 00:26:24 +02:00
Dave Halter
dd7c12834e Clarify when using print() is the right use or not. 2017-07-11 00:11:25 +02:00
Dave Halter
20fdc18ec5 We can now also test certain problems with python 2. 2017-07-10 23:57:36 +02:00
Dave Halter
069e9b57f2 Fix an issue for python 2. 2017-07-10 23:57:18 +02:00
Dave Halter
679851dbf1 Make it possible to use certain python versions according to the file name of a normalizer issue file. 2017-07-10 23:50:17 +02:00
Dave Halter
65203d197a Another bom fix in the pep8 normalizer. 2017-07-10 23:40:30 +02:00
Dave Halter
ff949d1061 Fix utf-8 bom positions. 2017-07-10 23:38:44 +02:00
Dave Halter
859c48170e Add the utf-8 bom to the prefix in the tokenizer. 2017-07-10 23:26:15 +02:00
Dave Halter
b3923e65e8 Try to add syntax errors to pep8 normalizing. 2017-07-10 23:04:54 +02:00
Dave Halter
53cf408d99 Add some tests for python error listing. 2017-07-09 16:57:27 +02:00
Dave Halter
ceed7f183e Make it possible to list errors easier in a Python tree. 2017-07-09 16:57:09 +02:00
Dave Halter
9899c703ea Fix a diff parser issue. 2017-07-09 11:55:23 +02:00
Dave Halter
5b5e4a0616 Generate error tokens in the tokenizer if the indentation is dedented wrongly. 2017-07-09 10:44:24 +02:00
Dave Halter
8ad37f6036 Move parsing the version around. 2017-07-09 00:49:23 +02:00
Dave Halter
37e128ff8f Remove unused code. 2017-07-09 00:29:46 +02:00
Dave Halter
c28ec5f182 Move tokenize.py and token.py to the python folder. 2017-07-09 00:06:14 +02:00
Dave Halter
ee1184e7c4 Get rid of error leafs early. 2017-07-08 23:16:52 +02:00
Dave Halter
b8c79f0be5 Small refactoring. 2017-07-08 23:13:48 +02:00
Dave Halter
7198df85ba Remove issue if the previous leaf is an error leaf. 2017-07-08 23:12:08 +02:00
Dave Halter
cdc2df7881 Small change. 2017-07-08 20:14:17 +02:00
Dave Halter
4777eeb89f Move analyse_non_prefix. 2017-07-08 19:50:27 +02:00
Dave Halter
af38b3cb10 Some renames. 2017-07-08 19:39:25 +02:00
Dave Halter
0559beb343 Move some of the normalizer code to the pep8 module. 2017-07-08 19:09:11 +02:00
Dave Halter
0486ed9a9c Remove the comment class that was not needed. 2017-07-08 19:06:46 +02:00
Dave Halter
93ee3e3146 Make some more stuff configurable. 2017-07-08 19:04:54 +02:00
Dave Halter
968ce4759a Move some stuff around to make it more flexible. 2017-07-08 19:00:03 +02:00
Dave Halter
b1f9e17c5f Note in docstring that the normalize method is not public, yet. 2017-07-08 18:43:26 +02:00
Dave Halter
b239b8427b Make normalize private for now. 2017-07-08 14:10:48 +02:00
Dave Halter
65168577cf Fix comment issues (E26x). 2017-07-08 13:57:39 +02:00
Dave Halter
a5ea2f7add Rework some todos. 2017-07-08 12:12:53 +02:00
Dave Halter
3943618fe2 Remove E999 and replace them with todos. 2017-07-08 12:07:00 +02:00
Dave Halter
1e2abec7f2 Fix all tests. Finally. 2017-07-08 12:00:58 +02:00
Dave Halter
0846ad6337 Fix an issue with brackets on weird lines. 2017-07-07 09:57:52 +02:00
Dave Halter
521544ef12 Merge branch 'master' of github.com:davidhalter/parso 2017-07-07 00:55:56 +02:00
Dave Halter
c951a24730 Some more fixes. Tests now run with pytest -k 'not E12' under Python 3.5. 2017-07-07 00:55:46 +02:00
Dave Halter
5142870815 Cleanup some backslash node stuff. 2017-07-06 22:58:50 +02:00
Dave Halter
b3f42a8920 Fix some more test details. 2017-07-06 21:58:24 +02:00
Dave Halter
b2e35bc573 Add better line length checks (E501). 2017-07-06 17:39:23 +02:00
Dave Halter
507f190123 More newlines. 2017-07-06 02:07:25 +02:00
Dave Halter
89ed85d2e1 Add some newlines to avoid issues in other files. 2017-07-06 02:05:15 +02:00
Dave Halter
280f3edf6d Fix E392. 2017-07-06 01:46:52 +02:00
Dave Halter
0a76c45153 Finally E30x tests passing. 2017-07-06 01:30:09 +02:00
Dave Halter
da39081d5a Fix another small decorator issue. 2017-07-06 01:14:35 +02:00
Dave Halter
339d30776a Some test modifications to get more passing. 2017-07-06 00:17:16 +02:00
Dave Halter
da841adcb9 Fix the decorator blank line issues E304. 2017-07-06 00:14:36 +02:00
Dave Halter
fdad958b64 Fix most newline issues. 2017-07-05 17:15:24 +02:00
Dave Halter
6d91bcefe6 Add some test files for E30x. 2017-07-04 08:58:06 +02:00
Dave Halter
2a38b607ac Fix some 302 and 301 tests. 2017-07-04 08:54:19 +02:00
Dave Halter
98cf76837b Fix all E303. 2017-07-03 19:08:30 +02:00
Dave Halter
2daeebac95 Remove the whitespace info class. 2017-07-02 23:57:53 +02:00
Dave Halter
14026d4c3e Fix a warning message. 2017-07-02 22:53:42 +02:00
Dave Halter
602dcb11d7 Get all tests passing except those E12* tests.
You can reproduce this with "pytest -k 'not E12'".
2017-07-02 22:50:37 +02:00
Dave Halter
dd950392c8 Remove E999 for now, because those issues are not defined. Also change a few small things where issues were wrong. 2017-07-02 22:12:02 +02:00
Dave Halter
4d390742cd Better handling for E291 after newline (trailing whitespace). 2017-07-02 21:18:35 +02:00
Dave Halter
d9ebb09e64 Add length checks for line lengths. 2017-07-02 20:17:55 +02:00
Dave Halter
3057d15a4d Fix some issues with backslashes. 2017-07-02 19:38:07 +02:00
Dave Halter
36a59a3d3b Starting to refactor the normalizer to make sure that comments are also checked for indentation.
Using a linked list now for the indentation stack.
2017-07-02 18:58:43 +02:00
Dave Halter
7a7ad2038d Trying to change the prefix parsing a bit. 2017-06-29 22:47:31 +02:00
Dave Halter
063d4b052e Some tests for E291. 2017-06-28 23:18:47 +02:00
Dave Halter
d608b7ca7f One more E292 test. 2017-06-28 23:18:30 +02:00
Dave Halter
ae4700b0d4 Add E292 (eof should end with newline) with tests. 2017-06-28 23:13:51 +02:00
Dave Halter
e1e42d3d41 Add more E101 tests. 2017-06-28 19:49:14 +02:00
Dave Halter
a3129ad8ef Add some tests for python3. 2017-06-28 18:24:58 +02:00
Dave Halter
e47c9d72fa Add some utf-8 tests. 2017-06-28 18:13:57 +02:00
Dave Halter
b4a2f2bb61 Add a latin1 test. 2017-06-28 18:11:32 +02:00
Dave Halter
31fd7c335f Fixed E231 issues. 2017-06-28 10:28:27 +02:00
Dave Halter
367a75d2c8 Proper handling of E251 / E252. 2017-06-28 10:15:51 +02:00
Dave Halter
d6f8e3de4f Add all the E27 issues. 2017-06-27 10:05:04 +02:00
Dave Halter
a341af1f81 Finally fix most of the issues in E22. Huge amounts of work. 2017-06-27 09:55:18 +02:00
Dave Halter
6bea7094c8 Add E211. 2017-06-23 09:49:18 +02:00
Dave Halter
2470f54baf Fix the E20 errors. 2017-06-23 09:37:02 +02:00
Dave Halter
97aebe4aa0 Add some more tests. 2017-06-21 17:25:57 +02:00
Dave Halter
e66b62d1e1 Better backslash nodes that allow other alignments in case of expr_stmt. 2017-06-21 17:25:00 +02:00
Dave Halter
cd1b676c7e A few tests that should mostly pass. 2017-06-21 09:47:43 +02:00
Dave Halter
9081cf55fb Decided how to indent stuff after keyword arguments and in dict literals. 2017-06-21 09:47:24 +02:00
Dave Halter
c11dd6b5aa Start fixing more problems with hanging indents. 2017-06-16 09:43:09 +02:00
Dave Halter
98fbbaf945 Finally prepared the E12_second file to use it. 2017-06-15 19:32:11 +02:00
Dave Halter
7a758dabd8 Don't add issues if we're in an error node. 2017-06-15 19:31:23 +02:00
Dave Halter
0f2e6f9e22 Rework the test suite to make clearer where errors originate. 2017-06-15 19:30:48 +02:00
Dave Halter
d5e9ecea12 Figuring out how indentation of chained brackets works. 2017-06-15 01:26:38 +02:00
Dave Halter
c9fe2596c1 Add a first file for the tests of E12x. 2017-06-14 23:11:31 +02:00
Dave Halter
cdc3e0a731 Modify the license file a bit for the normalizer issue files to make my contributions clear. 2017-06-14 18:31:18 +02:00
Dave Halter
da779e09b1 Further progress in indentation issues. 2017-06-14 18:12:15 +02:00
Dave Halter
005e5f403a Fix some stuff about backslashes. 2017-06-13 01:23:08 +02:00
Dave Halter
7981a309d1 Trying to rework the indentation stack. 2017-06-12 23:52:27 +02:00
Dave Halter
5f14382103 Start analyzing backslashes. 2017-06-12 09:54:56 +02:00
Dave Halter
7cbcc6d0f3 More indentation issue fixes. 2017-06-11 23:01:15 +02:00
Dave Halter
146ef2436f Implement the first few issues of the E12x issues. 2017-06-11 20:37:59 +02:00
Dave Halter
c07cd77417 Implement E101. No mixing of tabs and spaces in indentation. 2017-06-08 00:10:15 +02:00
Dave Halter
34aabd6b47 Make the prefix parsing a bit simpler by combining tabs and spaces. 2017-06-07 17:23:07 +02:00
Dave Halter
9365ffc932 For now mute a test where Jedi is not yet far enough along in detecting issues in the parser tree. 2017-06-07 17:13:33 +02:00
Dave Halter
03c34d6105 Get the indentation errors of files mostly right even in comments. 2017-06-07 17:06:58 +02:00
Dave Halter
b73aa1fd61 Add normalizer issues 711-714. 2017-06-06 14:14:01 +02:00
Dave Halter
7d99b8e823 Make the normalizer tests' indentation easier to understand. 2017-06-06 08:56:55 +02:00
Dave Halter
ebf5850cb9 Add normalizer support for E701-E704. 2017-06-06 08:55:05 +02:00
Siddhartha Gandhi
72608704d8 Change Windows default cache directory to local (#1)
Change Windows default cache directory to local

From an issue that called for this change:
The default jedi cache directory is the roaming folder. In my company, we have restrictions on the file size of the Roaming folder (since I believe these settings are synced across computers). The jedi cache really isn't a fundamental user setting that needs to be synced across devices, so it really should be in the local app data folder. So I propose here that the default windows path use the environment variable %LocalAppData% instead.
2017-06-06 07:58:29 +02:00
Dave Halter
6bf52e8fe0 Fix a Python 2 issue in the normalizer. 2017-06-05 23:30:58 +02:00
Dave Halter
c3d101f582 Add some normalizer tests about imports. 2017-06-05 23:29:25 +02:00
Dave Halter
3e95793756 Fix the lambda issues. 2017-06-05 00:22:26 +02:00
Dave Halter
d1d02ba3f5 Create the first tests for the pydocstyle finder. 2017-06-04 23:59:44 +02:00
Dave Halter
660bbe1971 Trying to add a testsuite in pytest for the tests of pydocstyle. 2017-06-04 21:10:57 +02:00
Dave Halter
f13643b6cd Try to implement some of the pydocstyle functionality in the normalizer function. 2017-06-04 18:13:29 +02:00
Dave Halter
e82976fb37 Fix a 27 issue. 2017-06-02 00:16:53 +02:00
Dave Halter
be54401388 Fix some stuff that caused issues in Jedi. 2017-06-02 00:15:37 +02:00
Dave Halter
b1b165c21e Actually pass the tests again with removed remove_last_newline. 2017-06-01 18:29:52 +02:00
Dave Halter
814b16cc6c Remove the remove_newline hack and build that stuff into error recovery instead.
Tests are passing except for diff parser tests.
2017-05-31 21:24:24 +02:00
Dave Halter
b367058af6 Temporary work on carriage returns. 2017-05-31 08:59:49 +02:00
Dave Halter
8356bcab10 Fix small issues in prefix positioning. 2017-05-30 09:35:13 +02:00
Dave Halter
3e4b2f41cd Fix the token types. 2017-05-30 02:05:45 +02:00
Dave Halter
6640308d15 Add a prefix splitting. 2017-05-30 02:00:08 +02:00
Dave Halter
a276c0b42d Try to add rules for normalizers. 2017-05-28 17:14:20 +02:00
Dave Halter
d774bdd8f7 For now the normalizer just does something stupid. Removing the whitespace. 2017-05-27 20:45:30 -04:00
Dave Halter
673ffb9c62 Start adding normalizers instead of the get_code(normalize=True) function. 2017-05-27 13:02:56 -04:00
Dave Halter
7c1f3b3a65 Use escapes for some regex expressions. 2017-05-26 14:19:06 -04:00
Dave Halter
adc3ec268a Fix a 27 test. 2017-05-26 14:14:08 -04:00
Dave Halter
7b8455174a Remove more unused code. 2017-05-26 14:06:40 -04:00
Dave Halter
53674772ff Fix a small todo. 2017-05-26 13:59:33 -04:00
Dave Halter
406064c457 Remove a place of saving a module that was not necessary. 2017-05-26 13:43:34 -04:00
Dave Halter
bc68266a68 Remove a todo that was a duplicate. 2017-05-26 13:01:50 -04:00
Dave Halter
dafffdc9b4 The parser cache in RAM now has grammar versioning. 2017-05-26 12:48:59 -04:00
Dave Halter
f997b91a12 Remove a function that was never used. 2017-05-26 12:23:30 -04:00
Dave Halter
8ae5fc60d6 Moved the grammar object out of the cache module. 2017-05-26 12:17:45 -04:00
Dave Halter
976c5151af Rename source_tokens to tokenize and generate_tokens to tokenize_lines. 2017-05-26 12:01:35 -04:00
Dave Halter
d8e991c39f A small change to remove flake8 warnings. 2017-05-26 11:51:00 -04:00
Dave Halter
33c228aa42 Remove exact_op_types as a parameter, because it's not even used. 2017-05-26 11:49:48 -04:00
Dave Halter
0d93a2a2df Simplify pgen a bit. 2017-05-26 11:46:22 -04:00
Dave Halter
8948c9053b use_exact_op_types -> exact_op_types. 2017-05-26 11:39:29 -04:00
Dave Halter
b4259b2b4f Change the default of use_exact_op_types in the tokenizer. 2017-05-26 11:32:00 -04:00
Dave Halter
5ba73bfccc Moved the grammar files for the new logic. 2017-05-25 16:46:39 -04:00
Dave Halter
6283577bc3 Actually implement the load_grammar properly. 2017-05-25 16:45:29 -04:00
Dave Halter
b037321d1e Change the docstring for the cache parameter. 2017-05-25 14:40:09 -04:00
Dave Halter
68038a82ee Reorder some arguments. 2017-05-25 13:44:19 -04:00
Dave Halter
b7df52edb5 Use the diff parser as well. 2017-05-25 13:41:45 -04:00
Dave Halter
e7ea01fb35 Actually start using the stuff we defined in grammar. 2017-05-25 13:38:49 -04:00
Dave Halter
333046c253 load_python_grammar -> load_grammar. 2017-05-25 13:28:24 -04:00
Dave Halter
fab9edff87 Whitespace. 2017-05-24 14:26:01 -04:00
Dave Halter
e1ef9c086f Now also add an index.rst. 2017-05-24 14:09:50 -04:00
Dave Halter
d5c27b59be Move the parser tree to a more fitting file name. 2017-05-24 13:51:42 -04:00
Dave Halter
a8ef9990b6 First iteration of parser tree docs. 2017-05-24 13:50:26 -04:00
Dave Halter
7e94b2c4ed A first iteration of the usage documentation. 2017-05-24 13:44:24 -04:00
Dave Halter
5e62a836db Add a development documentation. 2017-05-24 13:21:02 -04:00
Dave Halter
9b6f7c78a2 Add an installation documentation. 2017-05-24 13:13:23 -04:00
Dave Halter
8feaad23f1 Add a version_info function in parso.utils. 2017-05-24 12:52:15 -04:00
Dave Halter
2fd4567d6f Add a test from jedi. 2017-05-24 00:42:17 -04:00
Dave Halter
a07801327c Bump version. 2017-05-24 00:17:46 -04:00
Dave Halter
726ebade70 Autodiscover packages with find_packages in setup.py. 2017-05-24 00:16:05 -04:00
Dave Halter
f2c257d7dd Move the utility function parse to the __init__ script. 2017-05-24 00:08:58 -04:00
Dave Halter
25941bbfb8 A parso docstring. 2017-05-23 14:36:45 -04:00
Dave Halter
29fa0d27fc Whitespace. 2017-05-23 14:24:38 -04:00
Dave Halter
775679d481 Move the grammar creation stuff into grammar.py. 2017-05-23 14:24:26 -04:00
Dave Halter
51a6328096 Make some default changes to create_grammar. 2017-05-23 14:21:39 -04:00
Dave Halter
89501e6fa5 Fix an error_recovery/file_input check. 2017-05-23 14:13:52 -04:00
Dave Halter
649d2bebbc Better documentation for the parso grammar. 2017-05-23 14:10:32 -04:00
Dave Halter
5fab429163 Fix a diff parser issue with a test as well. 2017-05-22 15:37:26 -04:00
Dave Halter
7d3438c94d Move the parse function to the grammar. 2017-05-22 14:06:40 -04:00
Dave Halter
3a0cd6d377 Make sha256 private to avoid confusion for the end user. 2017-05-21 17:35:47 -04:00
Dave Halter
4ca92861c6 Use pgen_grammar more consistently. 2017-05-21 17:35:06 -04:00
Dave Halter
c734df407b Use the name pgen_grammar instead of grammar to avoid confusion of the two. 2017-05-21 17:22:21 -04:00
Dave Halter
96252145a7 Bump version number. 2017-05-21 17:05:34 -04:00
Dave Halter
23d1470618 Add a more API focused grammar.
This makes it so we don't have to expose all the details of a pgen grammar to the user.
2017-05-21 17:02:15 -04:00
Dave Halter
973f7c5f61 Refactor the parse function to be more readable. 2017-05-21 13:50:05 -04:00
Dave Halter
a5b9177d4f The deploy script should create versions prefixed with v. 2017-05-20 18:01:38 -04:00
99 changed files with 8545 additions and 1238 deletions

.travis.yml (modified)

@@ -10,7 +10,6 @@ python:
- pypy
matrix:
allow_failures:
- python: pypy
- env: TOXENV=cov
include:
- python: 3.5

CONTRIBUTING.md (modified)

@@ -1,5 +1,8 @@
We <3 Pull Requests! Only two things:
We <3 Pull Requests! Three core things:
1. If you are adding functionality or fixing a bug, please add a test!
2. Add your name to AUTHORS.txt
3. Use the PEP8 style guide.
If you want to add methods to the parser tree, we will need to discuss this in
an issue first.

LICENSE.txt (modified)

@@ -4,11 +4,14 @@ Some Python files have been taken from the standard library and are therefore
PSF licensed. Modifications on these files are dual licensed (both MIT and
PSF). These files are:
- parso/pgen2
- parso/pgen2/*
- parso/tokenize.py
- parso/token.py
- test/test_pgen2.py
Also some test files under test/normalizer_issue_files have been copied from
https://github.com/PyCQA/pycodestyle (Expat License == MIT License).
-------------------------------------------------------------------------------
The MIT License (MIT)

README.rst (modified)

@@ -32,6 +32,17 @@ Installation
pip install parso
Future
======
- There will be better support for refactoring and comments. Stay tuned.
- There's a WIP PEP8 validator. However, it's not in good shape yet.
Known Issues
============
- `async`/`await` are already used as keywords in Python3.6.
- `from __future__ import print_function` is not supported,
Testing
=======

conftest.py (modified)

@@ -1,9 +1,15 @@
import re
import tempfile
import shutil
import logging
import sys
import os
import pytest
import parso
from parso import cache
from parso.utils import parse_version_string
collect_ignore = ["setup.py"]
@@ -25,3 +31,144 @@ def clean_parso_cache():
yield
cache._default_cache_path = old
shutil.rmtree(tmp)
def pytest_addoption(parser):
parser.addoption("--logging", "-L", action='store_true',
help="Enables the logging output.")
def pytest_generate_tests(metafunc):
if 'normalizer_issue_case' in metafunc.fixturenames:
base_dir = os.path.join(os.path.dirname(__file__), 'test', 'normalizer_issue_files')
cases = list(collect_normalizer_tests(base_dir))
metafunc.parametrize(
'normalizer_issue_case',
cases,
ids=[c.name for c in cases]
)
elif 'each_version' in metafunc.fixturenames:
metafunc.parametrize(
'each_version',
['2.6', '2.7', '3.3', '3.4', '3.5', '3.6'],
)
elif 'each_py2_version' in metafunc.fixturenames:
metafunc.parametrize(
'each_py2_version',
['2.6', '2.7'],
)
elif 'each_py3_version' in metafunc.fixturenames:
metafunc.parametrize(
'each_py3_version',
['3.3', '3.4', '3.5', '3.6'],
)
class NormalizerIssueCase(object):
"""
Static Analysis cases lie in the static_analysis folder.
The tests also start with `#!`, like the goto_definition tests.
"""
def __init__(self, path):
self.path = path
self.name = os.path.basename(path)
match = re.search(r'python([\d.]+)\.py', self.name)
self.python_version = match and match.group(1)
def collect_normalizer_tests(base_dir):
for f_name in os.listdir(base_dir):
if f_name.endswith(".py"):
path = os.path.join(base_dir, f_name)
yield NormalizerIssueCase(path)
def pytest_configure(config):
if config.option.logging:
root = logging.getLogger()
root.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
#formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
#ch.setFormatter(formatter)
root.addHandler(ch)
@pytest.fixture
def each_py3_version():
return '3.3', '3.4', '3.5', '3.6'
@pytest.fixture
def each_py2_version():
return '2.6', '2.7'
class Checker():
def __init__(self, version, is_passing):
self.version = version
self._is_passing = is_passing
self.grammar = parso.load_grammar(version=self.version)
def parse(self, code):
if self._is_passing:
return parso.parse(code, version=self.version, error_recovery=False)
else:
self._invalid_syntax(code)
def _invalid_syntax(self, code):
with pytest.raises(parso.ParserSyntaxError):
module = parso.parse(code, version=self.version, error_recovery=False)
# For debugging
print(module.children)
def get_error(self, code):
errors = list(self.grammar.iter_errors(self.grammar.parse(code)))
assert bool(errors) != self._is_passing
if errors:
return errors[0]
def get_error_message(self, code):
error = self.get_error(code)
if error is None:
return
return error.message
def assert_no_error_in_passing(self, code):
if self._is_passing:
module = self.grammar.parse(code)
assert not list(self.grammar.iter_errors(module))
@pytest.fixture
def works_not_in_py(each_version):
return Checker(each_version, False)
@pytest.fixture
def works_in_py2(each_version):
return Checker(each_version, each_version.startswith('2'))
@pytest.fixture
def works_ge_py27(each_version):
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (2, 7))
@pytest.fixture
def works_ge_py3(each_version):
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 0))
@pytest.fixture
def works_ge_py35(each_version):
"""
Works only greater equal Python 3.5.
"""
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 5))
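As a hedged illustration of how these fixtures are meant to be consumed in tests (this sketch is not part of the diff; keyword-only parameters are Python 3 syntax, so the 2.6/2.7 grammars should reject the snippet while the 3.x grammars accept it):
# Sketch only: `works_ge_py3` is parametrized over every configured
# version; Checker.parse() expects a ParserSyntaxError on 2.6/2.7 and
# a clean parse on 3.3+.
def test_keyword_only_args_need_py3(works_ge_py3):
    works_ge_py3.parse("def f(*, x): pass\n")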

deploy-master.sh (new executable file, 52 lines)

@@ -0,0 +1,52 @@
#!/usr/bin/env bash
# The script creates a separate folder in build/ and creates tags there, pushes
# them and then uploads the package to PyPI.
set -eu -o pipefail
BASE_DIR=$(dirname $(readlink -f "$0"))
cd $BASE_DIR
git fetch --tags
PROJECT_NAME=parso
BRANCH=master
BUILD_FOLDER=build
[ -d $BUILD_FOLDER ] || mkdir $BUILD_FOLDER
# Remove the previous deployment first.
# Checkout the right branch
cd $BUILD_FOLDER
rm -rf $PROJECT_NAME
git clone .. $PROJECT_NAME
cd $PROJECT_NAME
git checkout $BRANCH
# Test first.
tox
# Create tag
tag=v$(python -c "import $PROJECT_NAME; print($PROJECT_NAME.__version__)")
master_ref=$(git show-ref -s heads/$BRANCH)
tag_ref=$(git show-ref -s $tag || true)
if [[ $tag_ref ]]; then
if [[ $tag_ref != $master_ref ]]; then
echo 'Cannot tag something that has already been tagged with another commit.'
exit 1
fi
else
git tag $tag
git push --tags
fi
# Package and upload to PyPI
#rm -rf dist/ - Not needed anymore, because the folder is never reused.
echo `pwd`
python setup.py sdist bdist_wheel
# Maybe do a pip install twine before.
twine upload dist/*
cd $BASE_DIR
# Back in the development directory fetch tags.
git fetch --tags

deploy.sh (deleted)

@@ -1,13 +0,0 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Create tag
git tag $(python -c 'import parso; print(parso.__version__)')
git push --tags
# Package and upload to PyPI
rm -rf dist/
python setup.py sdist bdist_wheel
# Maybe do a pip install twine before.
twine upload dist/*

docs/docs/development.rst (new file, 37 lines)

@@ -0,0 +1,37 @@
.. include:: ../global.rst
Development
===========
If you want to contribute anything to |parso|, just open an issue or pull
request to discuss it. We welcome changes!
Deprecations Process
--------------------
The deprecation process is as follows:
1. A deprecation is announced in the next major/minor release.
2. We wait either at least a year & at least two minor releases until we remove
the deprecated functionality.
Testing
-------
The test suite depends on ``tox`` and ``pytest``::
pip install tox pytest
To run the tests for all supported Python versions::
tox
If you want to test only a specific Python version (e.g. Python 2.7), it's as
easy as::
tox -e py27
Tests are also run automatically on `Travis CI
<https://travis-ci.org/davidhalter/parso/>`_.

docs/docs/installation.rst (new file)

@@ -0,0 +1,32 @@
.. include:: ../global.rst
Installation and Configuration
==============================
The preferred way
-----------------
On any system you can install |parso| directly from the Python package index
using pip::
sudo pip install parso
From git
--------
If you want to install the current development version (master branch)::
sudo pip install -e git://github.com/davidhalter/parso.git#egg=parso
Manual installation from a downloaded package (not recommended)
---------------------------------------------------------------
If you prefer not to use an automated package installer, you can `download
<https://github.com/davidhalter/parso/archive/master.zip>`__ a current copy of
|parso| and install it manually.
To install it, navigate to the directory containing `setup.py` on your console
and type::
sudo python setup.py install

docs/docs/parser-tree.rst (new file, 36 lines)

@@ -0,0 +1,36 @@
.. _parser-tree:
Parser Tree
===========
Usage
-----
.. automodule:: parso.python
:members:
:undoc-members:
Parser Tree Base Class
----------------------
All nodes and leaves have these methods/properties:
.. autoclass:: parso.tree.NodeOrLeaf
:members:
:undoc-members:
Python Parser Tree
------------------
.. automodule:: parso.python.tree
:members:
:undoc-members:
:show-inheritance:
Utility
-------
.. autofunction:: parso.tree.search_ancestor

docs/docs/usage.rst (new file, 41 lines)

@@ -0,0 +1,41 @@
.. include:: ../global.rst
Usage
=====
|parso| works around grammars. You can simply create Python grammars by calling
``load_grammar``. Grammars (with a custom tokenizer and custom parser trees)
can also be created by directly instantiating ``Grammar``. More information
about the resulting objects can be found in the :ref:`parser tree documentation
<parser-tree>`.
The simplest way of using parso is without even loading a grammar:
.. sourcecode:: python
>>> import parso
>>> parso.parse('foo + bar')
<Module: @1-1>
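A grammar can also be loaded explicitly, as a hedged sketch (the version string is illustrative):
.. sourcecode:: python
    >>> grammar = parso.load_grammar(version='3.6')
    >>> grammar.parse('foo + bar')
    <Module: @1-1>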
.. automodule:: parso.grammar
:members:
:undoc-members:
Utility
-------
.. autofunction:: parso.parse
.. automodule:: parso.utils
:members:
:undoc-members:
Used By
-------
- jedi_ (which is used by IPython and a lot of plugins).
.. _jedi: https://github.com/davidhalter/jedi

docs/index.rst (new file, 31 lines)

@@ -0,0 +1,31 @@
.. include:: global.rst
parso - A Python Parser Written in Python
=========================================
Release v\ |release|. (:doc:`Installation <docs/installation>`)
.. automodule:: parso
.. _toc:
Docs
----
.. toctree::
:maxdepth: 2
docs/installation
docs/usage
docs/parser-tree
docs/development
.. _resources:
Resources
---------
- `Source Code on Github <https://github.com/davidhalter/parso>`_
- `Travis Testing <https://travis-ci.org/davidhalter/parso>`_
- `Python Package Index <http://pypi.python.org/pypi/parso/>`_

parso/__init__.py (modified)

@@ -1,11 +1,36 @@
"""
parso is a Python parser. It's really easy to use and supports multiple Python
versions, file caching, round-trips and other stuff:
>>> from parso import load_grammar
>>> grammar = load_grammar(version='2.7')
>>> module = grammar.parse('hello + 1')
>>> expr = module.children[0]
>>> expr
PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
>>> print(expr.get_code())
hello + 1
>>> name = expr.children[0]
>>> name
<Name: hello@1,0>
>>> name.end_pos
(1, 5)
>>> expr.end_pos
(1, 9)
"""
from parso.parser import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso import python
from parso.grammar import Grammar, load_grammar
__version__ = '0.0.1'
__version__ = '0.0.4'
def parse(grammar, code):
raise NotImplementedError
Parser(grammar, code)
def parse(code=None, **kwargs):
"""
A utility function to parse Python with the current Python version. Params
are documented in ``Grammar.parse``.
"""
version = kwargs.pop('version', None)
grammar = load_grammar(version=version)
return grammar.parse(code, **kwargs)
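A minimal usage sketch of this new helper, mirroring the module docstring above (the snippet is illustrative):
import parso
# parse() loads the grammar for the requested version and returns the
# root node (the module) of the parsed tree.
module = parso.parse('hello + 1', version='2.7')
expr = module.children[0]
print(expr.get_code())  # -> hello + 1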

parso/_compatibility.py (modified)

@@ -3,6 +3,7 @@ To ensure compatibility from Python ``2.6`` - ``3.3``, a module has been
created. Clearly there is huge need to use conforming syntax.
"""
import sys
import platform
# Cannot use sys.version.major and minor names, because in Python 2.6 it's not
# a namedtuple.
@@ -14,6 +15,8 @@ try:
except NameError:
unicode = str
is_pypy = platform.python_implementation() == 'PyPy'
def use_metaclass(meta, *bases):
""" Create a class with a metaclass. """
@@ -66,3 +69,35 @@ def utf8_repr(func):
return func
else:
return wrapper
try:
from functools import total_ordering
except ImportError:
# Python 2.6
def total_ordering(cls):
"""Class decorator that fills in missing ordering methods"""
convert = {
'__lt__': [('__gt__', lambda self, other: not (self < other or self == other)),
('__le__', lambda self, other: self < other or self == other),
('__ge__', lambda self, other: not self < other)],
'__le__': [('__ge__', lambda self, other: not self <= other or self == other),
('__lt__', lambda self, other: self <= other and not self == other),
('__gt__', lambda self, other: not self <= other)],
'__gt__': [('__lt__', lambda self, other: not (self > other or self == other)),
('__ge__', lambda self, other: self > other or self == other),
('__le__', lambda self, other: not self > other)],
'__ge__': [('__le__', lambda self, other: (not self >= other) or self == other),
('__gt__', lambda self, other: self >= other and not self == other),
('__lt__', lambda self, other: not self >= other)]
}
roots = set(dir(cls)) & set(convert)
if not roots:
raise ValueError('must define at least one ordering operation: < > <= >=')
root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__
for opname, opfunc in convert[root]:
if opname not in roots:
opfunc.__name__ = opname
opfunc.__doc__ = getattr(int, opname).__doc__
setattr(cls, opname, opfunc)
return cls
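A small sketch of what the backport provides on Python 2.6 (the `Version` class here is illustrative): define `__eq__` and one ordering method, and the decorator derives the rest:
@total_ordering
class Version(object):
    def __init__(self, major, minor):
        self.key = (major, minor)

    def __eq__(self, other):
        return self.key == other.key

    def __lt__(self, other):
        # total_ordering fills in __le__, __gt__ and __ge__ from this.
        return self.key < other.key

assert Version(2, 7) < Version(3, 3)
assert Version(3, 4) >= Version(3, 3)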

parso/cache.py (modified)

@@ -41,7 +41,7 @@ http://docs.python.org/3/library/sys.html#sys.implementation
def _get_default_cache_path():
if platform.system().lower() == 'windows':
dir_ = os.path.join(os.getenv('APPDATA') or '~', 'Parso', 'Parso')
dir_ = os.path.join(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
elif platform.system().lower() == 'darwin':
dir_ = os.path.join('~', 'Library', 'Caches', 'Parso')
else:
@@ -53,16 +53,14 @@ _default_cache_path = _get_default_cache_path()
The path where the cache is stored.
On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%APPDATA%\\Parso\\Parso\\``.
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""
# for fast_parser, should not be deleted
parser_cache = {}
class _NodeCacheItem(object):
def __init__(self, node, lines, change_time=None):
self.node = node
@@ -72,7 +70,7 @@ class _NodeCacheItem(object):
self.change_time = change_time
def load_module(grammar, path, cache_path=None):
def load_module(hashed_grammar, path, cache_path=None):
"""
Returns a module or None, if it fails.
"""
@@ -82,16 +80,15 @@ def load_module(grammar, path, cache_path=None):
return None
try:
# TODO Add grammar sha256
module_cache_item = parser_cache[path]
module_cache_item = parser_cache[hashed_grammar][path]
if p_time <= module_cache_item.change_time:
return module_cache_item.node
except KeyError:
return _load_from_file_system(grammar, path, p_time, cache_path=cache_path)
return _load_from_file_system(hashed_grammar, path, p_time, cache_path=cache_path)
def _load_from_file_system(grammar, path, p_time, cache_path=None):
cache_path = _get_hashed_path(grammar, path, cache_path=cache_path)
def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
try:
try:
if p_time > os.path.getmtime(cache_path):
@@ -113,12 +110,12 @@ def _load_from_file_system(grammar, path, p_time, cache_path=None):
except FileNotFoundError:
return None
else:
parser_cache[path] = module_cache_item
parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
logging.debug('pickle loaded: %s', path)
return module_cache_item.node
def save_module(grammar, path, module, lines, pickling=True, cache_path=None):
def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=None):
try:
p_time = None if path is None else os.path.getmtime(path)
except OSError:
@@ -126,24 +123,16 @@ def save_module(grammar, path, module, lines, pickling=True, cache_path=None):
pickling = False
item = _NodeCacheItem(module, lines, p_time)
parser_cache[path] = item
parser_cache.setdefault(hashed_grammar, {})[path] = item
if pickling and path is not None:
_save_to_file_system(grammar, path, item)
_save_to_file_system(hashed_grammar, path, item)
def _save_to_file_system(grammar, path, item, cache_path=None):
with open(_get_hashed_path(grammar, path, cache_path=cache_path), 'wb') as f:
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)
def remove_old_modules(self):
"""
# TODO Might want to use such a function to clean up the cache (if it's
# too old). We could potentially also scan for old files in the
# directory and delete those.
"""
def clear_cache(cache_path=None):
if cache_path is None:
cache_path = _default_cache_path
@@ -151,11 +140,11 @@ def clear_cache(cache_path=None):
parser_cache.clear()
def _get_hashed_path(grammar, path, cache_path=None):
def _get_hashed_path(hashed_grammar, path, cache_path=None):
directory = _get_cache_directory_path(cache_path=cache_path)
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
return os.path.join(directory, '%s-%s.pkl' % (grammar.sha256, file_hash))
return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
def _get_cache_directory_path(cache_path=None):
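The core of this diff is the new two-level keying of the RAM cache; a minimal sketch using the names from cache.py above (the helper names `_store` and `_lookup` are hypothetical):
# The cache is keyed by grammar hash first, then by file path, so modules
# parsed with different grammar versions no longer collide.
parser_cache = {}

def _store(hashed_grammar, path, item):
    parser_cache.setdefault(hashed_grammar, {})[path] = item

def _lookup(hashed_grammar, path):
    try:
        return parser_cache[hashed_grammar][path]
    except KeyError:
        return None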

parso/grammar.py (new file, 272 lines)

@@ -0,0 +1,272 @@
import hashlib
import os
from parso._compatibility import FileNotFoundError, is_pypy
from parso.pgen2.pgen import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python import token
from parso.cache import parser_cache, load_module, save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
from parso.python.errors import ErrorFinderConfig
from parso.python import pep8
from parso.python import fstring
_loaded_grammars = {}
class Grammar(object):
"""
Create custom grammars by calling this. It's not really supported, yet.
:param text: A BNF representation of your grammar.
"""
_error_normalizer_config = None
_token_namespace = None
_default_normalizer_config = pep8.PEP8NormalizerConfig()
def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None):
self._pgen_grammar = generate_grammar(
text,
token_namespace=self._get_token_namespace()
)
self._parser = parser
self._tokenizer = tokenizer
self._diff_parser = diff_parser
self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()
def parse(self, code=None, **kwargs):
"""
If you want to parse a Python file, you most likely want to start here.
If you need finer-grained control over the parsed instance, there will be
other ways to access it.
:param code str: A unicode string that contains Python code.
:param path str: The path to the file you want to open. Only needed for caching.
:param error_recovery bool: If enabled, any code will be returned. If
it is invalid, it will be returned as an error node. If disabled,
you will get a ParserSyntaxError when encountering syntax errors in your
code.
:param start_symbol str: The grammar symbol that you want to parse. Only
allowed to be used when error_recovery is False.
:param cache bool: Keeps a copy of the parser tree in RAM and on disk
if a path is given. Returns the cached trees if the corresponding
files on disk have not changed.
:param diff_cache bool: Diffs the cached python module against the new
code and tries to parse only the parts that have changed. Returns
the same (changed) module that is found in cache. Using this option
requires you to not do anything anymore with the old cached module,
because the contents of it might have changed.
:param cache_path bool: If given saves the parso cache in this
directory. If not given, defaults to the default cache places on
each platform.
:return: A syntax tree node. Typically the module.
"""
if 'start_pos' in kwargs:
raise TypeError("parse() got an unexpected keyworda argument.")
return self._parse(code=code, **kwargs)
def _parse(self, code=None, path=None, error_recovery=True,
start_symbol=None, cache=False, diff_cache=False,
cache_path=None, start_pos=(1, 0)):
"""
We want the Python 3.5 * operator and keyword-only arguments. Therefore
just wrap it all.
start_pos is a parameter that is only used internally. It might become
public sometime in the future.
"""
if code is None and path is None:
raise TypeError("Please provide either code or a path.")
if start_symbol is None:
start_symbol = self._start_symbol
if error_recovery and start_symbol != 'file_input':
raise NotImplementedError("This is currently not implemented.")
if cache and code is None and path is not None:
# With the current architecture we cannot load from cache if the
# code is given, because we just load from cache if it's not older than
# the latest change (file last modified).
module_node = load_module(self._hashed, path, cache_path=cache_path)
if module_node is not None:
return module_node
if code is None:
with open(path, 'rb') as f:
code = f.read()
code = python_bytes_to_unicode(code)
lines = split_lines(code, keepends=True)
if diff_cache:
if self._diff_parser is None:
raise TypeError("You have to define a diff parser to be able "
"to use this option.")
try:
module_cache_item = parser_cache[self._hashed][path]
except KeyError:
pass
else:
module_node = module_cache_item.node
old_lines = module_cache_item.lines
if old_lines == lines:
return module_node
new_node = self._diff_parser(
self._pgen_grammar, self._tokenizer, module_node
).update(
old_lines=old_lines,
new_lines=lines
)
save_module(self._hashed, path, new_node, lines,
# Never pickle in pypy, it's slow as hell.
pickling=cache and not is_pypy,
cache_path=cache_path)
return new_node
tokens = self._tokenizer(lines, start_pos)
p = self._parser(
self._pgen_grammar,
error_recovery=error_recovery,
start_symbol=start_symbol
)
root_node = p.parse(tokens=tokens)
if cache or diff_cache:
save_module(self._hashed, path, root_node, lines,
# Never pickle in pypy, it's slow as hell.
pickling=cache and not is_pypy,
cache_path=cache_path)
return root_node
def _get_token_namespace(self):
ns = self._token_namespace
if ns is None:
raise ValueError("The token namespace should be set.")
return ns
def iter_errors(self, node):
if self._error_normalizer_config is None:
raise ValueError("No error normalizer specified for this grammar.")
return self._get_normalizer_issues(node, self._error_normalizer_config)
def _get_normalizer(self, normalizer_config):
if normalizer_config is None:
normalizer_config = self._default_normalizer_config
if normalizer_config is None:
raise ValueError("You need to specify a normalizer, because "
"there's no default normalizer for this tree.")
return normalizer_config.create_normalizer(self)
def _normalize(self, node, normalizer_config=None):
"""
TODO this is not public, yet.
The returned code will be normalized, e.g. PEP8 for Python.
"""
normalizer = self._get_normalizer(normalizer_config)
return normalizer.walk(node)
def _get_normalizer_issues(self, node, normalizer_config=None):
normalizer = self._get_normalizer(normalizer_config)
normalizer.walk(node)
return normalizer.issues
def __repr__(self):
labels = self._pgen_grammar.number2symbol.values()
txt = ' '.join(list(labels)[:3]) + ' ...'
return '<%s:%s>' % (self.__class__.__name__, txt)
class PythonGrammar(Grammar):
_error_normalizer_config = ErrorFinderConfig()
_token_namespace = token
_start_symbol = 'file_input'
def __init__(self, version_info, bnf_text):
super(PythonGrammar, self).__init__(
bnf_text,
tokenizer=self._tokenize_lines,
parser=PythonParser,
diff_parser=DiffParser
)
self.version_info = version_info
def _tokenize_lines(self, lines, start_pos):
return tokenize_lines(lines, self.version_info, start_pos=start_pos)
def _tokenize(self, code):
# Used by Jedi.
return tokenize(code, self.version_info)
class PythonFStringGrammar(Grammar):
_token_namespace = fstring.TokenNamespace
_start_symbol = 'fstring'
def __init__(self):
super(PythonFStringGrammar, self).__init__(
text=fstring.GRAMMAR,
tokenizer=fstring.tokenize,
parser=fstring.Parser
)
def parse(self, code, **kwargs):
return self._parse(code, **kwargs)
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
tokens = self._tokenizer(code, start_pos=start_pos)
p = self._parser(
self._pgen_grammar,
error_recovery=error_recovery,
start_symbol=self._start_symbol,
)
return p.parse(tokens=tokens)
def parse_leaf(self, leaf, error_recovery=True):
code = leaf._get_payload()
return self.parse(code, error_recovery=error_recovery, start_pos=leaf.start_pos)
def load_grammar(**kwargs):
"""
Loads a Python grammar. The default version is the current Python version.
If you need support for a specific version, please use e.g.
`version='3.3'`.
"""
def load_grammar(language='python', version=None):
if language == 'python':
version_info = parse_version_string(version)
file = 'python/grammar%s%s.txt' % (version_info.major, version_info.minor)
global _loaded_grammars
path = os.path.join(os.path.dirname(__file__), file)
try:
return _loaded_grammars[path]
except KeyError:
try:
with open(path) as f:
bnf_text = f.read()
grammar = PythonGrammar(version_info, bnf_text)
return _loaded_grammars.setdefault(path, grammar)
except FileNotFoundError:
message = "Python version %s is currently not supported." % version
raise NotImplementedError(message)
elif language == 'python-f-string':
if version is not None:
raise NotImplementedError("Currently different versions are not supported.")
return PythonFStringGrammar()
else:
raise NotImplementedError("No support for language %s." % language)
return load_grammar(**kwargs)
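Taken together, the new module boils down to a small public surface; a minimal usage sketch based on the code above (assuming load_grammar is re-exported at the package level, otherwise import it from parso.grammar):

import parso

grammar = parso.load_grammar(version='3.6')      # resolves to python/grammar36.txt
module = grammar.parse('x = 1\nif x:\n    y\n')  # file_input is the default start symbol
print(module.children)                           # the parsed tree's top-level nodes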

parso/normalizer.py (new file, +174 lines)

@@ -0,0 +1,174 @@
from contextlib import contextmanager
from parso._compatibility import use_metaclass
class _NormalizerMeta(type):
def __new__(cls, name, bases, dct):
new_cls = type.__new__(cls, name, bases, dct)
new_cls.rule_value_classes = {}
new_cls.rule_type_classes = {}
return new_cls
class Normalizer(use_metaclass(_NormalizerMeta)):
def __init__(self, grammar, config):
self.grammar = grammar
self._config = config
self.issues = []
self._rule_type_instances = self._instantiate_rules('rule_type_classes')
self._rule_value_instances = self._instantiate_rules('rule_value_classes')
def _instantiate_rules(self, attr):
dct = {}
for base in type(self).mro():
rules_map = getattr(base, attr, {})
for type_, rule_classes in rules_map.items():
new = [rule_cls(self) for rule_cls in rule_classes]
dct.setdefault(type_, []).extend(new)
return dct
def walk(self, node):
self.initialize(node)
value = self.visit(node)
self.finalize()
return value
def visit(self, node):
try:
children = node.children
except AttributeError:
return self.visit_leaf(node)
else:
with self.visit_node(node):
return ''.join(self.visit(child) for child in children)
@contextmanager
def visit_node(self, node):
self._check_type_rules(node)
yield
def _check_type_rules(self, node):
for rule in self._rule_type_instances.get(node.type, []):
rule.feed_node(node)
def visit_leaf(self, leaf):
self._check_type_rules(leaf)
for rule in self._rule_value_instances.get(leaf.value, []):
rule.feed_node(leaf)
return leaf.prefix + leaf.value
def initialize(self, node):
pass
def finalize(self):
pass
def add_issue(self, node, code, message):
issue = Issue(node, code, message)
if issue not in self.issues:
self.issues.append(issue)
return True
@classmethod
def register_rule(cls, **kwargs):
"""
Use it as a class decorator::
normalizer = Normalizer('grammar', 'config')
@normalizer.register_rule(value='foo')
class MyRule(Rule):
error_code = 42
"""
return cls._register_rule(**kwargs)
@classmethod
def _register_rule(cls, value=None, values=(), type=None, types=()):
values = list(values)
types = list(types)
if value is not None:
values.append(value)
if type is not None:
types.append(type)
if not values and not types:
raise ValueError("You must register at least something.")
def decorator(rule_cls):
for v in values:
cls.rule_value_classes.setdefault(v, []).append(rule_cls)
for t in types:
cls.rule_type_classes.setdefault(t, []).append(rule_cls)
return rule_cls
return decorator
class NormalizerConfig(object):
normalizer_class = Normalizer
def create_normalizer(self, grammar):
if self.normalizer_class is None:
return None
return self.normalizer_class(grammar, self)
class Issue(object):
def __init__(self, node, code, message):
self._node = node
self.code = code
self.message = message
self.start_pos = node.start_pos
def __eq__(self, other):
return self.start_pos == other.start_pos and self.code == other.code
def __ne__(self, other):
return not self.__eq__(other)
def __hash__(self):
return hash((self.code, self.start_pos))
def __repr__(self):
return '<%s: %s>' % (self.__class__.__name__, self.code)
class Rule(object):
code = None
message = None
def __init__(self, normalizer):
self._normalizer = normalizer
def is_issue(self, node):
raise NotImplementedError()
def get_node(self, node):
return node
def _get_message(self, message):
if message is None:
message = self.message
if message is None:
raise ValueError("The message on the class is not set.")
return message
def add_issue(self, node, code=None, message=None):
if code is None:
code = self.code
if code is None:
raise ValueError("The error code on the class is not set.")
message = self._get_message(message)
self._normalizer.add_issue(node, code, message)
def feed_node(self, node):
if self.is_issue(node):
issue_node = self.get_node(node)
self.add_issue(issue_node)
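Rules plug into Normalizer subclasses through register_rule; a toy sketch against the classes above (the subclass, code and message are made up for illustration):

import parso
from parso.normalizer import Normalizer, Rule

class FindEval(Normalizer):
    pass

@FindEval.register_rule(value='eval')
class _EvalRule(Rule):
    code = 901                    # hypothetical issue code
    message = "Don't use eval()."

    def is_issue(self, leaf):
        # feed_node() records an issue whenever this returns a truthy value.
        return True

grammar = parso.load_grammar()
normalizer = FindEval(grammar, config=None)
normalizer.walk(grammar.parse('eval("1")\n'))
print(normalizer.issues)          # [<Issue: 901>]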

View File

@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
May be raised as an exception.
"""
def __init__(self, message, position):
def __init__(self, message, error_leaf):
self.message = message
self.position = position
self.error_leaf = error_leaf
class BaseParser(object):
@@ -38,15 +38,15 @@ class BaseParser(object):
}
default_leaf = tree.Leaf
def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
self._grammar = grammar
def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
self._pgen_grammar = pgen_grammar
self._start_symbol = start_symbol
self._error_recovery = error_recovery
def parse(self, tokens):
start_number = self._grammar.symbol2number[self._start_symbol]
start_number = self._pgen_grammar.symbol2number[self._start_symbol]
self.pgen_parser = PgenParser(
self._grammar, self.convert_node, self.convert_leaf,
self._pgen_grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number
)
@@ -55,22 +55,23 @@ class BaseParser(object):
del self.pgen_parser
return node
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
if self._error_recovery:
raise NotImplementedError("Error Recovery is not implemented")
else:
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
def convert_node(self, grammar, type_, children):
def convert_node(self, pgen_grammar, type_, children):
# TODO REMOVE symbol, we don't want type here.
symbol = grammar.number2symbol[type_]
symbol = pgen_grammar.number2symbol[type_]
try:
return self.node_map[symbol](children)
except KeyError:
return self.default_node(symbol, children)
def convert_leaf(self, grammar, type_, value, prefix, start_pos):
def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
try:
return self.leaf_map[type_](value, start_pos, prefix)
except KeyError:

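With this change, a failed parse without error recovery carries the offending leaf instead of a bare position; a sketch (assuming the package re-exports load_grammar; adjust the ParserSyntaxError import to wherever the class lives):

import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()
try:
    grammar.parse('def f(:\n', error_recovery=False)
except ParserSyntaxError as e:
    # error_leaf replaces the old bare `position` attribute.
    print(e.message, e.error_leaf.start_pos, repr(e.error_leaf.value))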
View File

@@ -17,8 +17,6 @@ fallback token code OP, but the parser needs the actual token code.
"""
import pickle
import hashlib
class Grammar(object):
@@ -84,8 +82,8 @@ class Grammar(object):
self.keywords = {}
self.tokens = {}
self.symbol2label = {}
self.label2symbol = {}
self.start = 256
self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""

View File

@@ -14,7 +14,7 @@ See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from parso import tokenize
from parso.python import tokenize
class InternalParseError(Exception):
@@ -33,6 +33,12 @@ class InternalParseError(Exception):
self.start_pos = start_pos
class Stack(list):
def get_tos_nodes(self):
tos = self[-1]
return tos[2][1]
def token_to_ilabel(grammar, type_, value):
# Map from token to label
if type_ == tokenize.NAME:
@@ -56,7 +62,7 @@ class PgenParser(object):
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.addtoken(...): # parse a token
if p.add_token(...): # parse a token
break
root = p.rootnode # root of abstract syntax tree
@@ -69,7 +75,7 @@ class PgenParser(object):
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when addtoken() returns True; the root of the
Parsing is complete when add_token() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, error_recovery()
is called. There is no error recovery; the parser cannot be used
@@ -113,13 +119,13 @@ class PgenParser(object):
# where children is a list of nodes or None
newnode = (start, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = [stackentry]
self.stack = Stack([stackentry])
self.rootnode = None
self.error_recovery = error_recovery
def parse(self, tokens):
for type_, value, start_pos, prefix in tokens:
if self.addtoken(type_, value, start_pos, prefix):
if self.add_token(type_, value, start_pos, prefix):
break
else:
# We never broke out -- EOF is too soon -- Unfinished statement.
@@ -129,7 +135,7 @@ class PgenParser(object):
raise InternalParseError("incomplete input", type_, value, start_pos)
return self.rootnode
def addtoken(self, type_, value, start_pos, prefix):
def add_token(self, type_, value, start_pos, prefix):
"""Add a token; return True if this is the end of the program."""
ilabel = token_to_ilabel(self.grammar, type_, value)
@@ -179,7 +185,7 @@ class PgenParser(object):
raise InternalParseError("too much input", type_, value, start_pos)
else:
self.error_recovery(self.grammar, self.stack, arcs, type_,
value, start_pos, prefix, self.addtoken)
value, start_pos, prefix, self.add_token)
break
def _shift(self, type_, value, newstate, prefix, start_pos):

View File

@@ -6,18 +6,23 @@
# Modifications are dual-licensed: MIT and PSF.
from parso.pgen2 import grammar
from parso import token
from parso import tokenize
from parso.python import token
from parso.python import tokenize
from parso.utils import parse_version_string
class ParserGenerator(object):
def __init__(self, bnf_text):
def __init__(self, bnf_text, token_namespace):
self._bnf_text = bnf_text
self.generator = tokenize.source_tokens(bnf_text)
self.generator = tokenize.tokenize(
bnf_text,
version_info=parse_version_string('3.6')
)
self._gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self._parse()
self.first = {} # map from symbol name to set of tokens
self._addfirstsets()
self._token_namespace = token_namespace
def make_grammar(self):
c = grammar.Grammar(self._bnf_text)
@@ -65,12 +70,12 @@ class ParserGenerator(object):
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
c.label2symbol[ilabel] = label
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(token, label, None)
itoken = getattr(self._token_namespace, label, None)
assert isinstance(itoken, int), label
assert itoken in token.tok_name, label
if itoken in c.tokens:
return c.tokens[itoken]
else:
@@ -86,12 +91,13 @@ class ParserGenerator(object):
if value in c.keywords:
return c.keywords[value]
else:
# TODO this might be an issue?! Using token.NAME here?
c.labels.append((token.NAME, value))
c.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = token.opmap[value] # Fails if unknown token
itoken = self._token_namespace.generate_token_id(value)
if itoken in c.tokens:
return c.tokens[itoken]
else:
@@ -146,7 +152,7 @@ class ParserGenerator(object):
self._gettoken()
# RULE: NAME ':' RHS NEWLINE
name = self._expect(token.NAME)
self._expect(token.OP, ":")
self._expect(token.COLON)
a, z = self._parse_rhs()
self._expect(token.NEWLINE)
#self._dump_nfa(name, a, z)
@@ -276,7 +282,7 @@ class ParserGenerator(object):
if self.value == "[":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.OP, "]")
self._expect(token.RSQB)
a.addarc(z)
return a, z
else:
@@ -296,7 +302,7 @@ class ParserGenerator(object):
if self.value == "(":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.OP, ")")
self._expect(token.RPAR)
return a, z
elif self.type in (token.NAME, token.STRING):
a = NFAState()
@@ -308,10 +314,10 @@ class ParserGenerator(object):
self._raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def _expect(self, type, value=None):
if self.type != type or (value is not None and self.value != value):
self._raise_error("expected %s/%s, got %s/%s",
type, value, self.type, self.value)
def _expect(self, type):
if self.type != type:
self._raise_error("expected %s, got %s(%s)",
type, self.type, self.value)
value = self.value
self._gettoken()
return value
@@ -321,7 +327,6 @@ class ParserGenerator(object):
while tup[0] in (token.COMMENT, token.NL):
tup = next(self.generator)
self.type, self.value, self.begin, prefix = tup
#print tokenize.tok_name[self.type], repr(self.value)
def _raise_error(self, msg, *args):
if args:
@@ -329,8 +334,8 @@ class ParserGenerator(object):
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
line = open(self.filename).readlines()[self.begin[0]]
raise SyntaxError(msg, (self.filename, self.begin[0],
line = self._bnf_text.splitlines()[self.begin[0] - 1]
raise SyntaxError(msg, ('<grammar>', self.begin[0],
self.begin[1], line))
@@ -381,7 +386,7 @@ class DFAState(object):
__hash__ = None # For Py3 compatibility.
def generate_grammar(bnf_text):
def generate_grammar(bnf_text, token_namespace):
"""
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
@@ -390,5 +395,5 @@ def generate_grammar(bnf_text):
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
p = ParserGenerator(bnf_text)
p = ParserGenerator(bnf_text, token_namespace)
return p.make_grammar()
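Since generate_grammar() now takes the token namespace explicitly, callers can supply their own mapping from token names to ids; a sketch mirroring the call in Grammar.__init__ above (the two-rule BNF is a placeholder, not a real Python grammar):

from parso.pgen2.pgen import generate_grammar
from parso.python import token

bnf_text = 'file_input: (NEWLINE | stmt)* ENDMARKER\nstmt: NAME NEWLINE\n'
pgen_grammar = generate_grammar(bnf_text, token_namespace=token)
print(pgen_grammar.symbol2number['file_input'])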

View File

@@ -1,144 +0,0 @@
"""
Parsers for Python
"""
import os
from parso.utils import splitlines, source_to_unicode
from parso._compatibility import FileNotFoundError
from parso.pgen2.pgen import generate_grammar
from parso.python.parser import Parser, remove_last_newline
from parso.python.diff import DiffParser
from parso.tokenize import generate_tokens
from parso.cache import parser_cache, load_module, save_module
_loaded_grammars = {}
def load_grammar(version=None):
"""
Loads a Python grammar. The default version is always the latest.
If you need support for a specific version, please use e.g.
`version='3.3'`.
"""
if version is None:
version = '3.6'
if version in ('3.2', '3.3'):
version = '3.4'
elif version == '2.6':
version = '2.7'
file = 'grammar' + version + '.txt'
global _loaded_grammars
path = os.path.join(os.path.dirname(__file__), file)
try:
return _loaded_grammars[path]
except KeyError:
try:
with open(path) as f:
bnf_text = f.read()
grammar = generate_grammar(bnf_text)
return _loaded_grammars.setdefault(path, grammar)
except FileNotFoundError:
# Just load the default if the file does not exist.
return load_grammar()
def parse(code=None, **kwargs):
"""
If you want to parse a Python file, this is most likely the place to
start. If you need finer-grained control over the parsed instance, there
will be other ways to access it.
:param code: A unicode string that contains Python code.
:param path: The path to the file you want to open. Only needed for caching.
:param grammar: A Python grammar, created with load_grammar. You may
omit it; in that case the grammar for the current Python version is used.
:param error_recovery: If enabled, any code will be returned. If it is
invalid, it will be returned as an error node. If disabled, you will
get a ParseError when encountering syntax errors in your code.
:param start_symbol: The grammar symbol that you want to parse. Only
allowed to be used when error_recovery is disabled.
:param cache_path: If given, saves the parso cache in this directory. If
not given, defaults to the platform's default cache location.
:return: A syntax tree node. Typically the module.
"""
# Wanted python3.5 * operator and keyword only arguments.
path = kwargs.pop('path', None)
grammar = kwargs.pop('grammar', None)
error_recovery = kwargs.pop('error_recovery', True)
start_symbol = kwargs.pop('start_symbol', 'file_input')
cache = kwargs.pop('cache', False)
diff_cache = kwargs.pop('diff_cache', False)
cache_path = kwargs.pop('cache_path', None)
if kwargs:
raise TypeError(
"parse() got an unexpected keyword argument '%s'"
% next(iter(kwargs)))
# Start with actual code.
if code is None and path is None:
raise TypeError("Please provide either code or a path.")
if grammar is None:
grammar = load_grammar()
if cache and code is None and path is not None:
# With the current architecture we cannot load from cache if the
# code is given, because we just load from cache if it's not older than
# the latest change (file last modified).
module_node = load_module(grammar, path, cache_path=cache_path)
if module_node is not None:
return module_node
if code is None:
with open(path, 'rb') as f:
code = source_to_unicode(f.read())
lines = tokenize_lines = splitlines(code, keepends=True)
if diff_cache:
try:
module_cache_item = parser_cache[path]
except KeyError:
pass
else:
module_node = module_cache_item.node
old_lines = module_cache_item.lines
if old_lines == lines:
# TODO remove this line? I think it's not needed. (dave)
save_module(grammar, path, module_node, lines, pickling=False,
cache_path=cache_path)
return module_node
new_node = DiffParser(grammar, module_node).update(
old_lines=old_lines,
new_lines=lines
)
save_module(grammar, path, new_node, lines, pickling=cache,
cache_path=cache_path)
return new_node
added_newline = not code.endswith('\n')
if added_newline:
code += '\n'
tokenize_lines = list(tokenize_lines)
tokenize_lines[-1] += '\n'
tokenize_lines.append('')
tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
root_node = p.parse(tokens=tokens)
if added_newline:
remove_last_newline(root_node)
if cache or diff_cache:
save_module(grammar, path, root_node, lines, pickling=cache,
cache_path=cache_path)
return root_node
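The module-level parse() and load_grammar() removed here map directly onto the new Grammar API; a rough migration sketch:

# before (this deleted module):
#   module = parse(code, grammar=load_grammar(version='3.4'))
# after (parso/grammar.py above):
import parso

code = 'x = 1\n'
module = parso.load_grammar(version='3.4').parse(code)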

View File

@@ -10,11 +10,11 @@ import difflib
from collections import namedtuple
import logging
from parso.utils import splitlines
from parso.python.parser import Parser, remove_last_newline
from parso.utils import split_lines
from parso.python.parser import Parser
from parso.python.tree import EndMarker
from parso.tokenize import (generate_tokens, NEWLINE, TokenInfo,
ENDMARKER, INDENT, DEDENT)
from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
ENDMARKER, INDENT, DEDENT)
def _get_last_line(node_or_leaf):
@@ -34,24 +34,24 @@ def _ends_with_newline(leaf, suffix=''):
return typ == 'newline' or suffix.endswith('\n')
def _flows_finished(grammar, stack):
def _flows_finished(pgen_grammar, stack):
"""
if, while, for and try might not be finished, because another part might
still be parsed.
"""
for dfa, newstate, (symbol_number, nodes) in stack:
if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
'for_stmt', 'try_stmt'):
return False
return True
def suite_or_file_input_is_valid(grammar, stack):
if not _flows_finished(grammar, stack):
def suite_or_file_input_is_valid(pgen_grammar, stack):
if not _flows_finished(pgen_grammar, stack):
return False
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
if grammar.number2symbol[symbol_number] == 'suite':
if pgen_grammar.number2symbol[symbol_number] == 'suite':
# If only newline is in the suite, the suite is not valid, yet.
return len(nodes) > 1
# Not reaching a suite means that we're dealing with file_input levels
@@ -89,8 +89,9 @@ class DiffParser(object):
An advanced form of parsing a file faster. Unfortunately comes with huge
side effects. It changes the given module.
"""
def __init__(self, grammar, module):
self._grammar = grammar
def __init__(self, pgen_grammar, tokenizer, module):
self._pgen_grammar = pgen_grammar
self._tokenizer = tokenizer
self._module = module
def _reset(self):
@@ -120,14 +121,6 @@ class DiffParser(object):
self._module._used_names = None
self._parser_lines_new = new_lines
self._added_newline = False
if new_lines[-1] != '':
# The Python grammar needs a newline at the end of a file, but for
# everything else we keep working with new_lines here.
self._parser_lines_new = list(new_lines)
self._parser_lines_new[-1] += '\n'
self._parser_lines_new.append('')
self._added_newline = True
self._reset()
@@ -141,7 +134,7 @@ class DiffParser(object):
logging.debug('diff %s old[%s:%s] new[%s:%s]',
operation, i1 + 1, i2, j1 + 1, j2)
if j2 == line_length + int(self._added_newline):
if j2 == line_length and new_lines[-1] == '':
# The empty part after the last newline is not relevant.
j2 -= 1
@@ -159,12 +152,9 @@ class DiffParser(object):
# changed module.
self._nodes_stack.close()
if self._added_newline:
remove_last_newline(self._module)
last_pos = self._module.end_pos[0]
if last_pos != line_length:
current_lines = splitlines(self._module.get_code(), keepends=True)
current_lines = split_lines(self._module.get_code(), keepends=True)
diff = difflib.unified_diff(current_lines, new_lines)
raise Exception(
"There's an issue (%s != %s) with the diff parser. Please report:\n%s"
@@ -255,12 +245,11 @@ class DiffParser(object):
last_until_line = 0
while until_line > self._nodes_stack.parsed_until_line:
node = self._try_parse_part(until_line)
nodes = self._get_children_nodes(node)
#self._insert_nodes(nodes)
nodes = node.children
self._nodes_stack.add_parsed_nodes(nodes)
logging.debug(
'parse part %s to %s (to %s in parser)',
'parse_part from %s to %s (to %s in part parser)',
nodes[0].get_start_pos_of_prefix()[0],
self._nodes_stack.parsed_until_line,
node.end_pos[0] - 1
@@ -271,16 +260,6 @@ class DiffParser(object):
assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
last_until_line = self._nodes_stack.parsed_until_line
def _get_children_nodes(self, node):
nodes = node.children
first_element = nodes[0]
# TODO this looks very strange...
if first_element.type == 'error_leaf' and \
first_element.original_type == 'indent':
assert False, str(nodes)
return nodes
def _try_parse_part(self, until_line):
"""
Sets up a normal parser that uses a specialized tokenizer to only parse
@@ -299,7 +278,7 @@ class DiffParser(object):
line_offset=parsed_until_line
)
self._active_parser = Parser(
self._grammar,
self._pgen_grammar,
error_recovery=True
)
return self._active_parser.parse(tokens=tokens)
@@ -308,7 +287,7 @@ class DiffParser(object):
is_first_token = True
omitted_first_indent = False
indents = []
tokens = generate_tokens(lines, use_exact_op_types=True)
tokens = self._tokenizer(lines, (1, 0))
stack = self._active_parser.pgen_parser.stack
for typ, string, start_pos, prefix in tokens:
start_pos = start_pos[0] + line_offset, start_pos[1]
@@ -323,7 +302,10 @@ class DiffParser(object):
continue
is_first_token = False
if typ == DEDENT:
# In case of omitted_first_indent, it might not be dedented fully.
# However this is a sign for us that a dedent happened.
if typ == DEDENT \
or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
indents.pop()
if omitted_first_indent and not indents:
# We are done here, only thing that can come now is an
@@ -333,23 +315,23 @@ class DiffParser(object):
prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
else:
prefix = ''
yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
break
elif typ == NEWLINE and start_pos[0] >= until_line:
yield TokenInfo(typ, string, start_pos, prefix)
yield PythonToken(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._grammar, stack):
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
yield TokenInfo(DEDENT, '', start_pos, '')
yield PythonToken(DEDENT, '', start_pos, '')
yield TokenInfo(ENDMARKER, '', start_pos, '')
yield PythonToken(ENDMARKER, '', start_pos, '')
break
else:
continue
yield TokenInfo(typ, string, start_pos, prefix)
yield PythonToken(typ, string, start_pos, prefix)
class _NodesStackNode(object):
@@ -399,6 +381,9 @@ class _NodesStackNode(object):
if _ends_with_newline(last_leaf, suffix):
line -= 1
line += suffix.count('\n')
if suffix and not suffix.endswith('\n'):
# This is the end of a file (that doesn't end with a newline).
line += 1
return line
@@ -587,7 +572,7 @@ class _NodesStack(object):
end_pos = list(last_leaf.end_pos)
except IndexError:
end_pos = [1, 0]
lines = splitlines(self.prefix)
lines = split_lines(self.prefix)
assert len(lines) > 0
if len(lines) == 1:
end_pos[1] += len(lines[0])

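DiffParser is normally driven through Grammar.parse() rather than used directly; a sketch of the incremental path documented there (the file path is a placeholder):

import parso

grammar = parso.load_grammar()
# The first call parses fully and caches; subsequent calls diff against the
# cached tree and reparse only the changed lines.
module = grammar.parse(path='example.py', cache=True, diff_cache=True)
module = grammar.parse(path='example.py', cache=True, diff_cache=True)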
parso/python/errors.py (new file, +1053 lines)
File diff suppressed because it is too large
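errors.py wires messages like those collected in issue_list.txt (later in this diff) into Grammar.iter_errors() via ErrorFinderConfig; a sketch of surfacing them:

import parso

grammar = parso.load_grammar()
module = grammar.parse('continue\n')   # "'continue' not properly in loop"
for issue in grammar.iter_errors(module):
    print(issue.start_pos, issue.code, issue.message)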

parso/python/fstring.py (new file, +211 lines)

@@ -0,0 +1,211 @@
import re
from itertools import count
from parso.utils import PythonVersionInfo
from parso.utils import split_lines
from parso.python.tokenize import Token
from parso import parser
from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
version36 = PythonVersionInfo(3, 6)
class TokenNamespace:
_c = count()
LBRACE = next(_c)
RBRACE = next(_c)
ENDMARKER = next(_c)
COLON = next(_c)
CONVERSION = next(_c)
PYTHON_EXPR = next(_c)
EXCLAMATION_MARK = next(_c)
UNTERMINATED_STRING = next(_c)
token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
@classmethod
def generate_token_id(cls, string):
if string == '{':
return cls.LBRACE
elif string == '}':
return cls.RBRACE
elif string == '!':
return cls.EXCLAMATION_MARK
elif string == ':':
return cls.COLON
return getattr(cls, string)
GRAMMAR = """
fstring: expression* ENDMARKER
format_spec: ':' expression*
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
"""
_prefix = r'((?:[^{}]+)*)'
_expr = _prefix + r'(\{|\}|$)'
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
# There's only one conversion character allowed. But the rules have to be
# checked later anyway, so allow more here. This makes error recovery nicer.
_conversion = r'([^={}:]*)(.?)'
_compiled_expr = re.compile(_expr)
_compiled_in_expr = re.compile(_in_expr)
_compiled_conversion = re.compile(_conversion)
def tokenize(code, start_pos=(1, 0)):
def add_to_pos(string):
lines = split_lines(string)
l = len(lines[-1])
if len(lines) > 1:
start_pos[0] += len(lines) - 1
start_pos[1] = l
else:
start_pos[1] += l
def tok(value, type=None, prefix=''):
if type is None:
type = TokenNamespace.generate_token_id(value)
add_to_pos(prefix)
token = Token(type, value, tuple(start_pos), prefix)
add_to_pos(value)
return token
start = 0
recursion_level = 0
added_prefix = ''
start_pos = list(start_pos)
while True:
match = _compiled_expr.match(code, start)
prefix = added_prefix + match.group(1)
found = match.group(2)
start = match.end()
if not found:
# We're at the end.
break
if found == '}':
if recursion_level == 0 and len(code) > start and code[start] == '}':
# This is a }} escape.
added_prefix = prefix + '}}'
start += 1
continue
recursion_level = max(0, recursion_level - 1)
yield tok(found, prefix=prefix)
added_prefix = ''
else:
assert found == '{'
if recursion_level == 0 and len(code) > start and code[start] == '{':
# This is a {{ escape.
added_prefix = prefix + '{{'
start += 1
continue
recursion_level += 1
yield tok(found, prefix=prefix)
added_prefix = ''
expression = ''
squared_count = 0
curly_count = 0
while True:
expr_match = _compiled_in_expr.match(code, start)
expression += expr_match.group(1)
found = expr_match.group(2)
start = expr_match.end()
if found == '{':
curly_count += 1
expression += found
elif found == '}' and curly_count > 0:
curly_count -= 1
expression += found
elif found == '[':
squared_count += 1
expression += found
elif found == ']':
# Use a max function here, because the Python code might
# just have syntax errors.
squared_count = max(0, squared_count - 1)
expression += found
elif found == ':' and (squared_count or curly_count):
expression += found
elif found in ('"', "'"):
search = found
if len(code) > start + 1 and \
code[start] == found == code[start+1]:
search *= 3
start += 2
index = code.find(search, start)
if index == -1:
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
yield tok(
found + code[start:],
type=TokenNamespace.UNTERMINATED_STRING,
)
start = len(code)
break
expression += found + code[start:index+1]
start = index + 1
elif found == '!' and len(code) > start and code[start] == '=':
# This is a python `!=` and not a conversion.
expression += found
else:
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
if found:
yield tok(found)
break
if found == '!':
conversion_match = _compiled_conversion.match(code, start)
found = conversion_match.group(2)
start = conversion_match.end()
yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
if found:
yield tok(found)
if found == '}':
recursion_level -= 1
# We don't need to handle everything after ':', because that is
# basically new tokens.
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
class Parser(parser.BaseParser):
def parse(self, tokens):
node = super(Parser, self).parse(tokens)
if isinstance(node, self.default_leaf): # Is an endmarker.
# If there are no curly braces we get back a non-module. We always
# want an fstring.
node = self.default_node('fstring', [node])
return node
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
# TODO this is so ugly.
leaf_type = TokenNamespace.token_map[type].lower()
return TypedLeaf(leaf_type, value, start_pos, prefix)
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
if not self._error_recovery:
return super(Parser, self).error_recovery(
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback
)
token_type = TokenNamespace.token_map[typ].lower()
if len(stack) == 1:
error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
stack[0][2][1].append(error_leaf)
else:
dfa, state, (type_, nodes) = stack[1]
stack[0][2][1].append(ErrorNode(nodes))
stack[1:] = []
add_token_callback(typ, value, start_pos, prefix)
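The f-string grammar parses the body of an f-string (without the f prefix and the quotes); a sketch using the loader defined earlier:

import parso

fstring_grammar = parso.load_grammar(language='python-f-string')
tree = fstring_grammar.parse('hello {name!r:>10}')
print(tree.type)        # 'fstring'
print(tree.get_code())  # should round-trip the original body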

parso/python/grammar26.txt (new file, +158 lines)

@@ -0,0 +1,158 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"
# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
#diagram:rules
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
('*' NAME [',' '**' NAME] | '**' NAME) |
fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
'>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item ':' suite
# Dave: Python2.6 actually defines a little bit of a different label called
# 'with_var'. However in 2.7+ this is the default. Apply it for
# consistency reasons.
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test
test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [listmaker] ']' |
'{' [dictorsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+)
listmaker: test ( list_for | (',' test)* [','] )
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
# default. It's more consistent like this.
testlist_comp: test ( gen_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
# Dave: Rename from dictmaker to dictorsetmaker, because this is more
# consistent with the following grammars.
dictorsetmaker: test ':' test (',' test ':' test)* [',']
classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
argument: test [gen_for] | test '=' test # Really [keyword '='] test
list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]
gen_iter: gen_for | gen_if
gen_for: 'for' exprlist 'in' or_test [gen_iter]
gen_if: 'if' old_test [gen_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [testlist]
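With this file in place, load_grammar() above resolves version '2.6' to it by name; a quick sketch:

import parso

grammar = parso.load_grammar(version='2.6')   # loads python/grammar26.txt
module = grammar.parse('print "hi"\n')        # print_stmt is a statement in this grammar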

View File

@@ -1,4 +1,4 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
@@ -10,41 +10,32 @@
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() and input() functions.
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: ((tfpdef ['=' test] ',')*
('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
varargslist: ((vfpdef ['=' test] ',')*
('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
('*' NAME [',' '**' NAME] | '**' NAME) |
fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
expr_stmt: testlist (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
@@ -60,8 +51,7 @@ yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
@@ -78,17 +68,14 @@ while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
@@ -105,7 +92,6 @@ and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
@@ -115,33 +101,37 @@ term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'[' [listmaker] ']' |
'{' [dictorsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
# Modification by David Halter, remove `testlist_gexp` and `listmaker`
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | STRING+)
listmaker: test ( list_for | (',' test)* [','] )
testlist_comp: test ( comp_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
# Modification by David Halter, dictsetmaker -> dictorsetmaker (so that it's
# the same as in the 3.4 grammar).
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
(test (comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
argument: test [comp_for] | test '=' test # Really [keyword '='] test
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test
list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]
testlist1: test (',' test)*

View File

@@ -15,8 +15,8 @@
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -78,9 +78,7 @@ with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond

parso/python/grammar34.txt (new file, +133 lines)

@@ -0,0 +1,133 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

View File

@@ -15,8 +15,8 @@
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -84,9 +84,7 @@ with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
@@ -136,7 +134,7 @@ arglist: argument (',' argument)* [',']
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguements are blocked; keyword unpackings
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |

View File

@@ -1,24 +1,16 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
@@ -90,10 +82,7 @@ with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by Francisco Souza/David Halter: The stmt is now optional. This reflects
# how Jedi allows classes and functions to be empty, which is beneficial for
# autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond

parso/python/issue_list.txt (new file, 176 lines)

@@ -0,0 +1,176 @@
A list of syntax/indentation errors I've encountered in CPython.
# Python/compile.c
"'continue' not properly in loop"
"'continue' not supported inside 'finally' clause" # Until loop
"default 'except:' must be last"
"from __future__ imports must occur at the beginning of the file"
"'return' outside function"
"'return' with value in async generator"
"'break' outside loop"
"two starred expressions in assignment"
"asynchronous comprehension outside of an asynchronous function"
"'yield' outside function" # For both yield and yield from
"'yield from' inside async function"
"'await' outside function"
"'await' outside async function"
"starred assignment target must be in a list or tuple"
"can't use starred expression here"
"too many statically nested blocks" # Max. 20
# This is one of the few places in the cpython code base that I really
# don't understand. It feels a bit hacky if you look at the implementation
# of UNPACK_EX.
"too many expressions in star-unpacking assignment"
# Just ignore this one, newer versions will not be affected anymore and
# it's a limit of 2^16 - 1.
"too many annotations" # Only python 3.0 - 3.5, 3.6 is not affected.
# Python/ast.c
# used with_item exprlist expr_stmt
"can't %s %s" % ("assign to" or "delete",
"lambda"
"function call" # foo()
"generator expression"
"list comprehension"
"set comprehension"
"dict comprehension"
"keyword"
"Ellipsis"
"comparison"
Dict: Set: Num: Str: Bytes: JoinedStr: FormattedValue:
"literal"
BoolOp: BinOp: UnaryOp:
"operator"
Yield: YieldFrom:
"yield expression"
Await:
"await expression"
IfExp:
"conditional expression"
"assignment to keyword" # (keywords + __debug__) # None = 2
"named arguments must follow bare *" # def foo(*): pass
"non-default argument follows default argument" # def f(x=3, y): pass
"iterable unpacking cannot be used in comprehension" # [*[] for a in [1]]
"dict unpacking cannot be used in dict comprehension" # {**{} for a in [1]}
"Generator expression must be parenthesized if not sole argument" # foo(x for x in [], b)
"positional argument follows keyword argument unpacking" # f(**x, y) >= 3.5
"positional argument follows keyword argument" # f(x=2, y) >= 3.5
"iterable argument unpacking follows keyword argument unpacking" # foo(**kwargs, *args)
"lambda cannot contain assignment" # f(lambda: 1=1)
"keyword can't be an expression" # f(+x=1)
"keyword argument repeated" # f(x=1, x=2)
"illegal expression for augmented assignment" # x, y += 1
"only single target (not list) can be annotated" # [x, y]: int
"only single target (not tuple) can be annotated" # x, y: str
"illegal target for annotation" # True: 1`
"trailing comma not allowed without surrounding parentheses" # from foo import a,
"bytes can only contain ASCII literal characters." # b'ä' # prob. only python 3
"cannot mix bytes and nonbytes literals" # 's' b''
"assignment to yield expression not possible" # x = yield 1 = 3
"f-string: empty expression not allowed" # f'{}'
"f-string: single '}' is not allowed" # f'}'
"f-string: expressions nested too deeply" # f'{1:{5:{3}}}'
"f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}'
"f-string expression part cannot include '#'" # f'{#}'
"f-string: unterminated string" # f'{"}'
"f-string: mismatched '(', '{', or '['"
"f-string: invalid conversion character: expected 's', 'r', or 'a'" # f'{1!b}'
"f-string: unexpected end of string" # Doesn't really happen?!
"f-string: expecting '}'" # f'{'
"(unicode error) unknown error
"(value error) unknown error
"(unicode error) MESSAGE
MESSAGES = {
"\\ at end of string"
"truncated \\xXX escape"
"truncated \\uXXXX escape"
"truncated \\UXXXXXXXX escape"
"illegal Unicode character" # '\Uffffffff'
"malformed \\N character escape" # '\N{}'
"unknown Unicode character name" # '\N{foo}'
}
"(value error) MESSAGE # bytes
MESSAGES = {
"Trailing \\ in string"
"invalid \\x escape at position %d"
}
"invalid escape sequence \\%c" # Only happens when used in `python -W error`
"unexpected node" # Probably irrelevant
"Unexpected node-type in from-import" # Irrelevant, doesn't happen.
"malformed 'try' statement" # Irrelevant, doesn't happen.
# Python/symtable.c
"duplicate argument '%U' in function definition"
"name '%U' is assigned to before global declaration"
"name '%U' is assigned to before nonlocal declaration"
"name '%U' is used prior to global declaration"
"name '%U' is used prior to nonlocal declaration"
"annotated name '%U' can't be global"
"annotated name '%U' can't be nonlocal"
"import * only allowed at module level"
"name '%U' is parameter and global",
"name '%U' is nonlocal and global",
"name '%U' is parameter and nonlocal",
"nonlocal declaration not allowed at module level");
"no binding for nonlocal '%U' found",
# RecursionError. Not handled. For all human written code, this is probably
# not an issue. eval("()"*x) with x>=2998 for example fails, but that's
# more than 2000 executions on one line.
"maximum recursion depth exceeded during compilation");
# Python/future.c
"not a chance"
"future feature %.100s is not defined"
"from __future__ imports must occur at the beginning of the file" # Also in compile.c
# Parser/tokenizer.c
# All the following issues seem to be irrelevant for parso, because the
# encoding stuff is done before it reaches the tokenizer. It's already
# unicode at that point.
"encoding problem: %s"
"encoding problem: %s with BOM"
"Non-UTF-8 code starting with '\\x%.2x' in file %U on line %i, but no encoding declared; see http://python.org/dev/peps/pep-0263/ for details"
# Parser/pythonrun.c
E_SYNTAX: "invalid syntax"
E_LINECONT: "unexpected character after line continuation character"
E_IDENTIFIER: "invalid character in identifier"
# Also just use 'invalid syntax'. Happens mostly with stuff like `(`. This
# message doesn't really help the user, because it only appears very
# randomly, e.g. `(or` wouldn't yield this error.
E_EOF: "unexpected EOF while parsing"
# Even in 3.6 this is implemented kind of shaky. Not implemented, I think
# cPython needs to fix this one first.
# e.g. `ast.parse('def x():\n\t if 1:\n \t \tpass')` works :/
E_TABSPACE: "inconsistent use of tabs and spaces in indentation"
# Ignored, just shown as "invalid syntax". The error has mostly to do with
# numbers like 0b2 everywhere or 1.6_ in Python3.6.
E_TOKEN: "invalid token"
E_EOFS: "EOF while scanning triple-quoted string literal"
E_EOLS: "EOL while scanning string literal"
# IndentationError
E_DEDENT: "unindent does not match any outer indentation level"
E_TOODEEP: "too many levels of indentation" # 100 levels
E_SYNTAX: "expected an indented block"
"unexpected indent"
# I don't think this actually ever happens.
"unexpected unindent"
# Irrelevant for parso for now.
E_OVERFLOW: "expression too long"
E_DECODE: "unknown decode error"
E_BADSINGLE: "multiple statements found while compiling a single statement"
Version specific:
Python 3.5:
'yield' inside async function
Python 3.3/3.4:
can use starred expression only as assignment target
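
Most of the compile.c messages above can be reproduced from plain Python
with the builtin compile(); two examples:

    # "'continue' not properly in loop"
    try:
        compile("continue\n", "<test>", "exec")
    except SyntaxError as e:
        print(e.msg)

    # "'return' outside function"
    try:
        compile("return 3\n", "<test>", "exec")
    except SyntaxError as e:
        print(e.msg)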


@@ -1,9 +1,8 @@
from parso.python import tree
from parso import tokenize
from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name)
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name, NAME)
from parso.parser import BaseParser
from parso.utils import splitlines
from parso.pgen2.parse import token_to_ilabel
class Parser(BaseParser):
@@ -11,7 +10,7 @@ class Parser(BaseParser):
This class is used to parse a Python file, it then divides them into a
class structure of different scopes.
:param grammar: The grammar object of pgen2. Loaded by load_grammar.
:param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
"""
node_map = {
@@ -38,6 +37,12 @@ class Parser(BaseParser):
'while_stmt': tree.WhileStmt,
'try_stmt': tree.TryStmt,
'comp_for': tree.CompFor,
# Not sure if this is the best idea, but IMO it's the easiest way to
# avoid extreme amounts of work around the subtle difference of 2/3
# grammar in list comprehensions.
'list_for': tree.CompFor,
# Same here. This just exists in Python 2.6.
'gen_for': tree.CompFor,
'decorator': tree.Decorator,
'lambdef': tree.Lambda,
'old_lambdef': tree.Lambda,
@@ -45,8 +50,8 @@ class Parser(BaseParser):
}
default_node = tree.PythonNode
def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
self.syntax_errors = []
self._omit_dedent_list = []
@@ -75,14 +80,14 @@ class Parser(BaseParser):
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
node = self.convert_node(
self._grammar,
self._grammar.symbol2number['file_input'],
self._pgen_grammar,
self._pgen_grammar.symbol2number['file_input'],
[node]
)
return node
def convert_node(self, grammar, type, children):
def convert_node(self, pgen_grammar, type, children):
"""
Convert raw node information to a PythonBaseNode instance.
@@ -91,7 +96,7 @@ class Parser(BaseParser):
strictly bottom-up.
"""
# TODO REMOVE symbol, we don't want type here.
symbol = grammar.number2symbol[type]
symbol = pgen_grammar.number2symbol[type]
try:
return self.node_map[symbol](children)
except KeyError:
@@ -101,12 +106,18 @@ class Parser(BaseParser):
# ones and therefore have pseudo start/end positions and no
# prefixes. Just ignore them.
children = [children[0]] + children[2:-1]
elif symbol == 'list_if':
# Make transitioning from 2 to 3 easier.
symbol = 'comp_if'
elif symbol == 'listmaker':
# Same as list_if above.
symbol = 'testlist_comp'
return self.default_node(symbol, children)
def convert_leaf(self, grammar, type, value, prefix, start_pos):
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == tokenize.NAME:
if value in grammar.keywords:
if type == NAME:
if value in pgen_grammar.keywords:
return tree.Keyword(value, start_pos, prefix)
else:
return tree.Name(value, start_pos, prefix)
@@ -121,24 +132,70 @@ class Parser(BaseParser):
else:
return tree.Operator(value, start_pos, prefix)
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
"""
This parser is written in a dynamic way, meaning that this parser
allows using different grammars (even non-Python). However, error
recovery is purely written for Python.
"""
def get_symbol_and_nodes(stack):
for dfa, state, (type_, nodes) in stack:
symbol = pgen_grammar.number2symbol[type_]
yield symbol, nodes
tos_nodes = stack.get_tos_nodes()
if tos_nodes:
last_leaf = tos_nodes[-1].get_last_leaf()
else:
last_leaf = None
if self._start_symbol == 'file_input' and \
(typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
def reduce_stack(states, newstate):
# reduce
state = newstate
while states[state] == [(0, state)]:
self.pgen_parser._pop()
dfa, state, (type_, nodes) = stack[-1]
states, first = dfa
# In Python statements need to end with a newline. But since it's
# possible (and valid in Python) that there's no newline at the
# end of a file, we have to recover even if the user doesn't want
# error recovery.
#print('x', pprint.pprint(stack))
ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
dfa, state, (type_, nodes) = stack[-1]
symbol = pgen_grammar.number2symbol[type_]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
if ilabel == i:
if symbol == 'simple_stmt':
# This is basically shifting
stack[-1] = (dfa, newstate, (type_, nodes))
reduce_stack(states, newstate)
add_token_callback(typ, value, start_pos, prefix)
return
# Check if we're at the right point
#for symbol, nodes in get_symbol_and_nodes(stack):
# self.pgen_parser._pop()
#break
break
#symbol = pgen_grammar.number2symbol[type_]
if not self._error_recovery:
return super(Parser, self).error_recovery(
grammar, stack, arcs, typ, value, start_pos, prefix,
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback)
def current_suite(stack):
# For now just discard everything that is not a suite or
# file_input, if we detect an error.
for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
# `suite` can sometimes be only simple_stmt, not stmt.
symbol = grammar.number2symbol[type_]
if symbol == 'file_input':
break
elif symbol == 'suite' and len(nodes) > 1:
@@ -149,27 +206,37 @@ class Parser(BaseParser):
index, symbol, nodes = current_suite(stack)
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos):
if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
add_token_callback(typ, value, start_pos, prefix)
else:
if typ == INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
self._omit_dedent_list.append(self._indent_counter)
else:
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
stack[-1][2][1].append(error_leaf)
def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
failed_stack = []
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
stack[-1][2][1].append(error_leaf)
if symbol == 'suite':
dfa, state, node = stack[-1]
states, first = dfa
arcs = states[state]
intended_label = pgen_grammar.symbol2label['stmt']
# Introduce a proper state transition. We're basically allowing
# there to be no valid statements inside a suite.
if [x[0] for x in arcs] == [intended_label]:
new_state = arcs[0][1]
stack[-1] = dfa, new_state, node
def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
failed_stack = False
found = False
all_nodes = []
for dfa, state, (typ, nodes) in stack[start_index:]:
for dfa, state, (type_, nodes) in stack[start_index:]:
if nodes:
found = True
if found:
symbol = grammar.number2symbol[typ]
failed_stack.append((symbol, nodes))
failed_stack = True
all_nodes += nodes
if failed_stack:
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
@@ -179,7 +246,7 @@ class Parser(BaseParser):
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.
@@ -191,42 +258,4 @@ class Parser(BaseParser):
self._indent_counter -= 1
elif typ == INDENT:
self._indent_counter += 1
yield typ, value, start_pos, prefix
def remove_last_newline(node):
endmarker = node.children[-1]
# The newline is either in the endmarker as a prefix or the previous
# leaf as a newline token.
prefix = endmarker.prefix
leaf = endmarker.get_previous_leaf()
if prefix:
text = prefix
else:
if leaf is None:
raise ValueError("You're trying to remove a newline from an empty module.")
text = leaf.value
if not text.endswith('\n'):
raise ValueError("There's no newline at the end, cannot remove it.")
text = text[:-1]
if prefix:
endmarker.prefix = text
if leaf is None:
end_pos = (1, 0)
else:
end_pos = leaf.end_pos
lines = splitlines(text, keepends=True)
if len(lines) == 1:
end_pos = end_pos[0], end_pos[1] + len(lines[0])
else:
end_pos = end_pos[0] + len(lines) - 1, len(lines[-1])
endmarker.start_pos = end_pos
else:
leaf.value = text
endmarker.start_pos = leaf.end_pos
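
A rough sketch of what the recovery path above produces for broken input
(the exact split into error nodes/leaves depends on the parso version):

    import parso

    # `def f(:` cannot be parsed; error recovery wraps the unparsable
    # tokens instead of aborting the whole file.
    module = parso.parse("def f(:\n    pass\n", error_recovery=True)
    print(module.children)
    # Typically contains PythonErrorNode/PythonErrorLeaf entries next to
    # the parts that still parsed, followed by the endmarker.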

parso/python/pep8.py (new file, 727 lines)

@@ -0,0 +1,727 @@
import re
from contextlib import contextmanager
from parso.python.errors import ErrorFinder, ErrorFinderConfig
from parso.normalizer import Rule
from parso.python.tree import search_ancestor, Flow, Scope
_IMPORT_TYPES = ('import_name', 'import_from')
_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt',
'for_stmt', 'try_stmt', 'with_stmt')
_NON_STAR_TYPES = ('term', 'import_from', 'power')
_OPENING_BRACKETS = '(', '[', '{'
_CLOSING_BRACKETS = ')', ']', '}'
_FACTOR = '+', '-', '~'
_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@'
_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^'
_NEEDS_SPACE = ('=', '%', '->',
'<', '>', '==', '>=', '<=', '<>', '!=',
'+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=',
'>>=', '**=', '//=')
_NEEDS_SPACE += _BITWISE_OPERATOR
_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument')
_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop')
class IndentationTypes(object):
VERTICAL_BRACKET = object()
HANGING_BRACKET = object()
BACKSLASH = object()
SUITE = object()
IMPLICIT = object()
class IndentationNode(object):
type = IndentationTypes.SUITE
def __init__(self, config, indentation, parent=None):
self.bracket_indentation = self.indentation = indentation
self.parent = parent
def __repr__(self):
return '<%s>' % self.__class__.__name__
def get_latest_suite_node(self):
n = self
while n is not None:
if n.type == IndentationTypes.SUITE:
return n
n = n.parent
class BracketNode(IndentationNode):
def __init__(self, config, leaf, parent, in_suite_introducer=False):
self.leaf = leaf
# Figure out here what the indentation is. For chained brackets
# we can basically use the previous indentation.
previous_leaf = leaf
n = parent
if n.type == IndentationTypes.IMPLICIT:
n = n.parent
while True:
if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line:
break
previous_leaf = previous_leaf.get_previous_leaf()
if not isinstance(n, BracketNode) or previous_leaf != n.leaf:
break
n = n.parent
parent_indentation = n.indentation
next_leaf = leaf.get_next_leaf()
if '\n' in next_leaf.prefix:
# This implies code like:
# foobarbaz(
# a,
# b,
# )
self.bracket_indentation = parent_indentation \
+ config.closing_bracket_hanging_indentation
self.indentation = parent_indentation + config.indentation
self.type = IndentationTypes.HANGING_BRACKET
else:
# Implies code like:
# foobarbaz(
# a,
# b,
# )
expected_end_indent = leaf.end_pos[1]
if '\t' in config.indentation:
self.indentation = None
else:
self.indentation = ' ' * expected_end_indent
self.bracket_indentation = self.indentation
self.type = IndentationTypes.VERTICAL_BRACKET
if in_suite_introducer and parent.type == IndentationTypes.SUITE \
and self.indentation == parent_indentation + config.indentation:
self.indentation += config.indentation
# The closing bracket should have the same indentation.
self.bracket_indentation = self.indentation
self.parent = parent
class ImplicitNode(BracketNode):
"""
Implicit indentation after keyword arguments, default arguments,
annotations and dict values.
"""
def __init__(self, config, leaf, parent):
super(ImplicitNode, self).__init__(config, leaf, parent)
self.type = IndentationTypes.IMPLICIT
next_leaf = leaf.get_next_leaf()
if leaf == ':' and '\n' not in next_leaf.prefix:
self.indentation += ' '
class BackslashNode(IndentationNode):
type = IndentationTypes.BACKSLASH
def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None):
expr_stmt = search_ancestor(containing_leaf, 'expr_stmt')
if expr_stmt is not None:
equals = expr_stmt.children[-2]
if '\t' in config.indentation:
# TODO unite with the code of BracketNode
self.indentation = None
else:
# If the backslash follows the equals, use normal indentation
# otherwise it should align with the equals.
if equals.end_pos == spacing.start_pos:
self.indentation = parent_indentation + config.indentation
else:
# +1 because there is a space.
self.indentation = ' ' * (equals.end_pos[1] + 1)
else:
self.indentation = parent_indentation + config.indentation
self.bracket_indentation = self.indentation
self.parent = parent
def _is_magic_name(name):
return name.value.startswith('__') and name.value.endswith('__')
class PEP8Normalizer(ErrorFinder):
def __init__(self, *args, **kwargs):
super(PEP8Normalizer, self).__init__(*args, **kwargs)
self._previous_part = None
self._previous_leaf = None
self._on_newline = True
self._newline_count = 0
self._wanted_newline_count = None
self._max_new_lines_in_prefix = 0
self._new_statement = True
self._implicit_indentation_possible = False
# The top of stack of the indentation nodes.
self._indentation_tos = self._last_indentation_tos = \
IndentationNode(self._config, indentation='')
self._in_suite_introducer = False
if ' ' in self._config.indentation:
self._indentation_type = 'spaces'
self._wrong_indentation_char = '\t'
else:
self._indentation_type = 'tabs'
self._wrong_indentation_char = ' '
@contextmanager
def visit_node(self, node):
with super(PEP8Normalizer, self).visit_node(node):
with self._visit_node(node):
yield
@contextmanager
def _visit_node(self, node):
typ = node.type
if typ == 'import_name':
names = node.get_defined_names()
if len(names) > 1:
for name in names[:1]:
self.add_issue(name, 401, 'Multiple imports on one line')
elif typ == 'lambdef':
expr_stmt = node.parent
# Check if it's simply defining a single name, not something like
# foo.bar or x[1], where using a lambda could make more sense.
if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]):
self.add_issue(node, 731, 'Do not assign a lambda expression, use a def')
elif typ == 'try_stmt':
for child in node.children:
# Here we can simply check if it's an except, because otherwise
# it would be an except_clause.
if child.type == 'keyword' and child.value == 'except':
self.add_issue(child, 722, 'Do not use bare except, specify exception instead')
elif typ == 'comparison':
for child in node.children:
if child.type not in ('atom_expr', 'power'):
continue
if len(child.children) > 2:
continue
trailer = child.children[1]
atom = child.children[0]
if trailer.type == 'trailer' and atom.type == 'name' \
and atom.value == 'type':
self.add_issue(node, 721, "Do not compare types, use 'isinstance()'")
break
elif typ == 'file_input':
endmarker = node.children[-1]
prev = endmarker.get_previous_leaf()
prefix = endmarker.prefix
if (not prefix.endswith('\n') and (
prefix or prev is None or prev.value != '\n')):
self.add_issue(endmarker, 292, "No newline at end of file")
if typ in _IMPORT_TYPES:
simple_stmt = node.parent
module = simple_stmt.parent
#if module.type == 'simple_stmt':
if module.type == 'file_input':
index = module.children.index(simple_stmt)
for child in module.children[:index]:
children = [child]
if child.type == 'simple_stmt':
# Remove the newline.
children = child.children[:-1]
found_docstring = False
for c in children:
if c.type == 'string' and not found_docstring:
continue
found_docstring = True
if c.type == 'expr_stmt' and \
all(_is_magic_name(n) for n in c.get_defined_names()):
continue
if c.type in _IMPORT_TYPES or isinstance(c, Flow):
continue
self.add_issue(node, 402, 'Module level import not at top of file')
break
else:
continue
break
implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES
in_introducer = typ in _SUITE_INTRODUCERS
if in_introducer:
self._in_suite_introducer = True
elif typ == 'suite':
if self._indentation_tos.type == IndentationTypes.BACKSLASH:
self._indentation_tos = self._indentation_tos.parent
self._indentation_tos = IndentationNode(
self._config,
self._indentation_tos.indentation + self._config.indentation,
parent=self._indentation_tos
)
elif implicit_indentation_possible:
self._implicit_indentation_possible = True
yield
if typ == 'suite':
assert self._indentation_tos.type == IndentationTypes.SUITE
self._indentation_tos = self._indentation_tos.parent
# If we dedent, no lines are needed anymore.
self._wanted_newline_count = None
elif implicit_indentation_possible:
self._implicit_indentation_possible = False
if self._indentation_tos.type == IndentationTypes.IMPLICIT:
self._indentation_tos = self._indentation_tos.parent
elif in_introducer:
self._in_suite_introducer = False
if typ in ('classdef', 'funcdef'):
self._wanted_newline_count = self._get_wanted_blank_lines_count()
def _check_tabs_spaces(self, spacing):
if self._wrong_indentation_char in spacing.value:
self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type)
return True
return False
def _get_wanted_blank_lines_count(self):
suite_node = self._indentation_tos.get_latest_suite_node()
return int(suite_node.parent is None) + 1
def _reset_newlines(self, spacing, leaf, is_comment=False):
self._max_new_lines_in_prefix = \
max(self._max_new_lines_in_prefix, self._newline_count)
wanted = self._wanted_newline_count
if wanted is not None:
# Need to subtract one
blank_lines = self._newline_count - 1
if wanted > blank_lines and leaf.type != 'endmarker':
# In case of a comment we don't need to add the issue, yet.
if not is_comment:
# TODO end_pos wrong.
code = 302 if wanted == 2 else 301
message = "expected %s blank line, found %s" \
% (wanted, blank_lines)
self.add_issue(spacing, code, message)
self._wanted_newline_count = None
else:
self._wanted_newline_count = None
if not is_comment:
wanted = self._get_wanted_blank_lines_count()
actual = self._max_new_lines_in_prefix - 1
val = leaf.value
needs_lines = (
val == '@' and leaf.parent.type == 'decorator'
or (
val == 'class'
or val == 'async' and leaf.get_next_leaf() == 'def'
or val == 'def' and self._previous_leaf != 'async'
) and leaf.parent.parent.type != 'decorated'
)
if needs_lines and actual < wanted:
func_or_cls = leaf.parent
suite = func_or_cls.parent
if suite.type == 'decorated':
suite = suite.parent
# The first leaf of a file or a suite should not need blank
# lines.
if suite.children[int(suite.type == 'suite')] != func_or_cls:
code = 302 if wanted == 2 else 301
message = "expected %s blank line, found %s" \
% (wanted, actual)
self.add_issue(spacing, code, message)
self._max_new_lines_in_prefix = 0
self._newline_count = 0
def visit_leaf(self, leaf):
super(PEP8Normalizer, self).visit_leaf(leaf)
for part in leaf._split_prefix():
if part.type == 'spacing':
# This spacing part is consumed by the _visit_part call after the loop.
break
self._visit_part(part, part.create_spacing_part(), leaf)
self._analyse_non_prefix(leaf)
self._visit_part(leaf, part, leaf)
# Cleanup
self._last_indentation_tos = self._indentation_tos
self._new_statement = leaf.type == 'newline'
# TODO does this work? with brackets and stuff?
if leaf.type == 'newline' and \
self._indentation_tos.type == IndentationTypes.BACKSLASH:
self._indentation_tos = self._indentation_tos.parent
if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS:
self._in_suite_introducer = False
elif leaf.value == 'elif':
self._in_suite_introducer = True
if not self._new_statement:
self._reset_newlines(part, leaf)
self._max_blank_lines = 0
self._previous_leaf = leaf
return leaf.value
def _visit_part(self, part, spacing, leaf):
value = part.value
type_ = part.type
if type_ == 'error_leaf':
return
if value == ',' and part.parent.type == 'dictorsetmaker':
self._indentation_tos = self._indentation_tos.parent
node = self._indentation_tos
if type_ == 'comment':
if value.startswith('##'):
# Whole blocks of # should not raise an error.
if value.lstrip('#'):
self.add_issue(part, 266, "Too many leading '#' for block comment.")
elif self._on_newline:
if not re.match('#:? ', value) and not value == '#' \
and not (value.startswith('#!') and part.start_pos == (1, 0)):
self.add_issue(part, 265, "Block comment should start with '# '")
else:
if not re.match('#:? [^ ]', value):
self.add_issue(part, 262, "Inline comment should start with '# '")
self._reset_newlines(spacing, leaf, is_comment=True)
elif type_ == 'newline':
if self._newline_count > self._get_wanted_blank_lines_count():
self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count)
elif leaf in ('def', 'class') \
and leaf.parent.parent.type == 'decorated':
self.add_issue(part, 304, "Blank lines found after function decorator")
self._newline_count += 1
if type_ == 'backslash':
# TODO is this enough checking? What about ==?
if node.type != IndentationTypes.BACKSLASH:
if node.type != IndentationTypes.SUITE:
self.add_issue(part, 502, 'The backslash is redundant between brackets')
else:
indentation = node.indentation
if self._in_suite_introducer and node.type == IndentationTypes.SUITE:
indentation += self._config.indentation
self._indentation_tos = BackslashNode(
self._config,
indentation,
part,
spacing,
parent=self._indentation_tos
)
elif self._on_newline:
indentation = spacing.value
if node.type == IndentationTypes.BACKSLASH \
and self._previous_part.type == 'newline':
self._indentation_tos = self._indentation_tos.parent
if not self._check_tabs_spaces(spacing):
should_be_indentation = node.indentation
if type_ == 'comment':
# Comments can be dedented. So we have to care for that.
n = self._last_indentation_tos
while True:
if len(indentation) > len(n.indentation):
break
should_be_indentation = n.indentation
self._last_indentation_tos = n
if n == node:
break
n = n.parent
if self._new_statement:
if type_ == 'newline':
if indentation:
self.add_issue(spacing, 291, 'Trailing whitespace')
elif indentation != should_be_indentation:
s = '%s %s' % (len(self._config.indentation), self._indentation_type)
self.add_issue(part, 111, 'Indentation is not a multiple of ' + s)
else:
if value in '])}':
should_be_indentation = node.bracket_indentation
else:
should_be_indentation = node.indentation
if self._in_suite_introducer and indentation == \
node.get_latest_suite_node().indentation \
+ self._config.indentation:
self.add_issue(part, 129, "Line with same indent as next logical block")
elif indentation != should_be_indentation:
if not self._check_tabs_spaces(spacing) and part.value != '\n':
if value in '])}':
if node.type == IndentationTypes.VERTICAL_BRACKET:
self.add_issue(part, 124, "Closing bracket does not match visual indentation")
else:
self.add_issue(part, 123, "Closing bracket does not match indentation of opening bracket's line")
else:
if len(indentation) < len(should_be_indentation):
if node.type == IndentationTypes.VERTICAL_BRACKET:
self.add_issue(part, 128, 'Continuation line under-indented for visual indent')
elif node.type == IndentationTypes.BACKSLASH:
self.add_issue(part, 122, 'Continuation line missing indentation or outdented')
elif node.type == IndentationTypes.IMPLICIT:
self.add_issue(part, 135, 'xxx')
else:
self.add_issue(part, 121, 'Continuation line under-indented for hanging indent')
else:
if node.type == IndentationTypes.VERTICAL_BRACKET:
self.add_issue(part, 127, 'Continuation line over-indented for visual indent')
elif node.type == IndentationTypes.IMPLICIT:
self.add_issue(part, 136, 'xxx')
else:
self.add_issue(part, 126, 'Continuation line over-indented for hanging indent')
else:
self._check_spacing(part, spacing)
self._check_line_length(part, spacing)
# -------------------------------
# Finalizing. Updating the state.
# -------------------------------
if value and value in '()[]{}' and type_ != 'error_leaf' \
and part.parent.type != 'error_node':
if value in _OPENING_BRACKETS:
self._indentation_tos = BracketNode(
self._config, part,
parent=self._indentation_tos,
in_suite_introducer=self._in_suite_introducer
)
else:
assert node.type != IndentationTypes.IMPLICIT
self._indentation_tos = self._indentation_tos.parent
elif value in ('=', ':') and self._implicit_indentation_possible \
and part.parent.type in _IMPLICIT_INDENTATION_TYPES:
indentation = node.indentation
self._indentation_tos = ImplicitNode(
self._config, part, parent=self._indentation_tos
)
self._on_newline = type_ in ('newline', 'backslash', 'bom')
self._previous_part = part
self._previous_spacing = spacing
def _check_line_length(self, part, spacing):
if part.type == 'backslash':
last_column = part.start_pos[1] + 1
else:
last_column = part.end_pos[1]
if last_column > self._config.max_characters \
and spacing.start_pos[1] <= self._config.max_characters:
# Special case for long URLs in multi-line docstrings or comments,
# but still report the error when the first 72 chars are whitespace.
report = True
if part.type == 'comment':
splitted = part.value[1:].split()
if len(splitted) == 1 \
and (part.end_pos[1] - len(splitted[0])) < 72:
report = False
if report:
self.add_issue(
part,
501,
'Line too long (%s > %s characters)' %
(last_column, self._config.max_characters),
)
def _check_spacing(self, part, spacing):
def add_if_spaces(*args):
if spaces:
return self.add_issue(*args)
def add_not_spaces(*args):
if not spaces:
return self.add_issue(*args)
spaces = spacing.value
prev = self._previous_part
if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf':
return
type_ = part.type
if '\t' in spaces:
self.add_issue(spacing, 223, 'Used tab to separate tokens')
elif type_ == 'comment':
if len(spaces) < self._config.spaces_before_comment:
self.add_issue(spacing, 261, 'At least two spaces before inline comment')
elif type_ == 'newline':
add_if_spaces(spacing, 291, 'Trailing whitespace')
elif len(spaces) > 1:
self.add_issue(spacing, 221, 'Multiple spaces used')
else:
if prev in _OPENING_BRACKETS:
message = "Whitespace after '%s'" % part.value
add_if_spaces(spacing, 201, message)
elif part in _CLOSING_BRACKETS:
message = "Whitespace before '%s'" % part.value
add_if_spaces(spacing, 202, message)
elif part in (',', ';') or part == ':' \
and part.parent.type not in _POSSIBLE_SLICE_PARENTS:
message = "Whitespace before '%s'" % part.value
add_if_spaces(spacing, 203, message)
elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS:
pass # TODO
elif prev in (',', ';', ':'):
add_not_spaces(spacing, 231, "missing whitespace after '%s'" % prev.value)
elif part == ':': # Is a subscript
# TODO
pass
elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \
or prev in ('*', '**') \
and prev.parent.type not in _NON_STAR_TYPES:
# TODO
pass
elif prev in _FACTOR and prev.parent.type == 'factor':
pass
elif prev == '@' and prev.parent.type == 'decorator':
pass # TODO should probably raise an error if there's a space here
elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE:
if part == '=' and part.parent.type in ('argument', 'param') \
or prev == '=' and prev.parent.type in ('argument', 'param'):
if part == '=':
param = part.parent
else:
param = prev.parent
if param.type == 'param' and param.annotation:
add_not_spaces(spacing, 252, 'Expected spaces around annotation equals')
else:
add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals')
elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR:
add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator')
elif part == '%' or prev == '%':
add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator')
else:
message_225 = 'Missing whitespace between tokens'
add_not_spaces(spacing, 225, message_225)
elif type_ == 'keyword' or prev.type == 'keyword':
add_not_spaces(spacing, 275, 'Missing whitespace around keyword')
else:
prev_spacing = self._previous_spacing
if prev in _ALLOW_SPACE and spaces != prev_spacing.value \
and '\n' not in self._previous_leaf.prefix:
message = "Whitespace before operator doesn't match with whitespace after"
self.add_issue(spacing, 229, message)
if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE:
message_225 = 'Missing whitespace between tokens'
#print('xy', spacing)
#self.add_issue(spacing, 225, message_225)
# TODO why only brackets?
if part in _OPENING_BRACKETS:
message = "Whitespace before '%s'" % part.value
add_if_spaces(spacing, 211, message)
def _analyse_non_prefix(self, leaf):
typ = leaf.type
if typ == 'name' and leaf.value in ('l', 'O', 'I'):
if leaf.is_definition():
message = "Do not define %s named 'l', 'O', or 'I' one line"
if leaf.parent.type == 'class' and leaf.parent.name == leaf:
self.add_issue(leaf, 742, message % 'classes')
elif leaf.parent.type == 'function' and leaf.parent.name == leaf:
self.add_issue(leaf, 743, message % 'function')
else:
self.add_issuadd_issue(741, message % 'variables', leaf)
elif leaf.value == ':':
if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef':
next_leaf = leaf.get_next_leaf()
if next_leaf.type != 'newline':
if leaf.parent.type == 'funcdef':
self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)')
else:
self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)')
elif leaf.value == ';':
if leaf.get_next_leaf().type in ('newline', 'endmarker'):
self.add_issue(leaf, 703, 'Statement ends with a semicolon')
else:
self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)')
elif leaf.value in ('==', '!='):
comparison = leaf.parent
index = comparison.children.index(leaf)
left = comparison.children[index - 1]
right = comparison.children[index + 1]
for node in left, right:
if node.type == 'keyword' or node.type == 'name':
if node.value == 'None':
message = "comparison to None should be 'if cond is None:'"
self.add_issue(leaf, 711, message)
break
elif node.value in ('True', 'False'):
message = "comparison to False/True should be 'if cond is True:' or 'if cond:'"
self.add_issue(leaf, 712, message)
break
elif leaf.value in ('in', 'is'):
comparison = leaf.parent
if comparison.type == 'comparison' and comparison.parent.type == 'not_test':
if leaf.value == 'in':
self.add_issue(leaf, 713, "test for membership should be 'not in'")
else:
self.add_issue(leaf, 714, "test for object identity should be 'is not'")
elif typ == 'string':
# Checking multiline strings
for i, line in enumerate(leaf.value.splitlines()[1:]):
indentation = re.match('[ \t]*', line).group(0)
start_pos = leaf.line + i, len(indentation)
# TODO check multiline indentation.
elif typ == 'endmarker':
if self._newline_count >= 2:
self.add_issue(leaf, 391, 'Blank line at end of file')
def add_issue(self, node, code, message):
if self._previous_leaf is not None:
if search_ancestor(self._previous_leaf, 'error_node') is not None:
return
if self._previous_leaf.type == 'error_leaf':
return
if search_ancestor(node, 'error_node') is not None:
return
if code in (901, 903):
# 901 and 903 are raised by the ErrorFinder.
super(PEP8Normalizer, self).add_issue(node, code, message)
else:
# Skip ErrorFinder here, because it has custom behavior.
super(ErrorFinder, self).add_issue(node, code, message)
class PEP8NormalizerConfig(ErrorFinderConfig):
normalizer_class = PEP8Normalizer
"""
Normalizing to PEP8. Not really implemented, yet.
"""
def __init__(self, indentation=' ' * 4, hanging_indentation=None,
max_characters=79, spaces_before_comment=2):
self.indentation = indentation
if hanging_indentation is None:
hanging_indentation = indentation
self.hanging_indentation = hanging_indentation
self.closing_bracket_hanging_indentation = ''
self.break_after_binary = False
self.max_characters = max_characters
self.spaces_before_comment = spaces_before_comment
# TODO this is not yet ready.
#@PEP8Normalizer.register_rule(type='endmarker')
class BlankLineAtEnd(Rule):
code = 392
message = 'Blank line at end of file'
def is_issue(self, leaf):
return self._newline_count >= 2
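
For reference, the two bracket indentation shapes that BracketNode
distinguishes, written out as runnable examples (foobarbaz is just a
placeholder name taken from the comments above):

    def foobarbaz(a, b):
        return a, b

    a, b = 1, 2

    # HANGING_BRACKET: a newline right after the opening bracket; the body
    # indents one level and the closing bracket dedents back out.
    result = foobarbaz(
        a,
        b,
    )

    # VERTICAL_BRACKET: the first argument stays on the opening line;
    # continuation lines align with the column after the opening bracket.
    result = foobarbaz(a,
                       b)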

parso/python/prefix.py (new file, 97 lines)

@@ -0,0 +1,97 @@
import re
from codecs import BOM_UTF8
from parso.python.tokenize import group
unicode_bom = BOM_UTF8.decode('utf-8')
class PrefixPart(object):
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
assert start_pos is not None
self.parent = leaf
self.type = typ
self.value = value
self.spacing = spacing
self.start_pos = start_pos
@property
def end_pos(self):
if self.value.endswith('\n'):
return self.start_pos[0] + 1, 0
if self.value == unicode_bom:
# The bom doesn't have a length at the start of a Python file.
return self.start_pos
return self.start_pos[0], self.start_pos[1] + len(self.value)
def create_spacing_part(self):
column = self.start_pos[1] - len(self.spacing)
return PrefixPart(
self.parent, 'spacing', self.spacing,
start_pos=(self.start_pos[0], column)
)
def __repr__(self):
return '%s(%s, %s, %s)' % (
self.__class__.__name__,
self.type,
repr(self.value),
self.start_pos
)
_comment = r'#[^\n\r\f]*'
_backslash = r'\\\r?\n'
_newline = r'\r?\n'
_form_feed = r'\f'
_only_spacing = '$'
_spacing = r'[ \t]*'
_bom = unicode_bom
_regex = group(
_comment, _backslash, _newline, _form_feed, _only_spacing, _bom,
capture=True
)
_regex = re.compile(group(_spacing, capture=True) + _regex)
_types = {
'#': 'comment',
'\\': 'backslash',
'\f': 'formfeed',
'\n': 'newline',
'\r': 'newline',
unicode_bom: 'bom'
}
def split_prefix(leaf, start_pos):
line, column = start_pos
start = 0
value = spacing = ''
bom = False
while start != len(leaf.prefix):
match = _regex.match(leaf.prefix, start)
spacing = match.group(1)
value = match.group(2)
if not value:
break
type_ = _types[value[0]]
yield PrefixPart(
leaf, type_, value, spacing,
start_pos=(line, column + start - int(bom) + len(spacing))
)
if type_ == 'bom':
bom = True
start = match.end(0)
if value.endswith('\n'):
line += 1
column = -start
if value:
spacing = ''
yield PrefixPart(
leaf, 'spacing', spacing,
start_pos=(line, column + start)
)
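
A sketch of what split_prefix yields in practice, through the private
_split_prefix() leaf method that pep8.py above relies on (private API,
so subject to change):

    import parso

    module = parso.parse("# a comment\nx = 1\n")
    name_x = module.children[0].children[0].children[0]  # the Name leaf `x`
    for part in name_x._split_prefix():
        print(part)
    # PrefixPart(comment, '# a comment', (1, 0))
    # PrefixPart(newline, '\n', (1, 11))
    # PrefixPart(spacing, '', (2, 0))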


@@ -1,34 +1,36 @@
from __future__ import absolute_import
from parso._compatibility import py_version
from itertools import count
from token import *
from parso._compatibility import py_version
COMMENT = N_TOKENS
_counter = count(N_TOKENS)
# Never want to see this thing again.
del N_TOKENS
COMMENT = next(_counter)
tok_name[COMMENT] = 'COMMENT'
N_TOKENS += 1
NL = N_TOKENS
NL = next(_counter)
tok_name[NL] = 'NL'
N_TOKENS += 1
# Sets the attributes that don't exist in these tok_name versions.
if py_version >= 30:
BACKQUOTE = N_TOKENS
BACKQUOTE = next(_counter)
tok_name[BACKQUOTE] = 'BACKQUOTE'
N_TOKENS += 1
else:
RARROW = N_TOKENS
RARROW = next(_counter)
tok_name[RARROW] = 'RARROW'
N_TOKENS += 1
ELLIPSIS = N_TOKENS
ELLIPSIS = next(_counter)
tok_name[ELLIPSIS] = 'ELLIPSIS'
N_TOKENS += 1
if not py_version >= 35:
ATEQUAL = N_TOKENS
if py_version < 35:
ATEQUAL = next(_counter)
tok_name[ATEQUAL] = 'ATEQUAL'
N_TOKENS += 1
ERROR_DEDENT = next(_counter)
tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
# Map from operator to number (since tokenize doesn't do this)
@@ -88,3 +90,15 @@ opmap = {}
for line in opmap_raw.splitlines():
op, name = line.split()
opmap[op] = globals()[name]
def generate_token_id(string):
"""
Takes a token from the grammar (e.g. `'+'` or `'and'`) and returns the
corresponding ID for it. The strings are part of the grammar file.
"""
try:
return opmap[string]
except KeyError:
pass
return globals()[string]
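
Usage of the new helper as defined above (a sketch; this module was
reorganized in later parso versions):

    from parso.python.token import generate_token_id, tok_name

    # Operators resolve through opmap, token names through globals().
    print(tok_name[generate_token_id('+')])      # PLUS
    print(tok_name[generate_token_id('NAME')])   # NAME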

parso/python/tokenize.py (new file, 420 lines)

@@ -0,0 +1,420 @@
# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. To make it possible to do error recovery the
tokenizer needed to be rewritten.
Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
"""
from __future__ import absolute_import
import sys
import string
import re
from collections import namedtuple
import itertools as _itertools
from codecs import BOM_UTF8
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
ERROR_DEDENT)
from parso._compatibility import py_version
from parso.utils import split_lines
TokenCollection = namedtuple(
'TokenCollection',
'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
)
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
_token_collection_cache = {}
if py_version >= 30:
# Python 3 has str.isidentifier() to check if a char is a valid identifier
is_identifier = str.isidentifier
else:
namechars = string.ascii_letters + '_'
is_identifier = lambda s: s in namechars
def group(*choices, **kwargs):
capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :(
assert not kwargs
start = '('
if not capture:
start += '?:'
return start + '|'.join(choices) + ')'
def any(*choices):
return group(*choices) + '*'
def maybe(*choices):
return group(*choices) + '?'
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes(version_info):
def different_case_versions(prefix):
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
yield ''.join(s)
# The valid string prefixes. Only contain the lower case versions,
# and don't contain any permutations (include 'fr', but not
# 'rf'). The various permutations will be generated.
_valid_string_prefixes = ['b', 'r', 'u']
if version_info >= (3, 0):
_valid_string_prefixes.append('br')
if version_info >= (3, 6):
_valid_string_prefixes += ['f', 'fr']
# if we add binary f-strings, add: ['fb', 'fbr']
result = set([''])
for prefix in _valid_string_prefixes:
for t in _itertools.permutations(prefix):
# create a list with upper and lower versions of each
# character
result.update(different_case_versions(t))
if version_info <= (2, 7):
# In Python 2 the order cannot just be random.
result.update(different_case_versions('ur'))
result.update(different_case_versions('br'))
return result
def _compile(expr):
return re.compile(expr, re.UNICODE)
def _get_token_collection(version_info):
try:
return _token_collection_cache[tuple(version_info)]
except KeyError:
_token_collection_cache[tuple(version_info)] = result = \
_create_token_collection(version_info)
return result
def _create_token_collection(version_info):
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Name = r'\w+'
if version_info >= (3, 6):
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
else:
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
if version_info >= (3, 0):
Octnumber = r'0[oO][0-7]+'
else:
Octnumber = '0[oO]?[0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
possible_prefixes = _all_string_prefixes(version_info)
StringPrefix = group(*possible_prefixes)
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->",
r"[+\-*/%&@`|^=<>]=?",
r"~")
Bracket = '[][(){}]'
special_args = [r'\r?\n', r'[:;.,@]']
if version_info >= (3, 0):
special_args.insert(0, r'\.\.\.')
Special = group(*special_args)
Funny = group(Operator, Bracket, Special)
# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = group(Whitespace, capture=True) + \
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in possible_prefixes:
endpats[_prefix + "'"] = _compile(Single)
endpats[_prefix + '"'] = _compile(Double)
endpats[_prefix + "'''"] = _compile(Single3)
endpats[_prefix + '"""'] = _compile(Double3)
# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in possible_prefixes:
for p in (t + '"', t + "'"):
single_quoted.add(p)
for p in (t + '"""', t + "'''"):
triple_quoted.add(p)
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
'finally', 'while', 'with', 'return')
pseudo_token_compiled = _compile(PseudoToken)
return TokenCollection(
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
ALWAYS_BREAK_TOKENS
)
class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
@property
def end_pos(self):
lines = split_lines(self.string)
if len(lines) > 1:
return self.start_pos[0] + len(lines) - 1, 0
else:
return self.start_pos[0], self.start_pos[1] + len(self.string)
class PythonToken(Token):
def _get_type_name(self, exact=True):
return tok_name[self.type]
def __repr__(self):
return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
self._replace(type=self._get_type_name()))
def tokenize(code, version_info, start_pos=(1, 0)):
"""Generate tokens from a the source code (string)."""
lines = split_lines(code, keepends=True)
return tokenize_lines(lines, version_info, start_pos=start_pos)
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
"""
A heavily modified Python standard library tokenizer.
In addition to the default information, this also yields the prefix of each
token. The idea comes from lib2to3. The prefix contains all the information
that is irrelevant to the parser, like newlines in parentheses or comments.
"""
pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
_get_token_collection(version_info)
paren_level = 0 # count parentheses
indents = [0]
max = 0
numchars = '0123456789'
contstr = ''
contline = None
# We start with a newline. This makes indent at the first position
# possible. It's not valid Python, but still better than an INDENT in the
# second line (and not in the first). This makes quite a few things in
# Jedi's fast parser possible.
new_line = True
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
first = True
lnum = start_pos[0] - 1
for line in lines: # loop over lines in stream
lnum += 1
pos = 0
max = len(line)
if first:
if line.startswith(BOM_UTF8_STRING):
additional_prefix = BOM_UTF8_STRING
line = line[1:]
max = len(line)
# Fake that the part before was already parsed.
line = '^' * start_pos[1] + line
pos = start_pos[1]
max += start_pos[1]
first = False
if contstr: # continued string
endmatch = endprog.match(line)
if endmatch:
pos = endmatch.end(0)
yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
contstr = ''
contline = None
else:
contstr = contstr + line
contline = contline + line
continue
while pos < max:
pseudomatch = pseudo_token.match(line, pos)
if not pseudomatch: # scan for tokens
txt = line[pos:]
if txt.endswith('\n'):
new_line = True
yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
additional_prefix = ''
break
prefix = additional_prefix + pseudomatch.group(1)
additional_prefix = ''
start, pos = pseudomatch.span(2)
spos = (lnum, start)
token = pseudomatch.group(2)
if token == '':
assert prefix
additional_prefix = prefix
# This means that we have a line with whitespace/comments at
# the end, which just results in an endmarker.
break
initial = token[0]
if new_line and initial not in '\r\n#':
new_line = False
if paren_level == 0:
i = 0
while line[i] == '\f':
i += 1
start -= 1
if start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
elif initial in '\r\n':
if not new_line and paren_level == 0:
yield PythonToken(NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
new_line = True
elif initial == '#': # Comments
assert not token.endswith("\n")
additional_prefix = prefix + token
elif token in triple_quoted:
endprog = endpats[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield PythonToken(STRING, token, spos, prefix)
else:
contstr_start = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2]))
contstr = line[start:]
contline = line
break
else: # ordinary string
yield PythonToken(STRING, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in always_break_tokens:
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield PythonToken(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
if token in '([{':
paren_level += 1
elif token in ')]}':
paren_level -= 1
try:
# This check is needed in any case to check if it's a valid
# operator or just some random unicode character.
typ = opmap[token]
except KeyError:
typ = ERRORTOKEN
yield PythonToken(typ, token, spos, prefix)
if contstr:
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'):
new_line = True
end_pos = lnum, max
# As the last position we just take the maximally possible position (the
# end of the final line).
for indent in indents[1:]:
yield PythonToken(DEDENT, '', end_pos, '')
yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
if __name__ == "__main__":
if len(sys.argv) >= 2:
path = sys.argv[1]
with open(path) as f:
code = f.read()
else:
code = sys.stdin.read()
from parso.utils import python_bytes_to_unicode, parse_version_string
if isinstance(code, bytes):
code = python_bytes_to_unicode(code)
for token in tokenize(code, parse_version_string()):
print(token)
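
Besides the __main__ hook, the tokenizer can be called directly. A minimal
sketch, assuming a plain (major, minor) tuple is accepted as version_info
(output shown roughly as PythonToken.__repr__ above formats it):

    from parso.python.tokenize import tokenize

    for tok in tokenize("x = 1\n", version_info=(3, 6)):
        print(tok)
    # TokenInfo(type=NAME, string='x', start=(1, 0), prefix='')
    # TokenInfo(type=EQUAL, string='=', start=(1, 2), prefix=' ')
    # TokenInfo(type=NUMBER, string='1', start=(1, 4), prefix=' ')
    # TokenInfo(type=NEWLINE, string='\n', start=(1, 5), prefix='')
    # TokenInfo(type=ENDMARKER, string='', start=(2, 0), prefix='')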


@@ -12,7 +12,7 @@ the input given to the parser. This is important if you are using refactoring.
The easiest way to play with this module is to use :class:`parsing.Parser`.
:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
>>> from parso.python import parse
>>> from parso import parse
>>> parser = parse('import os')
>>> module = parser.get_root_node()
>>> module
@@ -25,9 +25,23 @@ Any subclasses of :class:`Scope`, including :class:`Module` has an attribute
[<ImportName: import os@1,0>]
"""
import re
from parso._compatibility import utf8_repr, unicode
from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
search_ancestor
from parso.python.prefix import split_prefix
_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
'with_stmt', 'async_stmt', 'suite'])
_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS
_FUNC_CONTAINERS = set(['suite', 'simple_stmt', 'decorated']) | _FLOW_CONTAINERS
_GET_DEFINITION_TYPES = set([
'expr_stmt', 'comp_for', 'with_stmt', 'for_stmt', 'import_name',
'import_from', 'param'
])
_IMPORTS = set(['import_name', 'import_from'])
class DocstringMixin(object):
@@ -39,7 +53,7 @@ class DocstringMixin(object):
"""
if self.type == 'file_input':
node = self.children[0]
elif isinstance(self, ClassOrFunc):
elif self.type in ('funcdef', 'classdef'):
node = self.children[self.children.index(':') + 1]
if node.type == 'suite': # Normally a suite
node = node.children[1] # -> NEWLINE stmt
@@ -64,24 +78,6 @@ class PythonMixin(object):
"""
__slots__ = ()
def get_definition(self):
if self.type in ('newline', 'endmarker'):
raise ValueError('Cannot get the definition of a newline or endmarker.')
scope = self
while scope.parent is not None:
parent = scope.parent
if isinstance(scope, (PythonNode, PythonLeaf)) and parent.type != 'simple_stmt':
if scope.type == 'testlist_comp':
try:
if scope.children[1].type == 'comp_for':
return scope.children[1]
except IndexError:
pass
scope = parent
else:
break
return scope
def get_name_of_position(self, position):
for c in self.children:
if isinstance(c, Leaf):
@@ -94,9 +90,25 @@ class PythonMixin(object):
return None
class PythonLeaf(Leaf, PythonMixin):
class PythonLeaf(PythonMixin, Leaf):
__slots__ = ()
def _split_prefix(self):
return split_prefix(self, self.get_start_pos_of_prefix())
def get_start_pos_of_prefix(self):
# TODO it is really ugly that we have to override it. Maybe change
# indent error leafs somehow? No idea how, though.
previous_leaf = self.get_previous_leaf()
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
and previous_leaf.original_type in ('indent', 'error_dedent'):
previous_leaf = previous_leaf.get_previous_leaf()
if previous_leaf is None:
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
return previous_leaf.end_pos
class _LeafWithoutNewlines(PythonLeaf):
"""
@@ -106,23 +118,23 @@ class _LeafWithoutNewlines(PythonLeaf):
@property
def end_pos(self):
return self.line, self.indent + len(self.value)
return self.line, self.column + len(self.value)
# Python base classes
class PythonBaseNode(BaseNode, PythonMixin):
class PythonBaseNode(PythonMixin, BaseNode):
__slots__ = ()
class PythonNode(Node, PythonMixin):
class PythonNode(PythonMixin, Node):
__slots__ = ()
class PythonErrorNode(ErrorNode, PythonMixin):
class PythonErrorNode(PythonMixin, ErrorNode):
__slots__ = ()
class PythonErrorLeaf(ErrorLeaf, PythonMixin):
class PythonErrorLeaf(ErrorLeaf, PythonLeaf):
__slots__ = ()
@@ -151,24 +163,58 @@ class Name(_LeafWithoutNewlines):
def __repr__(self):
return "<%s: %s@%s,%s>" % (type(self).__name__, self.value,
self.line, self.indent)
self.line, self.column)
def is_definition(self):
if self.parent.type in ('power', 'atom_expr'):
# In `self.x = 3` self is not a definition, but x is.
return False
"""
Returns True if the name is being defined.
"""
return self.get_definition() is not None
def get_definition(self, import_name_always=False):
"""
Returns None if there's no definition for a name.
:param import_name_always: Specifies if an import name is always a
definition. Normally foo in `from foo import bar` is not a
definition.
"""
node = self.parent
type_ = node.type
if type_ in ('power', 'atom_expr'):
# In `self.x = 3` self is not a definition, but x is.
return None
if type_ in ('funcdef', 'classdef'):
if self == node.name:
return node
return None
if type_ in ():
if self in node.get_defined_names():
return node
return None
if type_ == 'except_clause':
# TODO in Python 2 this doesn't work correctly. See grammar file.
# I think we'll just let it be. Python 2 will be gone in a few
# years.
if self.get_previous_sibling() == 'as':
return node.parent # The try_stmt.
return None
while node is not None:
if node.type == 'suite':
return None
if node.type in _GET_DEFINITION_TYPES:
if self in node.get_defined_names():
return node
if import_name_always and node.type in _IMPORTS:
return node
return None
node = node.parent
return None
stmt = self.get_definition()
if stmt.type in ('funcdef', 'classdef', 'param'):
return self == stmt.name
elif stmt.type == 'for_stmt':
return self.start_pos < stmt.children[2].start_pos
elif stmt.type == 'try_stmt':
return self.get_previous_sibling() == 'as'
else:
return stmt.type in ('expr_stmt', 'import_name', 'import_from',
'comp_for', 'with_stmt') \
and self in stmt.get_defined_names()
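# Hedged usage sketch (illustrative, not part of this diff) of the reworked
# API above, using the parse() entry point shown elsewhere in this changeset:
#
#     from parso import parse
#     module = parse('x = 3').get_root_node()
#     name = module.get_first_leaf()          # the Name leaf for `x`
#     assert name.is_definition()             # True: `x` is being assigned
#     assert name.get_definition().type == 'expr_stmt'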
class Literal(PythonLeaf):
@@ -184,6 +230,18 @@ class String(Literal):
type = 'string'
__slots__ = ()
@property
def string_prefix(self):
return re.match('\w*(?=[\'"])', self.value).group(0)
def _get_payload(self):
match = re.search(
r'''('{3}|"{3}|'|")(.*)$''',
self.value,
flags=re.DOTALL
)
return match.group(2)[:-len(match.group(1))]
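# Illustrative example (not part of this diff): for a String leaf whose
# value is rb"payload", string_prefix is 'rb' and _get_payload() returns
# 'payload', i.e. the prefix and the surrounding quotes are stripped.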
class _StringComparisonMixin(object):
def __eq__(self, other):
@@ -248,8 +306,7 @@ class Scope(PythonBaseNode, DocstringMixin):
for element in children:
if element.type in names:
yield element
if element.type in ('suite', 'simple_stmt', 'decorated') \
or isinstance(element, Flow):
if element.type in _FUNC_CONTAINERS:
for e in scan(element.children):
yield e
@@ -284,14 +341,14 @@ class Module(Scope):
super(Module, self).__init__(children)
self._used_names = None
def iter_future_import_names(self):
def _iter_future_import_names(self):
"""
:return list of str: A list of future import names.
"""
# TODO this is a strange scan and not fully correct. I think Python's
# parser does it in a different way and scans for the first
# statement/import with a tokenizer (to check for syntax changes like
# the future print statement).
# In Python it's not allowed to use future imports after the first
# actual (non-future) statement. However, parso is not a linter, so we
# just return all future imports here. If people want to scan for
# issues they should use the API.
for imp in self.iter_imports():
if imp.type == 'import_from' and imp.level == 0:
for path in imp.get_paths():
@@ -299,13 +356,14 @@ class Module(Scope):
if len(names) == 2 and names[0] == '__future__':
yield names[1]
def has_explicit_absolute_import(self):
def _has_explicit_absolute_import(self):
"""
Checks if imports in this module are explicitly absolute, i.e. there
is a ``__future__`` import.
Currently not public, might be in the future.
:return bool:
"""
for name in self.iter_future_import_names():
for name in self._iter_future_import_names():
if name == 'absolute_import':
return True
return False
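# Illustrative behaviour (not part of this diff): for a module parsed from
# 'from __future__ import absolute_import', _iter_future_import_names()
# yields 'absolute_import', so _has_explicit_absolute_import() returns True.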
@@ -412,14 +470,14 @@ def _create_params(parent, argslist_list):
basically a way of unpacking tuples in params. Python 3 has ditched
this behavior. Jedi currently just ignores those constructs.
"""
return node.type == 'tfpdef' and node.children[0] == '('
return node.type == 'fpdef' and node.children[0] == '('
try:
first = argslist_list[0]
except IndexError:
return []
if first.type in ('name', 'tfpdef'):
if first.type in ('name', 'fpdef'):
if check_python2_nested_param(first):
return [first]
else:
@@ -427,7 +485,10 @@ def _create_params(parent, argslist_list):
elif first == '*':
return [first]
else: # argslist is a `typedargslist` or a `varargslist`.
children = first.children
if first.type == 'tfpdef':
children = [first]
else:
children = first.children
new_children = []
start = 0
# Start with offset 1, because the end is higher.
@@ -435,9 +496,10 @@ def _create_params(parent, argslist_list):
if child is None or child == ',':
param_children = children[start:end]
if param_children: # Could as well be comma and then end.
if check_python2_nested_param(param_children[0]):
new_children += param_children
elif param_children[0] == '*' and param_children[1] == ',':
if param_children[0] == '*' and param_children[1] == ',' \
or check_python2_nested_param(param_children[0]):
for p in param_children:
p.parent = parent
new_children += param_children
else:
new_children.append(Param(param_children, parent))
@@ -469,8 +531,7 @@ class Function(ClassOrFunc):
def _get_param_nodes(self):
return self.children[2].children
@property
def params(self):
def get_params(self):
"""
Returns a list of `Param()`.
"""
@@ -484,14 +545,39 @@ class Function(ClassOrFunc):
"""
Returns a generator of `yield_expr`.
"""
# TODO This is incorrect, yields are also possible in a statement.
return self._search_in_scope('yield_expr')
def scan(children):
for element in children:
if element.type in ('classdef', 'funcdef', 'lambdef'):
continue
try:
nested_children = element.children
except AttributeError:
if element.value == 'yield':
if element.parent.type == 'yield_expr':
yield element.parent
else:
yield element
else:
for result in scan(nested_children):
yield result
return scan(self.children)
def iter_return_stmts(self):
"""
Returns a generator of `return_stmt`.
"""
return self._search_in_scope('return_stmt')
def scan(children):
for element in children:
if element.type == 'return_stmt' \
or element.type == 'keyword' and element.value == 'return':
yield element
if element.type in _RETURN_STMT_CONTAINERS:
for e in scan(element.children):
yield e
return scan(self.children)
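# Hedged example (illustrative): for a function parsed from
# 'def f():\n    yield 1\n    return 2\n', the yield scan above yields the
# `yield 1` yield_expr node and iter_return_stmts() yields the return_stmt;
# both scans skip nested funcdefs, so inner functions don't leak results.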
def is_generator(self):
"""
@@ -617,6 +703,9 @@ class ForStmt(Flow):
"""
return self.children[3]
def get_defined_names(self):
return _defined_names(self.children[1])
class TryStmt(Flow):
type = 'try_stmt'
@@ -628,7 +717,6 @@ class TryStmt(Flow):
Returns ``[None]`` for except clauses without an exception given.
"""
for node in self.children:
# TODO this is not correct. We're not returning an except clause.
if node.type == 'except_clause':
yield node.children[1]
elif node == 'except':
@@ -651,8 +739,7 @@ class WithStmt(Flow):
names += _defined_names(with_item.children[2])
return names
def get_context_manager_from_name(self, name):
# TODO Replace context_manager with test?
def get_test_node_from_name(self, name):
node = name.parent
if node.type != 'with_item':
raise ValueError('The name is not actually part of a with statement.')
@@ -868,7 +955,7 @@ def _defined_names(current):
list comprehensions.
"""
names = []
if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist'):
if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'):
for child in current.children[::2]:
names += _defined_names(child)
elif current.type in ('atom', 'star_expr'):
@@ -952,8 +1039,10 @@ class Param(PythonBaseNode):
The default is the test node that appears after the `=`. Is `None` in
case no default is present.
"""
has_comma = self.children[-1] == ','
try:
return self.children[int(self.children[0] in ('*', '**')) + 2]
if self.children[-2 - int(has_comma)] == '=':
return self.children[-1 - int(has_comma)]
except IndexError:
return None
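# Illustrative example (not part of this diff): in 'def f(x, y=3): pass',
# the rewritten property above returns the `3` node for param `y`, while
# for `x` (no `=`) the indexing raises IndexError and None is returned.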
@@ -974,7 +1063,7 @@ class Param(PythonBaseNode):
def _tfpdef(self):
"""
tfpdef: see grammar.txt.
tfpdef: see e.g. grammar36.txt.
"""
offset = int(self.children[0] in ('*', '**'))
return self.children[offset]
@@ -989,6 +1078,9 @@ class Param(PythonBaseNode):
else:
return self._tfpdef()
def get_defined_names(self):
return [self.name]
@property
def position_index(self):
"""
@@ -1010,7 +1102,7 @@ class Param(PythonBaseNode):
"""
return search_ancestor(self, 'funcdef', 'lambdef')
def get_code(self, normalized=False, include_prefix=True, include_comma=True):
def get_code(self, include_prefix=True, include_comma=True):
"""
Like all the other get_code functions, but includes the param
`include_comma`.
@@ -1018,14 +1110,13 @@ class Param(PythonBaseNode):
:param include_comma bool: If enabled includes the comma in the string output.
"""
if include_comma:
return super(Param, self).get_code(normalized, include_prefix)
return super(Param, self).get_code(include_prefix)
children = self.children
if children[-1] == ',':
children = children[:-1]
return self._get_code_for_children(
children,
normalized=False,
include_prefix=include_prefix
)


@@ -1,369 +0,0 @@
# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. To make it possible to do error recovery the
tokenizer needed to be rewritten.
Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
"""
from __future__ import absolute_import
import string
import re
from collections import namedtuple
import itertools as _itertools
from parso.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
from parso._compatibility import py_version, u
from parso.utils import splitlines
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
if py_version >= 30:
# Python 3 has str.isidentifier() to check if a char is a valid identifier
is_identifier = str.isidentifier
else:
namechars = string.ascii_letters + '_'
is_identifier = lambda s: s in namechars
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
def group(*choices, **kwargs):
capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :(
assert not kwargs
start = '('
if not capture:
start += '?:'
return start + '|'.join(choices) + ')'
def any(*choices):
return group(*choices) + '*'
def maybe(*choices):
return group(*choices) + '?'
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Name = r'\w+'
if py_version >= 36:
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
else:
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
if py_version >= 30:
Octnumber = r'0[oO][0-7]+'
else:
Octnumber = '0[0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
# The valid string prefixes. Only contain the lower case versions,
# and don't contain any permutations (include 'fr', but not
# 'rf'). The various permutations will be generated.
_valid_string_prefixes = ['b', 'r', 'u', 'br']
if py_version >= 36:
_valid_string_prefixes += ['f', 'fr']
if py_version <= 27:
# TODO this is actually not 100% valid. ur is valid in Python 2.7,
# while ru is not.
_valid_string_prefixes.append('ur')
# if we add binary f-strings, add: ['fb', 'fbr']
result = set([''])
for prefix in _valid_string_prefixes:
for t in _itertools.permutations(prefix):
# create a list with upper and lower versions of each
# character
for u in _itertools.product(*[(c, c.upper()) for c in t]):
result.add(''.join(u))
return result
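# Illustrative: besides '', the generated set contains every upper/lower
# case variant of every character ordering of each valid prefix, e.g.
# 'b', 'B', 'br', 'bR', 'Rb', 'RB', ... and (on 3.6+) 'F', 'fR', etc.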
def _compile(expr):
return re.compile(expr, re.UNICODE)
# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->",
r"[+\-*/%&@|^=<>]=?",
r"~")
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, Name, capture=True)
# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = group(Whitespace, capture=True) + \
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
endpats[_prefix + "'"] = _compile(Single)
endpats[_prefix + '"'] = _compile(Double)
endpats[_prefix + "'''"] = _compile(Single3)
endpats[_prefix + '"""'] = _compile(Double3)
# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
for p in (t + '"', t + "'"):
single_quoted.add(p)
for p in (t + '"""', t + "'''"):
triple_quoted.add(p)
# TODO add with?
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
'finally', 'while', 'return')
pseudo_token_compiled = _compile(PseudoToken)
class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
def __repr__(self):
return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
self._replace(type=self.get_type_name()))
def get_type_name(self, exact=True):
if exact:
typ = self.exact_type
else:
typ = self.type
return tok_name[typ]
@property
def exact_type(self):
if self.type == OP and self.string in opmap:
return opmap[self.string]
else:
return self.type
@property
def end_pos(self):
lines = splitlines(self.string)
if len(lines) > 1:
return self.start_pos[0] + len(lines) - 1, 0
else:
return self.start_pos[0], self.start_pos[1] + len(self.string)
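# Note (illustrative): as implemented here, a multiline token's end column
# is always 0; e.g. TokenInfo(STRING, '"""a\nb"""', (1, 0), '').end_pos
# evaluates to (2, 0).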
def source_tokens(source, use_exact_op_types=False):
"""Generate tokens from a the source code (string)."""
lines = splitlines(source, keepends=True)
return generate_tokens(lines, use_exact_op_types)
def generate_tokens(lines, use_exact_op_types=False):
"""
A heavily modified Python standard library tokenizer.
Additionally to the default information, yields also the prefix of each
token. This idea comes from lib2to3. The prefix contains all information
that is irrelevant for the parser like newlines in parentheses or comments.
"""
paren_level = 0 # count parentheses
indents = [0]
max = 0
numchars = '0123456789'
contstr = ''
contline = None
# We start with a newline. This makes indent at the first position
# possible. It's not valid Python, but still better than an INDENT in the
# second line (and not in the first). This makes quite a few things in
# Jedi's fast parser possible.
new_line = True
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
for lnum, line in enumerate(lines, 1): # loop over lines in stream
pos, max = 0, len(line)
if contstr: # continued string
endmatch = endprog.match(line)
if endmatch:
pos = endmatch.end(0)
yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
contstr = ''
contline = None
else:
contstr = contstr + line
contline = contline + line
continue
while pos < max:
pseudomatch = pseudo_token_compiled.match(line, pos)
if not pseudomatch: # scan for tokens
txt = line[pos:]
if txt.endswith('\n'):
new_line = True
yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
break
prefix = additional_prefix + pseudomatch.group(1)
additional_prefix = ''
start, pos = pseudomatch.span(2)
spos = (lnum, start)
token = pseudomatch.group(2)
initial = token[0]
if new_line and initial not in '\r\n#':
new_line = False
if paren_level == 0:
i = 0
while line[i] == '\f':
i += 1
start -= 1
if start > indents[-1]:
yield TokenInfo(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
yield TokenInfo(DEDENT, '', spos, '')
indents.pop()
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, prefix)
elif initial in '\r\n':
if not new_line and paren_level == 0:
yield TokenInfo(NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
new_line = True
elif initial == '#': # Comments
assert not token.endswith("\n")
additional_prefix = prefix + token
elif token in triple_quoted:
endprog = endpats[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield TokenInfo(STRING, token, spos, prefix)
else:
contstr_start = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2]))
contstr = line[start:]
contline = line
break
else: # ordinary string
yield TokenInfo(STRING, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in ALWAYS_BREAK_TOKENS:
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield TokenInfo(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield TokenInfo(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
if token in '([{':
paren_level += 1
elif token in ')]}':
paren_level -= 1
try:
# This check is needed in any case to check if it's a valid
# operator or just some random unicode character.
exact_type = opmap[token]
except KeyError:
exact_type = typ = ERRORTOKEN
if use_exact_op_types:
typ = exact_type
else:
typ = OP
yield TokenInfo(typ, token, spos, prefix)
if contstr:
yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'):
new_line = True
end_pos = lnum, max
# As the last position we just take the maximally possible position. We
# remove -1 for the last new line.
for indent in indents[1:]:
yield TokenInfo(DEDENT, '', end_pos, '')
yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)
if __name__ == "__main__":
import sys
if len(sys.argv) >= 2:
path = sys.argv[1]
with open(path) as f:
code = u(f.read())
else:
code = u(sys.stdin.read())
for token in source_tokens(code, use_exact_op_types=True):
print(token)


@@ -145,20 +145,17 @@ class NodeOrLeaf(object):
"""
@abstractmethod
def get_code(self, normalized=False, include_prefix=True):
def get_code(self, include_prefix=True):
"""
Returns the code that was the input of the parser.
If a normalizer is given, the returned code will be normalized and will
not be equal to the input.
:param include_prefix: Removes the prefix (whitespace and comments) of e.g. a statement.
:param normalized: Deprecated. Please don't use. Will be replaced with something more powerful.
:param include_prefix: Removes the prefix (whitespace and comments) of
e.g. a statement.
"""
class Leaf(NodeOrLeaf):
__slots__ = ('value', 'parent', 'line', 'indent', 'prefix')
__slots__ = ('value', 'parent', 'line', 'column', 'prefix')
def __init__(self, value, start_pos, prefix=''):
self.value = value
@@ -168,12 +165,12 @@ class Leaf(NodeOrLeaf):
@property
def start_pos(self):
return self.line, self.indent
return self.line, self.column
@start_pos.setter
def start_pos(self, value):
self.line = value[0]
self.indent = value[1]
self.column = value[1]
def get_start_pos_of_prefix(self):
previous_leaf = self.get_previous_leaf()
@@ -187,9 +184,7 @@ class Leaf(NodeOrLeaf):
def get_last_leaf(self):
return self
def get_code(self, normalized=False, include_prefix=True):
if normalized:
return self.value
def get_code(self, include_prefix=True):
if include_prefix:
return self.prefix + self.value
else:
@@ -201,14 +196,24 @@ class Leaf(NodeOrLeaf):
end_pos_line = self.line + len(lines) - 1
# Check for multiline token
if self.line == end_pos_line:
end_pos_indent = self.indent + len(lines[-1])
end_pos_column = self.column + len(lines[-1])
else:
end_pos_indent = len(lines[-1])
return end_pos_line, end_pos_indent
end_pos_column = len(lines[-1])
return end_pos_line, end_pos_column
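# Illustrative example (not part of this diff): a single-line leaf with
# value 'foo' at start_pos (1, 4) has end_pos (1, 7); only a leaf whose
# value spans several lines moves end_pos to a later line, with the column
# then counted from that line's start.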
@utf8_repr
def __repr__(self):
return "<%s: %s start=%s>" % (type(self).__name__, self.value, self.start_pos)
value = self.value
if not value:
value = self.type
return "<%s: %s>" % (type(self).__name__, value)
class TypedLeaf(Leaf):
__slots__ = ('type',)
def __init__(self, type, value, start_pos, prefix=''):
super(TypedLeaf, self).__init__(value, start_pos, prefix)
self.type = type
class BaseNode(NodeOrLeaf):
@@ -238,16 +243,15 @@ class BaseNode(NodeOrLeaf):
def end_pos(self):
return self.children[-1].end_pos
def _get_code_for_children(self, children, normalized, include_prefix):
# TODO implement normalized (depending on context).
def _get_code_for_children(self, children, include_prefix):
if include_prefix:
return "".join(c.get_code(normalized) for c in children)
return "".join(c.get_code() for c in children)
else:
first = children[0].get_code(include_prefix=False)
return first + "".join(c.get_code(normalized) for c in children[1:])
return first + "".join(c.get_code() for c in children[1:])
def get_code(self, normalized=False, include_prefix=True):
return self._get_code_for_children(self.children, normalized, include_prefix)
def get_code(self, include_prefix=True):
return self._get_code_for_children(self.children, include_prefix)
def get_leaf_for_position(self, position, include_prefixes=False):
def binary_search(lower, upper):
@@ -316,7 +320,7 @@ class ErrorLeaf(Leaf):
A leaf that is either completely invalid in a language (like `$` in Python)
or is invalid at that position. Like the star in `1 +* 1`.
"""
__slots__ = ('original_type')
__slots__ = ('original_type',)
type = 'error_leaf'
def __init__(self, original_type, value, start_pos, prefix=''):
@@ -324,5 +328,5 @@ class ErrorLeaf(Leaf):
self.original_type = original_type
def __repr__(self):
return "<%s: %s:%s, %s)>" % \
return "<%s: %s:%s, %s>" % \
(type(self).__name__, self.original_type, repr(self.value), self.start_pos)


@@ -1,12 +1,17 @@
from collections import namedtuple
import re
import sys
from ast import literal_eval
from parso._compatibility import unicode
from parso._compatibility import unicode, total_ordering
def splitlines(string, keepends=False):
"""
A splitlines for Python code. In contrast to Python's ``str.splitlines``,
Version = namedtuple('Version', 'major, minor, micro')
def split_lines(string, keepends=False):
r"""
A str.splitlines for Python code. In contrast to Python's ``str.splitlines``,
looks at form feeds and other special characters as normal text. Just
splits ``\n`` and ``\r\n``.
Also different: Returns ``['']`` for an empty string input.
@@ -43,7 +48,10 @@ def splitlines(string, keepends=False):
return re.split('\n|\r\n', string)
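# For example (illustrative): split_lines('asd\r\n') == ['asd', ''] and
# split_lines('') == [''].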
def source_to_unicode(source, encoding=None):
def python_bytes_to_unicode(source, default_encoding='utf-8', errors='strict'):
"""
`errors` can be 'strict', 'replace' or 'ignore'.
"""
def detect_encoding():
"""
For the implementation of encoding definitions in Python, look at:
@@ -62,7 +70,7 @@ def source_to_unicode(source, encoding=None):
return possible_encoding.group(1)
else:
# the default if nothing else has been set -> PEP 263
return encoding if encoding is not None else 'utf-8'
return default_encoding
if isinstance(source, unicode):
# only cast str/bytes
@@ -71,5 +79,73 @@ def source_to_unicode(source, encoding=None):
encoding = detect_encoding()
if not isinstance(encoding, unicode):
encoding = unicode(encoding, 'utf-8', 'replace')
# cast to unicode by default
return unicode(source, encoding, 'replace')
# Cast to unicode
return unicode(source, encoding, errors)
def version_info():
"""
Returns a namedtuple of parso's version, similar to Python's
``sys.version_info``.
"""
from parso import __version__
tupl = re.findall(r'[a-z]+|\d+', __version__)
return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
def _parse_version(version):
match = re.match(r'(\d+)(?:\.(\d)(?:\.\d+)?)?$', version)
if match is None:
raise ValueError('The given version is not in the right format. '
'Use something like "3.2" or "3".')
major = int(match.group(1))
minor = match.group(2)
if minor is None:
# Use the latest Python in case it's not exactly defined, because the
# grammars are typically backwards compatible?
if major == 2:
minor = "7"
elif major == 3:
minor = "6"
else:
raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
minor = int(minor)
return PythonVersionInfo(major, minor)
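# Illustrative behaviour: _parse_version('3') falls back to the newest
# supported minor and returns PythonVersionInfo(3, 6); _parse_version('2.7.1')
# matches but ignores the micro part, giving PythonVersionInfo(2, 7).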
@total_ordering
class PythonVersionInfo(namedtuple('Version', 'major, minor')):
def __gt__(self, other):
if isinstance(other, tuple):
if len(other) != 2:
raise ValueError("Can only compare to tuples of length 2.")
return (self.major, self.minor) > other
super(PythonVersionInfo, self).__gt__(other)
return (self.major, self.minor)
def __eq__(self, other):
if isinstance(other, tuple):
if len(other) != 2:
raise ValueError("Can only compare to tuples of length 2.")
return (self.major, self.minor) == other
super(PythonVersionInfo, self).__eq__(other)
def __ne__(self, other):
return not self.__eq__(other)
def parse_version_string(version=None):
"""
Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
returns a corresponding version info with exactly two fields, major and
minor (the micro version is ignored, so `2.7.1` becomes (2, 7)).
"""
if version is None:
version = '%s.%s' % sys.version_info[:2]
if not isinstance(version, (unicode, str)):
raise TypeError("version must be a string like 3.2.")
return _parse_version(version)
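# Usage sketch (illustrative): parse_version_string() without an argument
# uses the running interpreter's version, so under CPython 3.6 it returns
# PythonVersionInfo(3, 6); parse_version_string('2.7') gives
# PythonVersionInfo(2, 7).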


@@ -2,7 +2,7 @@
addopts = --doctest-modules
# Ignore broken files in blackbox test directories
norecursedirs = .* docs scripts old*
norecursedirs = .* docs scripts normalizer_issue_files build
# Activate `clean_jedi_cache` fixture for all tests. This should be
# fine as long as we are using `clean_jedi_cache` as a session scoped


@@ -1,16 +1,16 @@
#!/usr/bin/env python
from __future__ import with_statement
from setuptools import setup
from setuptools import setup, find_packages
import parso
__AUTHOR__ = 'David Halter'
__AUTHOR_EMAIL__ = 'davidhalter88@gmail.com'
readme = open('README.rst').read() + '\n\n' + open('CHANGELOG.rst').read()
packages = ['parso', 'parso.pgen2', 'parso.python']
import parso
setup(name='parso',
version=parso.__version__,
@@ -24,7 +24,7 @@ setup(name='parso',
license='MIT',
keywords='python parser parsing',
long_description=readme,
packages=packages,
packages=find_packages(exclude=['test']),
package_data={'parso': ['python/grammar*.txt']},
platforms=['any'],
classifiers=[

test/failing_examples.py (new file, 313 lines)

@@ -0,0 +1,313 @@
# -*- coding: utf-8 -*-
import sys
from textwrap import dedent
def indent(code):
lines = code.splitlines(True)
return ''.join([' ' * 2 + line for line in lines])
def build_nested(code, depth, base='def f():\n'):
if depth == 0:
return code
new_code = base + indent(code)
return build_nested(new_code, depth - 1, base=base)
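# Illustrative: build_nested('pass', 2) produces
#   def f():
#     def f():
#       pass
# which is how the "too many levels of indentation" example below is built.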
FAILING_EXAMPLES = [
'1 +',
'?',
# Python/compile.c
dedent('''\
for a in [1]:
try:
pass
finally:
continue
'''), # 'continue' not supported inside 'finally' clause"
'continue',
'break',
'return',
'yield',
# SyntaxError from Python/ast.c
'f(x for x in bar, 1)',
'from foo import a,',
'from __future__ import whatever',
'from __future__ import braces',
'from .__future__ import whatever',
'def f(x=3, y): pass',
'lambda x=3, y: x',
'__debug__ = 1',
'with x() as __debug__: pass',
# Mostly 3.6 relevant
'[]: int',
'[a, b]: int',
'(): int',
'(()): int',
'((())): int',
'{}: int',
'True: int',
'(a, b): int',
'*star,: int',
'a, b: int = 3',
'foo(+a=3)',
'f(lambda: 1=1)',
'f(x=1, x=2)',
'f(**x, y)',
'f(x=2, y)',
'f(**x, *y)',
'f(**x, y=3, z)',
'a, b += 3',
'(a, b) += 3',
'[a, b] += 3',
# All assignment tests
'lambda a: 1 = 1',
'[x for x in y] = 1',
'{x for x in y} = 1',
'{x:x for x in y} = 1',
'(x for x in y) = 1',
'None = 1',
'... = 1',
'a == b = 1',
'{a, b} = 1',
'{a: b} = 1',
'1 = 1',
'"" = 1',
'b"" = 1',
'b"" = 1',
'"" "" = 1',
'1 | 1 = 3',
'1**1 = 3',
'~ 1 = 3',
'not 1 = 3',
'1 and 1 = 3',
'def foo(): (yield 1) = 3',
'def foo(): x = yield 1 = 3',
'async def foo(): await x = 3',
'(a if a else a) = a',
'a, 1 = x',
'foo() = 1',
# Cases without the equals but other assignments.
'with x as foo(): pass',
'del bar, 1',
'for x, 1 in []: pass',
'for (not 1) in []: pass',
'[x for 1 in y]',
'[x for a, 3 in y]',
'(x for 1 in y)',
'{x for 1 in y}',
'{x:x for 1 in y}',
# Unicode/Bytes issues.
r'u"\x"',
r'u"\"',
r'u"\u"',
r'u"""\U"""',
r'u"\Uffffffff"',
r"u'''\N{}'''",
r"u'\N{foo}'",
r'b"\x"',
r'b"\"',
'*a, *b = 3, 3',
'async def foo(): yield from []',
'yield from []',
'*a = 3',
'del *a, b',
'def x(*): pass',
'(%s *d) = x' % ('a,' * 256),
'{**{} for a in [1]}',
# Parser/tokenize.c
r'"""',
r'"',
r"'''",
r"'",
r"\blub",
# IndentationError: too many levels of indentation
build_nested('pass', 100),
# SyntaxErrors from Python/symtable.c
'def f(x, x): pass',
'nonlocal a',
# IndentationError
' foo',
'def x():\n 1\n 2',
'def x():\n 1\n 2',
'if 1:\nfoo',
'if 1: blubb\nif 1:\npass\nTrue and False',
# f-strings
'f"{}"',
'f"{\\}"',
'f"{\'\\\'}"',
'f"{#}"',
"f'{1!b}'",
"f'{1:{5:{3}}}'",
"f'{'",
"f'{'",
"f'}'",
"f'{\"}'",
"f'{\"}'",
# Now nested parsing
"f'{continue}'",
"f'{1;1}'",
"f'{a=3}'",
"f'{b\"\" \"\"}'",
]
GLOBAL_NONLOCAL_ERROR = [
dedent('''
def glob():
x = 3
x.z
global x'''),
dedent('''
def glob():
x = 3
global x'''),
dedent('''
def glob():
x
global x'''),
dedent('''
def glob():
x = 3
x.z
nonlocal x'''),
dedent('''
def glob():
x = 3
nonlocal x'''),
dedent('''
def glob():
x
nonlocal x'''),
# Annotation issues
dedent('''
def glob():
x[0]: foo
global x'''),
dedent('''
def glob():
x.a: foo
global x'''),
dedent('''
def glob():
x: foo
global x'''),
dedent('''
def glob():
x: foo = 5
global x'''),
dedent('''
def glob():
x: foo = 5
x
global x'''),
dedent('''
def glob():
global x
x: foo = 3
'''),
# global/nonlocal + param
dedent('''
def glob(x):
global x
'''),
dedent('''
def glob(x):
nonlocal x
'''),
dedent('''
def x():
a =3
def z():
nonlocal a
a = 3
nonlocal a
'''),
dedent('''
def x():
a = 4
def y():
global a
nonlocal a
'''),
# Missing binding of nonlocal
dedent('''
def x():
nonlocal a
'''),
dedent('''
def x():
def y():
nonlocal a
'''),
dedent('''
def x():
a = 4
def y():
global a
print(a)
def z():
nonlocal a
'''),
]
if sys.version_info >= (3, 6):
FAILING_EXAMPLES += GLOBAL_NONLOCAL_ERROR
FAILING_EXAMPLES += [
# Raises multiple errors in previous versions.
'async def foo():\n def nofoo():[x async for x in []]',
]
if sys.version_info >= (3, 5):
FAILING_EXAMPLES += [
# Raises different errors so just ignore them for now.
'[*[] for a in [1]]',
# Raises multiple errors in previous versions.
'async def bla():\n def x(): await bla()',
]
if sys.version_info >= (3, 4):
# Before that del None works like del list, it gives a NameError.
FAILING_EXAMPLES.append('del None')
if sys.version_info >= (3,):
FAILING_EXAMPLES += [
# Unfortunately assigning to False and True do not raise an error in
# 2.x.
'(True,) = x',
'([False], a) = x',
# A symtable error that raises only a SyntaxWarning in Python 2.
'def x(): from math import *',
# unicode chars in bytes are allowed in python 2
'b"ä"',
# combining strings and unicode is allowed in Python 2.
'"s" b""',
]
if sys.version_info >= (2, 7):
# This is something that raises a different error in 2.6 than in the other
# versions. Just skip it for 2.6.
FAILING_EXAMPLES.append('[a, 1] += 3')
if sys.version_info[:2] == (3, 5):
# yields are not allowed in 3.5 async functions. Therefore test them
# separately, here.
FAILING_EXAMPLES += [
'async def foo():\n yield x',
'async def foo():\n yield x',
]
else:
FAILING_EXAMPLES += [
'async def foo():\n yield x\n return 1',
'async def foo():\n yield x\n return 1',
]
if sys.version_info[:2] <= (3, 4):
# Python > 3.4 this is valid code.
FAILING_EXAMPLES += [
'a = *[1], 2',
'(*[1], 2)',
]


@@ -0,0 +1,51 @@
for a in 'abc':
for b in 'xyz':
hello(a) # indented with 8 spaces
#: E903:0
hello(b) # indented with 1 tab
if True:
#: E101:0
pass
#: E122+1
change_2_log = \
"""Change 2 by slamb@testclient on 2006/04/13 21:46:23
creation
"""
p4change = {
2: change_2_log,
}
class TestP4Poller(unittest.TestCase):
def setUp(self):
self.setUpGetProcessOutput()
return self.setUpChangeSource()
def tearDown(self):
pass
#
if True:
#: E101:0 E101+1:0
foo(1,
2)
def test_keys(self):
"""areas.json - All regions are accounted for."""
expected = set([
#: E101:0
u'Norrbotten',
#: E101:0
u'V\xe4sterbotten',
])
if True:
hello("""
tab at start of this line
""")


@@ -0,0 +1,137 @@
# Used to be the file for W191
#: E101+1
if False:
print # indented with 1 tab
#: E101+1
y = x == 2 \
or x == 3
#: E101+5
if (
x == (
3
) or
y == 4):
pass
#: E101+3
if x == 2 \
or y > 1 \
or x == 3:
pass
#: E101+3
if x == 2 \
or y > 1 \
or x == 3:
pass
#: E101+1
if (foo == bar and baz == frop):
pass
#: E101+1
if (foo == bar and baz == frop):
pass
#: E101+2 E101+3
if start[1] > end_col and not (
over_indent == 4 and indent_next):
assert (0, "E121 continuation line over-"
"indented for visual indent")
#: E101+3
def long_function_name(
var_one, var_two, var_three,
var_four):
hello(var_one)
#: E101+2
if ((row < 0 or self.moduleCount <= row or
col < 0 or self.moduleCount <= col)):
raise Exception("%s,%s - %s" % (row, col, self.moduleCount))
#: E101+1 E101+2 E101+3 E101+4 E101+5 E101+6
if bar:
assert (
start, 'E121 lines starting with a '
'closing bracket should be indented '
"to match that of the opening "
"bracket's line"
)
# you want vertical alignment, so use a parens
#: E101+3
if ((foo.bar("baz") and
foo.bar("frop")
)):
hello("yes")
#: E101+3
# also ok, but starting to look like LISP
if ((foo.bar("baz") and
foo.bar("frop"))):
hello("yes")
#: E101+1
if (a == 2 or b == "abc def ghi" "jkl mno"):
assert True
#: E101+2
if (a == 2 or b == """abc def ghi
jkl mno"""):
assert True
#: E101+1 E101+2
if length > options.max_line_length:
assert options.max_line_length, \
"E501 line too long (%d characters)" % length
#: E101+1 E101+2
if os.path.exists(os.path.join(path, PEP8_BIN)):
cmd = ([os.path.join(path, PEP8_BIN)] +
self._pep8_options(targetfile))
# TODO Tabs in docstrings shouldn't be there, use \t.
'''
multiline string with tab in it'''
# Same here.
'''multiline string
with tabs
and spaces
'''
# Okay
'''sometimes, you just need to go nuts in a multiline string
and allow all sorts of crap
like mixed tabs and spaces
or trailing whitespace
or long long long long long long long long long long long long long long long long long lines
''' # noqa
# Okay
'''this one
will get no warning
even though the noqa comment is not immediately after the string
''' + foo # noqa
#: E101+2
if foo is None and bar is "frop" and \
blah == 'yeah':
blah = 'yeahnah'
#: E101+1 E101+2 E101+3
if True:
foo(
1,
2)
#: E101+1 E101+2 E101+3 E101+4 E101+5
def test_keys(self):
"""areas.json - All regions are accounted for."""
expected = set([
u'Norrbotten',
u'V\xe4sterbotten',
])
#: E101+1
x = [
'abc'
]


@@ -0,0 +1,60 @@
if x > 2:
#: E111:2
hello(x)
if True:
#: E111:5
print
#: E111:6
#
#: E111:2
# what
# Comment is fine
# Comment is also fine
if False:
pass
print
print
#: E903:0
print
mimetype = 'application/x-directory'
#: E111:5
# 'httpd/unix-directory'
create_date = False
def start(self):
# foo
#: E111:8
# bar
if True: # Hello
self.master.start() # Comment
# try:
#: E111:12
# self.master.start()
# except MasterExit:
#: E111:12
# self.shutdown()
# finally:
#: E111:12
# sys.exit()
# Dedent to the first level
#: E111:6
# error
# Dedent to the base level
#: E111:2
# Also wrongly indented.
# Indent is correct.
def start(self): # Correct comment
if True:
#: E111:0
# try:
#: E111:0
# self.master.start()
#: E111:0
# except MasterExit:
#: E111:0
# self.shutdown()
self.master.start() # comment


@@ -0,0 +1,78 @@
abc = "E121", (
#: E121:2
"dent")
abc = "E122", (
#: E121:0
"dent")
my_list = [
1, 2, 3,
4, 5, 6,
#: E123
]
abc = "E124", ("visual",
"indent_two"
#: E124:14
)
abc = "E124", ("visual",
"indent_five"
#: E124:0
)
a = (123,
#: E124:0
)
#: E129+1:4
if (row < 0 or self.moduleCount <= row or
col < 0 or self.moduleCount <= col):
raise Exception("%s,%s - %s" % (row, col, self.moduleCount))
abc = "E126", (
#: E126:12
"dent")
abc = "E126", (
#: E126:8
"dent")
abc = "E127", ("over-",
#: E127:18
"over-indent")
abc = "E128", ("visual",
#: E128:4
"hanging")
abc = "E128", ("under-",
#: E128:14
"under-indent")
my_list = [
1, 2, 3,
4, 5, 6,
#: E123:5
]
result = {
#: E121:3
'key1': 'value',
#: E121:3
'key2': 'value',
}
rv.update(dict.fromkeys((
'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
'reasonComment_de', 'reasonComment_it'),
#: E128:10
'?'),
"foo")
abricot = 3 + \
4 + \
5 + 6
abc = "hello", (
"there",
#: E126:5
# "john",
"dude")
part = set_mimetype((
a.get('mime_type', 'text')),
'default')
part = set_mimetype((
a.get('mime_type', 'text')),
#: E127:21
'default')


@@ -0,0 +1,356 @@
# The issue numbers described in this file are part of the pycodestyle tracker
# and not of parso.
# Originally there were no issues in here, I (dave) added the ones that were
# necessary and IMO useful.
if (
x == (
3
) or
y == 4):
pass
y = x == 2 \
or x == 3
#: E129+1:4
if x == 2 \
or y > 1 \
or x == 3:
pass
if x == 2 \
or y > 1 \
or x == 3:
pass
if (foo == bar and
baz == frop):
pass
#: E129+1:4 E129+2:4 E123+3
if (
foo == bar and
baz == frop
):
pass
if (
foo == bar and
baz == frop
#: E129:4
):
pass
a = (
)
a = (123,
)
if start[1] > end_col and not (
over_indent == 4 and indent_next):
assert (0, "E121 continuation line over-"
"indented for visual indent")
abc = "OK", ("visual",
"indent")
abc = "Okay", ("visual",
"indent_three"
)
abc = "a-ok", (
"there",
"dude",
)
abc = "hello", (
"there",
"dude")
abc = "hello", (
"there",
# "john",
"dude")
abc = "hello", (
"there", "dude")
abc = "hello", (
"there", "dude",
)
# Aligned with opening delimiter
foo = long_function_name(var_one, var_two,
var_three, var_four)
# Extra indentation is not necessary.
foo = long_function_name(
var_one, var_two,
var_three, var_four)
arm = 'AAA' \
'BBB' \
'CCC'
bbb = 'AAA' \
'BBB' \
'CCC'
cc = ('AAA'
'BBB'
'CCC')
cc = {'text': 'AAA'
'BBB'
'CCC'}
cc = dict(text='AAA'
'BBB')
sat = 'AAA' \
'BBB' \
'iii' \
'CCC'
abricot = (3 +
4 +
5 + 6)
#: E122+1:4
abricot = 3 + \
4 + \
5 + 6
part = [-1, 2, 3,
4, 5, 6]
#: E128+1:8
part = [-1, (2, 3,
4, 5, 6), 7,
8, 9, 0]
fnct(1, 2, 3,
4, 5, 6)
fnct(1, 2, 3,
4, 5, 6,
7, 8, 9,
10, 11)
def long_function_name(
var_one, var_two, var_three,
var_four):
hello(var_one)
if ((row < 0 or self.moduleCount <= row or
col < 0 or self.moduleCount <= col)):
raise Exception("%s,%s - %s" % (row, col, self.moduleCount))
result = {
'foo': [
'bar', {
'baz': 'frop',
}
]
}
foo = my.func({
"foo": "bar",
}, "baz")
fooff(aaaa,
cca(
vvv,
dadd
), fff,
ggg)
fooff(aaaa,
abbb,
cca(
vvv,
aaa,
dadd),
"visual indentation is not a multiple of four",)
if bar:
assert (
start, 'E121 lines starting with a '
'closing bracket should be indented '
"to match that of the opening "
"bracket's line"
)
# you want vertical alignment, so use a parens
if ((foo.bar("baz") and
foo.bar("frop")
)):
hello("yes")
# also ok, but starting to look like LISP
if ((foo.bar("baz") and
foo.bar("frop"))):
hello("yes")
#: E129+1:4 E127+2:9
if (a == 2 or
b == "abc def ghi"
"jkl mno"):
assert True
#: E129+1:4
if (a == 2 or
b == """abc def ghi
jkl mno"""):
assert True
if length > options.max_line_length:
assert options.max_line_length, \
"E501 line too long (%d characters)" % length
# blub
asd = 'l.{line}\t{pos}\t{name}\t{text}'.format(
line=token[2][0],
pos=pos,
name=tokenize.tok_name[token[0]],
text=repr(token[1]),
)
#: E121+1:6 E121+2:6
hello('%-7d %s per second (%d total)' % (
options.counters[key] / elapsed, key,
options.counters[key]))
if os.path.exists(os.path.join(path, PEP8_BIN)):
cmd = ([os.path.join(path, PEP8_BIN)] +
self._pep8_options(targetfile))
fixed = (re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] +
target[c + 1:])
fixed = (
re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] +
target[c + 1:]
)
if foo is None and bar is "frop" and \
blah == 'yeah':
blah = 'yeahnah'
"""This is a multi-line
docstring."""
if blah:
# is this actually readable? :)
multiline_literal = """
while True:
if True:
1
""".lstrip()
multiline_literal = (
"""
while True:
if True:
1
""".lstrip()
)
multiline_literal = (
"""
while True:
if True:
1
"""
.lstrip()
)
if blah:
multiline_visual = ("""
while True:
if True:
1
"""
.lstrip())
rv = {'aaa': 42}
rv.update(dict.fromkeys((
#: E121:4 E121+1:4
'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
'reasonComment_de', 'reasonComment_it'), '?'))
rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en',
'reasonComment_fr', 'reasonComment_de',
'reasonComment_it'), '?'))
#: E128+1:10
rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en', 'reasonComment_fr',
'reasonComment_de', 'reasonComment_it'), '?'))
rv.update(dict.fromkeys(
('qualif_nr', 'reasonComment_en', 'reasonComment_fr',
'reasonComment_de', 'reasonComment_it'), '?'
), "foo", context={
'alpha': 4, 'beta': 53242234, 'gamma': 17,
})
rv.update(
dict.fromkeys((
'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
'reasonComment_de', 'reasonComment_it'), '?'),
"foo",
context={
'alpha': 4, 'beta': 53242234, 'gamma': 17,
},
)
event_obj.write(cursor, user_id, {
'user': user,
'summary': text,
'data': data,
})
event_obj.write(cursor, user_id, {
'user': user,
'summary': text,
'data': {'aaa': 1, 'bbb': 2},
})
event_obj.write(cursor, user_id, {
'user': user,
'summary': text,
'data': {
'aaa': 1,
'bbb': 2},
})
event_obj.write(cursor, user_id, {
'user': user,
'summary': text,
'data': {'timestamp': now, 'content': {
'aaa': 1,
'bbb': 2
}},
})


@@ -0,0 +1,294 @@
def qualify_by_address(
self, cr, uid, ids, context=None,
params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
""" This gets called by the web server """
def qualify_by_address(self, cr, uid, ids, context=None,
params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
""" This gets called by the web server """
_ipv4_re = re.compile('^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')
fct("""
AAA """ + status_2_string)
if context:
msg = """\
action: GET-CONFIG
payload:
ip_address: "%(ip)s"
username: "%(username)s"
""" % context
if context:
msg = """\
action: \
GET-CONFIG
""" % context
if context:
#: E122+2:0
msg = """\
action: """\
"""GET-CONFIG
""" % context
def unicode2html(s):
"""Convert the characters &<>'" in string s to HTML-safe sequences.
Convert newline to <br> too."""
#: E127+1:28
return unicode((s or '').replace('&', '&amp;')
.replace('\n', '<br>\n'))
parser.add_option('--count', action='store_true',
help="print total number of errors and warnings "
"to standard error and set exit code to 1 if "
"total is not null")
parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
help="exclude files or directories which match these "
"comma separated patterns (default: %s)" %
DEFAULT_EXCLUDE)
add_option('--count',
#: E135+1
help="print total number of errors "
"to standard error total is not null")
add_option('--count',
#: E135+2:11
help="print total number of errors "
"to standard error "
"total is not null")
help = ("print total number of errors " +
"to standard error")
help = "print total number of errors " \
"to standard error"
help = u"print total number of errors " \
u"to standard error"
help = b"print total number of errors " \
b"to standard error"
#: E122+1:5
help = br"print total number of errors " \
br"to standard error"
d = dict('foo', help="exclude files or directories which match these "
#: E135:9
"comma separated patterns (default: %s)" % DEFAULT_EXCLUDE)
d = dict('foo', help=u"exclude files or directories which match these "
u"comma separated patterns (default: %s)"
% DEFAULT_EXCLUDE)
#: E135+1:9 E135+2:9
d = dict('foo', help=b"exclude files or directories which match these "
b"comma separated patterns (default: %s)"
% DEFAULT_EXCLUDE)
d = dict('foo', help=br"exclude files or directories which match these "
br"comma separated patterns (default: %s)" %
DEFAULT_EXCLUDE)
d = dict('foo',
help="exclude files or directories which match these "
"comma separated patterns (default: %s)" %
DEFAULT_EXCLUDE)
d = dict('foo',
help="exclude files or directories which match these "
"comma separated patterns (default: %s, %s)" %
(DEFAULT_EXCLUDE, DEFAULT_IGNORE)
)
d = dict('foo',
help="exclude files or directories which match these "
"comma separated patterns (default: %s, %s)" %
# who knows what might happen here?
(DEFAULT_EXCLUDE, DEFAULT_IGNORE)
)
# parens used to allow the indenting.
troublefree_hash = {
"hash": "value",
"long": ("the quick brown fox jumps over the lazy dog before doing a "
"somersault"),
"long key that tends to happen more when you're indented": (
"stringwithalongtoken you don't want to break"
),
}
# another accepted form
troublefree_hash = {
"hash": "value",
"long": "the quick brown fox jumps over the lazy dog before doing "
"a somersault",
("long key that tends to happen more "
"when you're indented"): "stringwithalongtoken you don't want to break",
}
# confusing but accepted... don't do that
troublesome_hash = {
"hash": "value",
"long": "the quick brown fox jumps over the lazy dog before doing a "
#: E135:4
"somersault",
"longer":
"the quick brown fox jumps over the lazy dog before doing a "
"somersaulty",
"long key that tends to happen more "
"when you're indented": "stringwithalongtoken you don't want to break",
}
d = dict('foo',
help="exclude files or directories which match these "
"comma separated patterns (default: %s)" %
DEFAULT_EXCLUDE
)
d = dict('foo',
help="exclude files or directories which match these "
"comma separated patterns (default: %s)" % DEFAULT_EXCLUDE,
foobar="this clearly should work, because it is at "
"the right indent level",
)
rv.update(dict.fromkeys(
('qualif_nr', 'reasonComment_en', 'reasonComment_fr',
'reasonComment_de', 'reasonComment_it'),
'?'), "foo",
context={'alpha': 4, 'beta': 53242234, 'gamma': 17})
def f():
try:
if not Debug:
hello('''
If you would like to see debugging output,
try: %s -d5
''' % sys.argv[0])
# The try statement above was not finished.
#: E901
d = { # comment
1: 2
}
# issue 138 (we won't allow this in parso)
#: E126+2:9
[
12, # this is a multi-line inline
# comment
]
# issue 151
#: E122+1:3
if a > b and \
c > d:
moo_like_a_cow()
my_list = [
1, 2, 3,
4, 5, 6,
]
my_list = [1, 2, 3,
4, 5, 6,
]
result = some_function_that_takes_arguments(
'a', 'b', 'c',
'd', 'e', 'f',
)
result = some_function_that_takes_arguments('a', 'b', 'c',
'd', 'e', 'f',
)
# issue 203
dica = {
('abc'
'def'): (
'abc'),
}
(abcdef[0]
[1]) = (
'abc')
('abc'
'def') == (
'abc')
# issue 214
bar(
1).zap(
2)
bar(
1).zap(
2)
if True:
def example_issue254():
return [node.copy(
(
replacement
# First, look at all the node's current children.
for child in node.children
# Replace them.
for replacement in replace(child)
),
dict(name=token.undefined)
)]
def valid_example():
return [node.copy(properties=dict(
(key, val if val is not None else token.undefined)
for key, val in node.items()
))]
foo([
'bug'
])
# issue 144, finally!
some_hash = {
"long key that tends to happen more when you're indented":
"stringwithalongtoken you don't want to break",
}
{
1:
999999 if True
else 0,
}
abc = dedent(
'''
mkdir -p ./{build}/
mv ./build/ ./{build}/%(revision)s/
'''.format(
build='build',
# more stuff
)
)


@@ -0,0 +1,195 @@
if True:
result = some_function_that_takes_arguments(
'a', 'b', 'c',
'd', 'e', 'f',
#: E123:0
)
#: E122+1
if some_very_very_very_long_variable_name or var \
or another_very_long_variable_name:
raise Exception()
#: E122+1
if some_very_very_very_long_variable_name or var[0] \
or another_very_long_variable_name:
raise Exception()
if True:
#: E122+1
if some_very_very_very_long_variable_name or var \
or another_very_long_variable_name:
raise Exception()
if True:
#: E122+1
if some_very_very_very_long_variable_name or var[0] \
or another_very_long_variable_name:
raise Exception()
#: E901+1:8
dictionary = [
"is": {
# Might be a E122:4, but is not because the code is invalid Python.
"nested": yes(),
},
]
setup('',
scripts=[''],
classifiers=[
#: E121:6
'Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: Developers',
])
#: E123+2:4 E291:15
abc = "E123", (
"bad", "hanging", "close"
)
result = {
'foo': [
'bar', {
'baz': 'frop',
#: E123
}
#: E123
]
#: E123
}
result = some_function_that_takes_arguments(
'a', 'b', 'c',
'd', 'e', 'f',
#: E123
)
my_list = [1, 2, 3,
4, 5, 6,
#: E124:0
]
my_list = [1, 2, 3,
4, 5, 6,
#: E124:19
]
#: E124+2
result = some_function_that_takes_arguments('a', 'b', 'c',
'd', 'e', 'f',
)
fooff(aaaa,
cca(
vvv,
dadd
), fff,
#: E124:0
)
fooff(aaaa,
ccaaa(
vvv,
dadd
),
fff,
#: E124:0
)
d = dict('foo',
help="exclude files or directories which match these "
"comma separated patterns (default: %s)" % DEFAULT_EXCLUDE
#: E124:14
)
if line_removed:
self.event(cr, uid,
#: E128:8
name="Removing the option for contract",
#: E128:8
description="contract line has been removed",
#: E124:8
)
#: E129+1:4
if foo is None and bar is "frop" and \
blah == 'yeah':
blah = 'yeahnah'
#: E129+1:4 E129+2:4
def long_function_name(
var_one, var_two, var_three,
var_four):
hello(var_one)
def qualify_by_address(
#: E129:4 E129+1:4
self, cr, uid, ids, context=None,
params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
""" This gets called by the web server """
#: E129+1:4 E129+2:4
if (a == 2 or
b == "abc def ghi"
"jkl mno"):
True
my_list = [
1, 2, 3,
4, 5, 6,
#: E123:8
]
abris = 3 + \
4 + \
5 + 6
fixed = re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + \
target[c + 1:]
rv.update(dict.fromkeys((
'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
#: E121:12
'reasonComment_de', 'reasonComment_it'),
'?'),
#: E128:4
"foo")
#: E126+1:8
eat_a_dict_a_day({
"foo": "bar",
})
#: E129+1:4
if (
x == (
3
#: E129:4
) or
y == 4):
pass
#: E129+1:4 E121+2:8 E129+3:4
if (
x == (
3
) or
x == (
# This one has correct indentation.
3
#: E129:4
) or
y == 4):
pass
troublesome_hash = {
"hash": "value",
#: E135+1:8
"long": "the quick brown fox jumps over the lazy dog before doing a "
"somersault",
}
# Arguments on first line forbidden when not using vertical alignment
#: E128+1:4
foo = long_function_name(var_one, var_two,
var_three, var_four)
#: E128+1:4
hello('l.%s\t%s\t%s\t%r' %
(token[2][0], pos, tokenize.tok_name[token[0]], token[1]))
def qualify_by_address(self, cr, uid, ids, context=None,
#: E128:8
params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
""" This gets called by the web server """


@@ -0,0 +1,116 @@
#: E128+1
foo(1, 2, 3,
4, 5, 6)
#: E128+1:1
foo(1, 2, 3,
4, 5, 6)
#: E128+1:2
foo(1, 2, 3,
4, 5, 6)
#: E128+1:3
foo(1, 2, 3,
4, 5, 6)
foo(1, 2, 3,
4, 5, 6)
#: E127+1:5
foo(1, 2, 3,
4, 5, 6)
#: E127+1:6
foo(1, 2, 3,
4, 5, 6)
#: E127+1:7
foo(1, 2, 3,
4, 5, 6)
#: E127+1:8
foo(1, 2, 3,
4, 5, 6)
#: E127+1:9
foo(1, 2, 3,
4, 5, 6)
#: E127+1:10
foo(1, 2, 3,
4, 5, 6)
#: E127+1:11
foo(1, 2, 3,
4, 5, 6)
#: E127+1:12
foo(1, 2, 3,
4, 5, 6)
#: E127+1:13
foo(1, 2, 3,
4, 5, 6)
if line_removed:
#: E128+1:14 E128+2:14
self.event(cr, uid,
name="Removing the option for contract",
description="contract line has been removed",
)
if line_removed:
self.event(cr, uid,
#: E127:16
name="Removing the option for contract",
#: E127:16
description="contract line has been removed",
#: E124:16
)
rv.update(d=('a', 'b', 'c'),
#: E127:13
e=42)
#: E135+2:17
rv.update(d=('a' + 'b', 'c'),
e=42, f=42
+ 42)
rv.update(d=('a' + 'b', 'c'),
e=42, f=42
+ 42)
#: E127+1:26
input1 = {'a': {'calc': 1 + 2}, 'b': 1
+ 42}
#: E128+2:17
rv.update(d=('a' + 'b', 'c'),
e=42, f=(42
+ 42))
if True:
def example_issue254():
#:
return [node.copy(
(
#: E121:16 E121+3:20
replacement
# First, look at all the node's current children.
for child in node.children
for replacement in replace(child)
),
dict(name=token.undefined)
)]
# TODO multiline docstring are currently not handled. E125+1:4?
if ("""
"""):
pass
# TODO same
for foo in """
abc
123
""".strip().split():
hello(foo)
abc = dedent(
'''
mkdir -p ./{build}/
mv ./build/ ./{build}/%(revision)s/
'''.format(
#: E121:4 E121+1:4 E123+2:0
build='build',
# more stuff
)
)
#: E701+1: E122+1
if True:\
hello(True)
#: E128+1
foobar(a
, end=' ')

View File

@@ -0,0 +1,52 @@
#: E201:5
spam( ham[1], {eggs: 2})
#: E201:9
spam(ham[ 1], {eggs: 2})
#: E201:14
spam(ham[1], { eggs: 2})
# Okay
spam(ham[1], {eggs: 2})
#: E202:22
spam(ham[1], {eggs: 2} )
#: E202:21
spam(ham[1], {eggs: 2 })
#: E202:10
spam(ham[1 ], {eggs: 2})
# Okay
spam(ham[1], {eggs: 2})
result = func(
arg1='some value',
arg2='another value',
)
result = func(
arg1='some value',
arg2='another value'
)
result = [
item for item in items
if item > 5
]
#: E203:9
if x == 4 :
foo(x, y)
x, y = y, x
if x == 4:
#: E203:12 E702:13
a = x, y ; x, y = y, x
if x == 4:
foo(x, y)
#: E203:12
x, y = y , x
# Okay
if x == 4:
foo(x, y)
x, y = y, x
a[b1, :1] == 3
b = a[:, b1]


@@ -0,0 +1,16 @@
#: E211:4
spam (1)
#: E211:4 E211:19
dict ['key'] = list [index]
#: E211:11
dict['key'] ['subkey'] = list[index]
# Okay
spam(1)
dict['key'] = list[index]
# This is not prohibited by PEP8, but avoid it.
# Dave: I think this is extremely stupid. Use the same convention everywhere.
#: E211:9
class Foo (Bar, Baz):
pass


@@ -0,0 +1,156 @@
a = 12 + 3
#: E221:5 E229:8
b = 4 + 5
#: E221:1
x = 1
#: E221:1
y = 2
long_variable = 3
#: E221:4
x[0] = 1
#: E221:4
x[1] = 2
long_variable = 3
#: E221:8 E229:19
x = f(x) + 1
y = long_variable + 2
#: E221:8 E229:19
z = x[0] + 3
#: E221+2:13
text = """
bar
foo %s""" % rofl
# Okay
x = 1
y = 2
long_variable = 3
#: E221:7
a = a + 1
b = b + 10
#: E221:3
x = -1
#: E221:3
y = -2
long_variable = 3
#: E221:6
x[0] = 1
#: E221:6
x[1] = 2
long_variable = 3
#: E223+1:1
foobart = 4
a = 3 # aligned with tab
#: E223:4
a += 1
b += 1000
#: E225:12
submitted +=1
#: E225:9
submitted+= 1
#: E225:3
c =-1
#: E229:7
x = x /2 - 1
#: E229:11
c = alpha -4
#: E229:10
c = alpha- 4
#: E229:8
z = x **y
#: E229:14
z = (x + 1) **y
#: E229:13
z = (x + 1)** y
#: E227:14
_1kB = _1MB >>10
#: E227:11
_1kB = _1MB>> 10
#: E225:1 E225:2 E229:4
i=i+ 1
#: E225:1 E225:2 E229:5
i=i +1
#: E225:1 E225:2
i=i+1
#: E225:3
i =i+1
#: E225:1
i= i+1
#: E229:8
c = (a +b)*(a - b)
#: E229:7
c = (a+ b)*(a - b)
z = 2//30
c = (a+b) * (a-b)
x = x*2 - 1
x = x/2 - 1
# TODO whitespace should be the other way around according to pep8.
x = x / 2-1
hypot2 = x*x + y*y
c = (a + b)*(a - b)
def halves(n):
return (i//2 for i in range(n))
#: E227:11 E227:13
_1kB = _1MB>>10
#: E227:11 E227:13
_1MB = _1kB<<10
#: E227:5 E227:6
a = b|c
#: E227:5 E227:6
b = c&a
#: E227:5 E227:6
c = b^a
#: E228:5 E228:6
a = b%c
#: E228:9 E228:10
msg = fmt%(errno, errmsg)
#: E228:25 E228:26
msg = "Error %d occurred"%errno
#: E228:7
a = b %c
a = b % c
# Okay
i = i + 1
submitted += 1
x = x * 2 - 1
hypot2 = x * x + y * y
c = (a + b) * (a - b)
_1MiB = 2 ** 20
_1TiB = 2**30
foo(bar, key='word', *args, **kwargs)
baz(**kwargs)
negative = -1
spam(-1)
-negative
func1(lambda *args, **kw: (args, kw))
func2(lambda a, b=h[:], c=0: (a, b, c))
if not -5 < x < +5:
#: E227:12
print >>sys.stderr, "x is out of range."
print >> sys.stdout, "x is an integer."
x = x / 2 - 1
def squares(n):
return (i**2 for i in range(n))
ENG_PREFIXES = {
-6: "\u03bc", # Greek letter mu
-3: "m",
}


@@ -0,0 +1,16 @@
#: E231:7
a = (1,2)
#: E231:5
a[b1,:]
#: E231:10
a = [{'a':''}]
# Okay
a = (4,)
#: E202:7
b = (5, )
c = {'text': text[5:]}
result = {
'key1': 'value',
'key2': 'value',
}


@@ -0,0 +1,36 @@
#: E251:11 E251:13
def foo(bar = False):
'''Test function with an error in declaration'''
pass
#: E251:8
foo(bar= True)
#: E251:7
foo(bar =True)
#: E251:7 E251:9
foo(bar = True)
#: E251:13
y = bar(root= "sdasd")
parser.add_argument('--long-option',
#: E135+1:20
default=
"/rather/long/filesystem/path/here/blah/blah/blah")
parser.add_argument('--long-option',
default=
"/rather/long/filesystem")
# TODO this looks so stupid.
parser.add_argument('--long-option', default
="/rather/long/filesystem/path/here/blah/blah/blah")
#: E251+2:7 E251+2:9
foo(True,
baz=(1, 2),
biz = 'foo'
)
# Okay
foo(bar=(1 == 1))
foo(bar=(1 != 1))
foo(bar=(1 >= 1))
foo(bar=(1 <= 1))
(options, args) = parser.parse_args()
d[type(None)] = _deepcopy_atomic


@@ -0,0 +1,78 @@
#: E261:4
pass # an inline comment
#: E261:4
pass# an inline comment
# Okay
pass # an inline comment
pass # an inline comment
#: E262:11
x = x + 1 #Increment x
#: E262:11
x = x + 1 # Increment x
#: E262:11
x = y + 1 #: Increment x
#: E265
#Block comment
a = 1
#: E265+1
m = 42
#! This is important
mx = 42 - 42
# A comment without anything is not an issue.
#
# However, a comment at the end of a line without any content obviously
# doesn't make too much sense.
#: E262:9
foo = 1 #
#: E266+2:4 E266+5:4
def how_it_feel(r):
### This is a variable ###
a = 42
### Of course it is unused
return
#: E266 E266+1
##if DEBUG:
## logging.error()
#: E266
#########################################
# Not at the beginning of a file
#: E265
#!/usr/bin/env python
# Okay
pass # an inline comment
x = x + 1 # Increment x
y = y + 1 #: Increment x
# Block comment
a = 1
# Block comment1
# Block comment2
aaa = 1
# example of docstring (not parsed)
def oof():
"""
#foo not parsed
"""
###########################################################################
# A SEPARATOR #
###########################################################################
# ####################################################################### #
# ########################## another separator ########################## #
# ####################################################################### #


@@ -0,0 +1,49 @@
# Okay
from u import (a, b)
from v import c, d
#: E221:13
from w import (e, f)
#: E275:13
from w import(e, f)
#: E275:29
from importable.module import(e, f)
try:
#: E275:33
from importable.module import(e, f)
except ImportError:
pass
# Okay
True and False
#: E221:8
True and False
#: E221:4
True and False
#: E221:2
if 1:
pass
# Syntax Error, no indentation
#: E903+1
if 1:
pass
#: E223:8
True and False
#: E223:4 E223:9
True and False
#: E221:5
a and b
#: E221:5
1 and b
#: E221:5
a and 2
#: E221:1 E221:6
1 and b
#: E221:1 E221:6
a and 2
#: E221:4
this and False
#: E223:5
a and b
#: E223:1
a and b
#: E223:4 E223:9
this and False


@@ -0,0 +1,15 @@
# Okay
# 情
#: W291:5
print
#: W291+1
class Foo(object):
bang = 12
#: W291+1:34
'''multiline
string with trailing whitespace'''


@@ -0,0 +1,177 @@
#: E301+4
class X:
def a():
pass
def b():
pass
#: E301+5
class X:
def a():
pass
# comment
def b():
pass
# -*- coding: utf-8 -*-
def a():
pass
#: E302+1:0
"""Main module."""
def _main():
pass
#: E302+1:0
foo = 1
def get_sys_path():
return sys.path
#: E302+3:0
def a():
pass
def b():
pass
#: E302+5:0
def a():
pass
# comment
def b():
pass
#: E303+3:0
print
#: E303+3:0 E303+4:0
print
print
#: E303+3:0
print
# comment
print
#: E303+3 E303+6
def a():
print
# comment
# another comment
print
#: E302+2
a = 3
#: E304+1
@decorator
def function():
pass
#: E303+3
# something
"""This class docstring comes on line 5.
It gives error E303: too many blank lines (3)
"""
#: E302+6
def a():
print
# comment
# another comment
a()
#: E302+7
def a():
print
# comment
# another comment
try:
a()
except Exception:
pass
#: E302+4
def a():
print
# Two spaces before comments, too.
if a():
a()
#: E301+2
def a():
x = 1
def b():
pass
#: E301+2 E301+4
def a():
x = 2
def b():
x = 1
def c():
pass
#: E301+2 E301+4 E301+5
def a():
x = 1
class C:
pass
x = 2
def b():
pass
#: E302+7
# Example from https://github.com/PyCQA/pycodestyle/issues/400
foo = 2
def main():
blah, blah
if __name__ == '__main__':
main()


@@ -0,0 +1,175 @@
# Okay
class X:
pass
# Okay
def foo():
pass
# Okay
# -*- coding: utf-8 -*-
class X:
pass
# Okay
# -*- coding: utf-8 -*-
def foo():
pass
# Okay
class X:
def a():
pass
# comment
def b():
pass
# This is a
# ... multi-line comment
def c():
pass
# This is a
# ... multi-line comment
@some_decorator
class Y:
def a():
pass
# comment
def b():
pass
@property
def c():
pass
try:
from nonexistent import Bar
except ImportError:
class Bar(object):
"""This is a Bar replacement"""
def with_feature(f):
"""Some decorator"""
wrapper = f
if has_this_feature(f):
def wrapper(*args):
call_feature(args[0])
return f(*args)
return wrapper
try:
next
except NameError:
def next(iterator, default):
for item in iterator:
return item
return default
def a():
pass
class Foo():
"""Class Foo"""
def b():
pass
# comment
def c():
pass
# comment
def d():
pass
# This is a
# ... multi-line comment
# And this one is
# ... a second paragraph
# ... which spans on 3 lines
# Function `e` is below
# NOTE: Hey this is a testcase
def e():
pass
def a():
print
# comment
print
print
# Comment 1
# Comment 2
# Comment 3
def b():
pass
# Okay
def foo():
pass
def bar():
pass
class Foo(object):
pass
class Bar(object):
pass
if __name__ == '__main__':
foo()
# Okay
classification_errors = None
# Okay
defined_properly = True
# Okay
defaults = {}
defaults.update({})
# Okay
def foo(x):
classification = x
definitely = not classification


@@ -0,0 +1,39 @@
#: E401:7
import os, sys
# Okay
import os
import sys
from subprocess import Popen, PIPE
from myclass import MyClass
from foo.bar.yourclass import YourClass
import myclass
import foo.bar.yourclass
# All Okay from here until the definition of VERSION
__all__ = ['abc']
import foo
__version__ = "42"
import foo
__author__ = "Simon Gomizelj"
import foo
try:
import foo
except ImportError:
pass
else:
hello('imported foo')
finally:
hello('made attempt to import foo')
import bar
VERSION = '1.2.3'
#: E402
import foo
#: E402
import foo


@@ -0,0 +1,126 @@
#: E501:4
a = '12345678901234567890123456789012345678901234567890123456789012345678901234567890'
#: E501:80
a = '1234567890123456789012345678901234567890123456789012345678901234567890' or \
6
#: E501+1:80
a = 7 or \
'1234567890123456789012345678901234567890123456789012345678901234567890' or \
6
#: E501+1:80 E501+2:80
a = 7 or \
'1234567890123456789012345678901234567890123456789012345678901234567890' or \
'1234567890123456789012345678901234567890123456789012345678901234567890' or \
6
#: E501:78
a = '1234567890123456789012345678901234567890123456789012345678901234567890' # \
#: E502:78
a = ('123456789012345678901234567890123456789012345678901234567890123456789' \
'01234567890')
#: E502+1:11
a = ('AAA \
BBB' \
'CCC')
#: E502:38
if (foo is None and bar is "e000" and \
blah == 'yeah'):
blah = 'yeahnah'
#
# Okay
a = ('AAA'
'BBB')
a = ('AAA \
BBB'
'CCC')
a = 'AAA' \
'BBB' \
'CCC'
a = ('AAA\
BBBBBBBBB\
CCCCCCCCC\
DDDDDDDDD')
#
# Okay
if aaa:
pass
elif bbb or \
ccc:
pass
ddd = \
ccc
('\
' + ' \
')
('''
''' + ' \
')
#: E501:67 E225:21 E225:22
very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines)
#
# TODO Long multiline strings are not handled. E501?
'''multiline string
with a long long long long long long long long long long long long long long long long line
'''
#: E501
'''same thing, but this time without a terminal newline in the string
long long long long long long long long long long long long long long long long line'''
#
# issue 224 (unavoidable long lines in docstrings)
# Okay
"""
I'm some great documentation. Because I'm some great documentation, I'm
going to give you a reference to some valuable information about some API
that I'm calling:
http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
"""
#: E501
"""
longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces"""
# Regression test for #622
def foo():
"""Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis pulvinar vitae
"""
# Okay
"""
This
almost_empty_line
"""
"""
This
almost_empty_line
"""
# A basic comment
#: E501
# with a long long long long long long long long long long long long long long long long line
#
# Okay
# I'm some great comment. Because I'm so great, I'm going to give you a
# reference to some valuable information about some API that I'm calling:
#
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
x = 3
# longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces
#
# Okay
# This
# almost_empty_line
#
#: E501+1
# This
# almost_empty_line


@@ -0,0 +1,25 @@
#: E701:6
if a: a = False
#: E701:41
if not header or header[:6] != 'bytes=': pass
#: E702:9
a = False; b = True
#: E702:16 E402
import bdist_egg; bdist_egg.write_safety_flag(cmd.egg_info, safe)
#: E703:12 E402
import shlex;
#: E702:8 E703:22
del a[:]; a.append(42);
#: E704:10
def f(x): return 2
#: E704:10
def f(x): return 2 * x
while all is round:
#: E704:14
def f(x): return 2 * x


@@ -0,0 +1,93 @@
#: E711:7
if res == None:
pass
#: E711:7
if res != None:
pass
#: E711:8
if None == res:
pass
#: E711:8
if None != res:
pass
#: E711:10
if res[1] == None:
pass
#: E711:10
if res[1] != None:
pass
#: E711:8
if None != res[1]:
pass
#: E711:8
if None == res[1]:
pass
#
#: E712:7
if res == True:
pass
#: E712:7
if res != False:
pass
#: E712:8
if True != res:
pass
#: E712:9
if False == res:
pass
#: E712:10
if res[1] == True:
pass
#: E712:10
if res[1] != False:
pass
if x is False:
pass
#
#: E713:9
if not X in Y:
pass
#: E713:11
if not X.B in Y:
pass
#: E713:9
if not X in Y and Z == "zero":
pass
#: E713:24
if X == "zero" or not Y in Z:
pass
#
#: E714:9
if not X is Y:
pass
#: E714:11
if not X.B is Y:
pass
#
# Okay
if x not in y:
pass
if not (X in Y or X is Z):
pass
if not (X in Y):
pass
if x is not y:
pass
if TrueElement.get_element(True) == TrueElement.get_element(False):
pass
if (True) == TrueElement or x == TrueElement:
pass
assert (not foo) in bar
assert {'x': not foo} in bar
assert [42, not foo] in bar


@@ -0,0 +1,79 @@
#: E721:3
if type(res) == type(42):
pass
#: E721:3
if type(res) != type(""):
pass
import types
if res == types.IntType:
pass
import types
#: E721:3
if type(res) is not types.ListType:
pass
#: E721:7 E721:35
assert type(res) == type(False) or type(res) == type(None)
#: E721:7
assert type(res) == type([])
#: E721:7
assert type(res) == type(())
#: E721:7
assert type(res) == type((0,))
#: E721:7
assert type(res) == type((0))
#: E721:7
assert type(res) != type((1,))
#: E721:7
assert type(res) is type((1,))
#: E721:7
assert type(res) is not type((1,))
# Okay
#: E402
import types
if isinstance(res, int):
pass
if isinstance(res, str):
pass
if isinstance(res, types.MethodType):
pass
#: E721:3 E721:25
if type(a) != type(b) or type(a) == type(ccc):
pass
#: E721
type(a) != type(b)
#: E721
1 != type(b)
#: E721
type(b) != 1
1 != 1
try:
pass
#: E722
except:
pass
try:
pass
except Exception:
pass
#: E722
except:
pass
# Okay
fake_code = """"
try:
do_something()
except:
pass
"""
try:
pass
except Exception:
pass


@@ -0,0 +1,16 @@
#: E731:4
f = lambda x: 2 * x
while False:
#: E731:10
foo = lambda y, z: 2 * x
# Okay
f = object()
f.method = lambda: 'Method'
f = {}
f['a'] = lambda x: x ** 2
f = []
f.append(lambda x: x ** 2)
lambda: 'no-op'


@@ -0,0 +1,29 @@
Copyright © 2006-2009 Johann C. Rocholl <johann@rocholl.net>
Copyright © 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
Copyright © 2014-2016 Ian Lee <IanLee1521@gmail.com>
Copyright © 2017-???? Dave Halter <davidhalter88@gmail.com>
Dave: The files in this folder were ported from pycodestyle and some
modifications were made.
Licensed under the terms of the Expat License
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


@@ -0,0 +1,53 @@
"""
Some syntax errors are a bit complicated and need exact checking. Here we
gather some of the potentially dangerous ones.
"""
from __future__ import division
# With a dot it's not a future import anymore.
from .__future__ import absolute_import
'' ''
''r''u''
b'' BR''
for x in [1]:
try:
continue # Only the other continue and pass is an error.
finally:
#: E901
continue
for x in [1]:
break
continue
try:
pass
except ZeroDivisionError:
pass
#: E722:0
except:
pass
try:
pass
#: E722:0 E901:0
except:
pass
except ZeroDivisionError:
pass
r'\n'
r'\x'
b'\n'
a = 3
def x(b=a):
global a


@@ -0,0 +1,2 @@
's' b''
u's' b'ä'


@@ -0,0 +1,3 @@
*foo, a = (1,)
*foo[0], a = (1,)
*[], a = (1,)


@@ -0,0 +1,23 @@
"""
Mostly allowed syntax in Python 3.5.
"""
async def foo():
await bar()
#: E901
yield from []
return
#: E901
return ''
# With decorator it's a different statement.
@bla
async def foo():
await bar()
#: E901
yield from []
return
#: E901
return ''


@@ -0,0 +1,45 @@
foo: int = 4
(foo): int = 3
((foo)): int = 3
foo.bar: int
foo[3]: int
def glob():
global x
y: foo = x
def c():
a = 3
def d():
class X():
nonlocal a
def x():
a = 3
def y():
nonlocal a
def x():
def y():
nonlocal a
a = 3
def x():
a = 3
def y():
class z():
nonlocal a
a = *args, *args
error[(*args, *args)] = 3
*args, *args


@@ -0,0 +1,6 @@
# -*- coding: latin-1 -*-
# Test non-UTF8 encoding
latin1 = ('<latin-1 text, lost in re-encoding>'
'<latin-1 text, lost in re-encoding>')
c = ("w<latin-1 char>")


@@ -0,0 +1,14 @@
import sys
print 1, 2 >> sys.stdout
foo = ur'This is not possible in Python 3.'
# This is actually printing a tuple.
#: E275:5
print(1, 2)
# True and False are not keywords in Python 2 and therefore there's no need for
# a space.
norman = True+False


@@ -0,0 +1,90 @@
#!/usr/bin/env python3
from typing import ClassVar, List
print(1, 2)
# Annotated function (Issue #29)
def foo(x: int) -> int:
return x + 1
# Annotated variables #575
CONST: int = 42
class Class:
cls_var: ClassVar[str]
def m(self):
xs: List[int] = []
# True and False are keywords in Python 3 and therefore need a space.
#: E275:13 E275:14
norman = True+False
#: E302+3:0
def a():
pass
async def b():
pass
# Okay
async def add(a: int = 0, b: int = 0) -> int:
return a + b
# Previously E251 four times
#: E221:5
async def add(a: int = 0, b: int = 0) -> int:
return a + b
# Previously just E272+1:5 E272+4:5
#: E302+3 E221:5 E221+3:5
async def x():
pass
async def x(y: int = 1):
pass
#: E704:16
async def f(x): return 2
a[b1, :] == a[b1, ...]
# Annotated Function Definitions
# Okay
def munge(input: AnyStr, sep: AnyStr = None, limit=1000,
extra: Union[str, dict] = None) -> AnyStr:
pass
#: E225:24 E225:26
def x(b: tuple = (1, 2))->int:
return a + b
#: E252:11 E252:12 E231:8
def b(a:int=1):
pass
if alpha[:-i]:
*a, b = (1, 2, 3)
# Named only arguments
def foo(*, asdf):
pass
def foo2(bar, *, asdf=2):
pass


@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
hello = 'こんにちわ'
# EOF


@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Some random text with multi-byte characters (utf-8 encoded)
#
# Εδώ μάτσο κειμένων τη, τρόπο πιθανό διευθυντές ώρα μη. Νέων απλό παράγει ροή
# κι, το επί δεδομένη καθορίζουν. Πάντως ζητήσεις περιβάλλοντος ένα με, τη
# ξέχασε αρπάζεις φαινόμενο όλη. Τρέξει εσφαλμένη χρησιμοποίησέ νέα τι. Θα όρο
# πετάνε φακέλους, άρα με διακοπής λαμβάνουν εφαμοργής. Λες κι μειώσει
# καθυστερεί.
# 79 narrow chars
# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 [79]
# 78 narrow chars (Na) + 1 wide char (W)
# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情
# 3 narrow chars (Na) + 40 wide chars (W)
# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
# 3 narrow chars (Na) + 76 wide chars (W)
# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
#
# 80 narrow chars (Na)
#: E501
# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 [80]
#
# 78 narrow chars (Na) + 2 wide char (W)
#: E501
# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情情
#
# 3 narrow chars (Na) + 77 wide chars (W)
#: E501
# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
#
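The width rule exercised above is that East-Asian wide characters count as two terminal columns, so 78 narrow characters plus two wide ones already overflow the 79-column limit. A minimal sketch of that counting, using the standard unicodedata module (an illustration, not parso's actual implementation):

import unicodedata

def display_width(line):
    # East-Asian 'W' (wide) and 'F' (fullwidth) characters take two columns.
    return sum(2 if unicodedata.east_asian_width(char) in ('W', 'F') else 1
               for char in line)

assert display_width('x' * 79) == 79            # 79 narrow chars: fits
assert display_width('xxx' + '情' * 38) == 79   # 3 narrow + 38 wide: fits
assert display_width('xxx' + '情' * 39) > 79    # one more wide char: E501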


@@ -2,7 +2,7 @@
Tests ``from __future__ import absolute_import`` (only important for
Python 2.X)
"""
from parso.python import parse
from parso import parse
def test_explicit_absolute_imports():
@@ -10,14 +10,14 @@ def test_explicit_absolute_imports():
Detect modules with ``from __future__ import absolute_import``.
"""
module = parse("from __future__ import absolute_import")
assert module.has_explicit_absolute_import()
assert module._has_explicit_absolute_import()
def test_no_explicit_absolute_imports():
"""
Detect modules without ``from __future__ import absolute_import``.
"""
assert not parse("1").has_explicit_absolute_import()
assert not parse("1")._has_explicit_absolute_import()
def test_dont_break_imports_without_namespaces():
@@ -26,4 +26,4 @@ def test_dont_break_imports_without_namespaces():
assume that all imports have non-``None`` namespaces.
"""
src = "from __future__ import absolute_import\nimport xyzzy"
assert parse(src).has_explicit_absolute_import()
assert parse(src)._has_explicit_absolute_import()


@@ -8,7 +8,7 @@ import pytest
from parso.cache import _NodeCacheItem, save_module, load_module, \
_get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
from parso.python import load_grammar
from parso import load_grammar
from parso import cache
@@ -37,20 +37,20 @@ def test_modulepickling_change_cache_dir(tmpdir):
path_1 = 'fake path 1'
path_2 = 'fake path 2'
grammar = load_grammar()
_save_to_file_system(grammar, path_1, item_1, cache_path=dir_1)
hashed_grammar = load_grammar()._hashed
_save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1)
parser_cache.clear()
cached = load_stored_item(grammar, path_1, item_1, cache_path=dir_1)
cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_1)
assert cached == item_1.node
_save_to_file_system(grammar, path_2, item_2, cache_path=dir_2)
cached = load_stored_item(grammar, path_1, item_1, cache_path=dir_2)
_save_to_file_system(hashed_grammar, path_2, item_2, cache_path=dir_2)
cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_2)
assert cached is None
def load_stored_item(grammar, path, item, cache_path):
def load_stored_item(hashed_grammar, path, item, cache_path):
"""Load `item` stored at `path` in `cache`."""
item = _load_from_file_system(grammar, path, item.change_time - 1, cache_path)
item = _load_from_file_system(hashed_grammar, path, item.change_time - 1, cache_path)
return item
@@ -77,11 +77,11 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
with open(path, 'w'):
pass
save_module(grammar, path, module, [])
assert load_module(grammar, path) == module
save_module(grammar._hashed, path, module, [])
assert load_module(grammar._hashed, path) == module
unlink(_get_hashed_path(grammar, path))
unlink(_get_hashed_path(grammar._hashed, path))
parser_cache.clear()
cached2 = load_module(grammar, path)
cached2 = load_module(grammar._hashed, path)
assert cached2 is None


@@ -3,11 +3,20 @@ import logging
import pytest
from parso.utils import splitlines
from parso.utils import split_lines
from parso import cache
from parso.python import load_grammar
from parso import load_grammar
from parso.python.diff import DiffParser
from parso.python import parse
from parso import parse
def test_simple():
"""
The diff parser reuses modules. So check for that.
"""
grammar = load_grammar()
module_a = grammar.parse('a', diff_cache=True)
assert grammar.parse('b', diff_cache=True) == module_a
def _check_error_leaves_nodes(node):
@@ -44,20 +53,28 @@ class Differ(object):
def initialize(self, code):
logging.debug('differ: initialize')
self.lines = splitlines(code, keepends=True)
cache.parser_cache.pop(None, None)
try:
del cache.parser_cache[self.grammar._hashed][None]
except KeyError:
pass
self.lines = split_lines(code, keepends=True)
self.module = parse(code, diff_cache=True, cache=True)
return self.module
def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
logging.debug('differ: parse copies=%s parsers=%s', copies, parsers)
lines = splitlines(code, keepends=True)
diff_parser = DiffParser(self.grammar, self.module)
lines = split_lines(code, keepends=True)
diff_parser = DiffParser(
self.grammar._pgen_grammar,
self.grammar._tokenizer,
self.module,
)
new_module = diff_parser.update(self.lines, lines)
self.lines = lines
assert code == new_module.get_code()
assert diff_parser._copy_count == copies
assert diff_parser._parser_count == parsers
#assert diff_parser._parser_count == parsers
assert expect_error_leaves == _check_error_leaves_nodes(new_module)
_assert_valid_graph(new_module)
@@ -70,8 +87,6 @@ def differ():
def test_change_and_undo(differ):
# Empty the parser cache for the path None.
cache.parser_cache.pop(None, None)
func_before = 'def func():\n pass\n'
# Parse the function and a.
differ.initialize(func_before + 'a')
@@ -79,9 +94,8 @@ def test_change_and_undo(differ):
differ.parse(func_before + 'b', copies=1, parsers=1)
# b has changed to a again, so parse that.
differ.parse(func_before + 'a', copies=1, parsers=1)
# Same as before parsers should be used at the end, because it doesn't end
# with newlines and that leads to complications.
differ.parse(func_before + 'a', copies=1, parsers=1)
# Same as before parsers should not be used. Just a simple copy.
differ.parse(func_before + 'a', copies=1)
# Now that we have a newline at the end, everything is easier in Python
# syntax, we can parse once and then get a copy.
@@ -97,15 +111,12 @@ def test_change_and_undo(differ):
def test_positions(differ):
# Empty the parser cache for the path None.
cache.parser_cache.pop(None, None)
func_before = 'class A:\n pass\n'
m = differ.initialize(func_before + 'a')
assert m.start_pos == (1, 0)
assert m.end_pos == (3, 1)
m = differ.parse('a', parsers=1)
m = differ.parse('a', copies=1)
assert m.start_pos == (1, 0)
assert m.end_pos == (1, 1)
@@ -252,7 +263,7 @@ def test_wrong_whitespace(differ):
hello
'''
differ.initialize(code)
differ.parse(code + 'bar\n ', parsers=1, copies=1)
differ.parse(code + 'bar\n ', parsers=1)
code += """abc(\npass\n """
differ.parse(code, parsers=1, copies=1, expect_error_leaves=True)
@@ -356,7 +367,7 @@ def test_totally_wrong_whitespace(differ):
'''
differ.initialize(code1)
differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True)
differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
def test_node_insertion(differ):
@@ -456,6 +467,20 @@ def test_in_parentheses_newlines(differ):
b = 2""")
def test_indentation_issue(differ):
code1 = dedent("""
import module
""")
code2 = dedent("""
class L1:
class L2:
class L3:
def f(): pass
def f(): pass
def f(): pass
def f(): pass
""")
differ.initialize(code1)
differ.parse(code2, parsers=2, copies=1)
differ.parse(code1, parsers=2, copies=1)
differ.parse(code2, parsers=2)


@@ -0,0 +1,23 @@
import os
import parso
def get_python_files(path):
for dir_path, dir_names, file_names in os.walk(path):
for file_name in file_names:
if file_name.endswith('.py'):
yield os.path.join(dir_path, file_name)
def test_on_itself(each_version):
"""
There are obviously no syntax errors in the Python code of parso. However,
parso should output the same for all versions.
"""
grammar = parso.load_grammar(version=each_version)
path = os.path.dirname(os.path.dirname(__file__)) + '/parso'
for file in get_python_files(path):
tree = grammar.parse(path=file)
errors = list(grammar.iter_errors(tree))
assert not errors

test/test_fstring.py

@@ -0,0 +1,75 @@
import pytest
from parso import load_grammar, ParserSyntaxError
from parso.python.fstring import tokenize
@pytest.fixture
def grammar():
return load_grammar(language="python-f-string")
@pytest.mark.parametrize(
'code', [
'{1}',
'',
'{1!a}',
'{1!a:1}',
'{1:1}',
'{1:1.{32}}',
'{1::>4}',
'{foo} {bar}',
# Escapes
'{{}}',
'{{{1}}}',
'{{{1}',
'1{{2{{3',
'}}',
'{:}}}',
# Invalid, but will be checked later.
'{}',
'{1:}',
'{:}',
'{:1}',
'{!:}',
'{!}',
'{!a}',
'{1:{}}',
'{1:{:}}',
]
)
def test_valid(code, grammar):
fstring = grammar.parse(code, error_recovery=False)
assert fstring.type == 'fstring'
assert fstring.get_code() == code
@pytest.mark.parametrize(
'code', [
'}',
'{',
'{1!{a}}',
'{!{a}}',
]
)
def test_invalid(code, grammar):
with pytest.raises(ParserSyntaxError):
grammar.parse(code, error_recovery=False)
# It should work with error recovery.
#grammar.parse(code, error_recovery=True)
@pytest.mark.parametrize(
('code', 'start_pos', 'positions'), [
# (2, 5) appears twice because of the Python expr and the endmarker.
('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
(' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
]
)
def test_tokenize_start_pos(code, start_pos, positions):
tokens = tokenize(code, start_pos)
assert positions == [p.start_pos for p in tokens]


@@ -2,7 +2,7 @@ import difflib
import pytest
from parso.python import parse
from parso import parse
code_basic_features = '''
"""A mod docstring"""
@@ -39,7 +39,6 @@ def diff_code_assert(a, b, n=4):
pass
@pytest.mark.skipif('True', reason='Refactor a few parser things first.')
def test_basic_parsing():
"""Validate the parsing features"""
@@ -104,3 +103,17 @@ def test_end_newlines():
test('def a():\n pass', (2, 5))
test('def a(', (1, 6))
@pytest.mark.parametrize(('code', 'types'), [
('\r', ['error_leaf', 'endmarker']),
('\n\r', ['error_leaf', 'endmarker'])
])
def test_carriage_return_at_end(code, types):
"""
By adding an artificial newline this creates weird side effects for
\r at the end of files that would normally be error leafs.
"""
tree = parse(code)
assert tree.get_code() == code
assert [c.type for c in tree.children] == types

test/test_grammar.py

@@ -0,0 +1,8 @@
import parso
import pytest
def test_non_unicode():
with pytest.raises(UnicodeDecodeError):
parso.parse(b'\xe4')

test/test_load_grammar.py

@@ -0,0 +1,31 @@
import pytest
from parso.grammar import load_grammar
from parso import utils
def test_load_inexisting_grammar():
# This version shouldn't be out for a while, but if it ever is, wow!
with pytest.raises(NotImplementedError):
load_grammar(version='15.8')
# The same is true for very old grammars (even though this is probably not
# going to be an issue).
with pytest.raises(NotImplementedError):
load_grammar(version='1.5')
@pytest.mark.parametrize(('string', 'result'), [
('2', (2, 7)), ('3', (3, 6)), ('1.1', (1, 1)), ('1.1.1', (1, 1)), ('300.1.31', (300, 1))
])
def test_parse_version(string, result):
assert utils._parse_version(string) == result
@pytest.mark.parametrize('string', ['1.', 'a', '#', '1.3.4.5', '1.12'])
def test_invalid_grammar_version(string):
with pytest.raises(ValueError):
load_grammar(version=string)
def test_grammar_int_version():
with pytest.raises(TypeError):
load_grammar(version=3.2)
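Taken together, the cases above pin down the version-string contract: a bare major defaults to a concrete grammar (2 becomes 2.7, 3 becomes 3.6), a single-digit minor may follow, a micro part is accepted but ignored, and everything else raises ValueError. A rough reconstruction consistent with those cases (not parso's actual _parse_version):

import re

def parse_version(version):
    # Major, optional single-digit minor, optional (ignored) micro part.
    match = re.match(r'(\d+)(?:\.(\d)(?:\.\d+)?)?$', version)
    if match is None:
        raise ValueError('invalid version string: %r' % version)
    major = int(match.group(1))
    minor = match.group(2)
    if minor is None:
        # Assumed defaults; only bare '2' and '3' appear in the tests.
        minor = {2: 7, 3: 6}.get(major, 0)
    return major, int(minor)

assert parse_version('2') == (2, 7)
assert parse_version('300.1.31') == (300, 1)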


@@ -0,0 +1,70 @@
"""
To easily verify if our normalizer raises the right error codes, just use the
tests of pycodestyle.
"""
import difflib
import re
import parso
from parso._compatibility import total_ordering
from parso.utils import python_bytes_to_unicode
@total_ordering
class WantedIssue(object):
def __init__(self, code, line, column):
self.code = code
self._line = line
self._column = column
def __eq__(self, other):
return self.code == other.code and self.start_pos == other.start_pos
def __lt__(self, other):
return self.start_pos < other.start_pos or self.code < other.code
def __hash__(self):
return hash(str(self.code) + str(self._line) + str(self._column))
@property
def start_pos(self):
return self._line, self._column
def collect_errors(code):
for line_nr, line in enumerate(code.splitlines(), 1):
match = re.match(r'(\s*)#: (.*)$', line)
if match is not None:
codes = match.group(2)
for code in codes.split():
code, _, add_indent = code.partition(':')
column = int(add_indent or len(match.group(1)))
code, _, add_line = code.partition('+')
l = line_nr + 1 + int(add_line or 0)
yield WantedIssue(code[1:], l, column)
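Decoded by hand, one annotation from the fixture files works out like this (hypothetical source line number, following collect_errors step by step):

# '    #: E128+1:4' found on source line 10:
code = 'E128+1:4'
code, _, add_indent = code.partition(':')  # -> 'E128+1', column part '4'
code, _, add_line = code.partition('+')    # -> 'E128', line offset '1'
# Expected issue: code '128' on line 10 + 1 + 1 = 12, at column 4.
assert (code[1:], 10 + 1 + int(add_line), int(add_indent)) == ('128', 12, 4)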
def test_normalizer_issue(normalizer_issue_case):
def sort(issues):
issues = sorted(issues, key=lambda i: (i.start_pos, i.code))
return ["(%s, %s): %s" % (i.start_pos[0], i.start_pos[1], i.code)
for i in issues]
with open(normalizer_issue_case.path, 'rb') as f:
code = python_bytes_to_unicode(f.read())
desired = sort(collect_errors(code))
grammar = parso.load_grammar(version=normalizer_issue_case.python_version)
module = grammar.parse(code)
issues = grammar._get_normalizer_issues(module)
actual = sort(issues)
diff = '\n'.join(difflib.ndiff(desired, actual))
# To make the pytest -v diff a bit prettier, stop pytest from rewriting assert
# statements by executing the comparison earlier.
_bool = desired == actual
assert _bool, '\n' + diff


@@ -9,7 +9,7 @@ However the tests might still be relevant for the parser.
from textwrap import dedent
from parso._compatibility import u
from parso.python import parse
from parso import parse
def test_carriage_return_splitting():


@@ -5,27 +5,27 @@ instead of simple parser objects.
from textwrap import dedent
from parso.python import parse
from parso import parse
def assert_params(param_string, **wanted_dct):
def assert_params(param_string, version=None, **wanted_dct):
source = dedent('''
def x(%s):
pass
''') % param_string
module = parse(source)
module = parse(source, version=version)
funcdef = next(module.iter_funcdefs())
dct = dict((p.name.value, p.default and p.default.get_code())
for p in funcdef.params)
for p in funcdef.get_params())
assert dct == wanted_dct
assert module.get_code() == source
def test_split_params_with_separation_star():
assert_params(u'x, y=1, *, z=3', x=None, y='1', z='3')
assert_params(u'*, x', x=None)
assert_params(u'*')
assert_params(u'x, y=1, *, z=3', x=None, y='1', z='3', version='3.5')
assert_params(u'*, x', x=None, version='3.5')
assert_params(u'*', version='3.5')
def test_split_params_with_stars():


@@ -1,19 +1,18 @@
# -*- coding: utf-8 -*-
import sys
from textwrap import dedent
import pytest
from parso._compatibility import u, py_version
from parso.python import parse, load_grammar
from parso._compatibility import u
from parso import parse
from parso.python import tree
from parso.utils import splitlines
from parso.utils import split_lines
def test_basic_parsing():
def test_basic_parsing(each_version):
def compare(string):
"""Generates the AST object and then regenerates the code."""
assert parse(string).get_code() == string
assert parse(string, version=each_version).get_code() == string
compare('\na #pass\n')
compare('wblabla* 1\t\n')
@@ -21,9 +20,9 @@ def test_basic_parsing():
compare('assert foo\n')
def test_subscope_names():
def test_subscope_names(each_version):
def get_sub(source):
return parse(source).children[0]
return parse(source, version=each_version).children[0]
name = get_sub('class Foo: pass').name
assert name.start_pos == (1, len('class '))
@@ -36,9 +35,9 @@ def test_subscope_names():
assert name.value == 'foo'
def test_import_names():
def test_import_names(each_version):
def get_import(source):
return next(parse(source).iter_imports())
return next(parse(source, version=each_version).iter_imports())
imp = get_import('import math\n')
names = imp.get_defined_names()
@@ -51,76 +50,76 @@ def test_import_names():
assert imp.end_pos == (1, len('import math'))
def test_end_pos():
def test_end_pos(each_version):
s = dedent('''
x = ['a', 'b', 'c']
def func():
y = None
''')
parser = parse(s)
parser = parse(s, version=each_version)
scope = next(parser.iter_funcdefs())
assert scope.start_pos == (3, 0)
assert scope.end_pos == (5, 0)
def test_carriage_return_statements():
def test_carriage_return_statements(each_version):
source = dedent('''
foo = 'ns1!'
# this is a namespace package
''')
source = source.replace('\n', '\r\n')
stmt = parse(source).children[0]
stmt = parse(source, version=each_version).children[0]
assert '#' not in stmt.get_code()
def test_incomplete_list_comprehension():
def test_incomplete_list_comprehension(each_version):
""" Shouldn't raise an error, same bug as #418. """
# With the old parser this actually returned a statement. With the new
# parser only valid statements generate one.
children = parse('(1 for def').children
children = parse('(1 for def', version=each_version).children
assert [c.type for c in children] == \
['error_node', 'error_node', 'newline', 'endmarker']
['error_node', 'error_node', 'endmarker']
def test_newline_positions():
endmarker = parse('a\n').children[-1]
def test_newline_positions(each_version):
endmarker = parse('a\n', version=each_version).children[-1]
assert endmarker.end_pos == (2, 0)
new_line = endmarker.get_previous_leaf()
assert new_line.start_pos == (1, 1)
assert new_line.end_pos == (2, 0)
def test_end_pos_error_correction():
def test_end_pos_error_correction(each_version):
"""
Source code without an ending newline is given one, because the Python
grammar needs it. However, it is removed again. We still want the right
end_pos, even if something breaks in the parser (error correction).
"""
s = 'def x():\n .'
m = parse(s)
m = parse(s, version=each_version)
func = m.children[0]
assert func.type == 'funcdef'
assert func.end_pos == (2, 2)
assert m.end_pos == (2, 2)
def test_param_splitting():
def test_param_splitting(each_version):
"""
Jedi splits parameters into params; this is not what the grammar does,
but Jedi does this to simplify argument parsing.
"""
def check(src, result):
# Python 2 tuple params should be ignored for now.
grammar = load_grammar('%s.%s' % sys.version_info[:2])
m = parse(src, grammar=grammar)
if py_version >= 30:
assert not list(m.iter_funcdefs())
else:
m = parse(src, version=each_version)
if each_version.startswith('2'):
# We don't want b and c to be a part of the param enumeration. Just
# ignore them, because it's not what we want to support in the
# future.
assert [param.name.value for param in next(m.iter_funcdefs()).params] == result
func = next(m.iter_funcdefs())
assert [param.name.value for param in func.get_params()] == result
else:
assert not list(m.iter_funcdefs())
check('def x(a, (b, c)):\n pass', ['a'])
check('def x((b, c)):\n pass', [])
@@ -131,49 +130,32 @@ def test_unicode_string():
assert repr(s) # Should not raise an Error!
def test_backslash_dos_style():
assert parse('\\\r\n')
def test_backslash_dos_style(each_version):
assert parse('\\\r\n', version=each_version)
def test_started_lambda_stmt():
m = parse(u'lambda a, b: a i')
def test_started_lambda_stmt(each_version):
m = parse(u'lambda a, b: a i', version=each_version)
assert m.children[0].type == 'error_node'
def test_python2_octal():
module = parse('0660')
def test_python2_octal(each_version):
module = parse('0660', version=each_version)
first = module.children[0]
if py_version >= 30:
if each_version.startswith('2'):
assert first.type == 'number'
else:
assert first.type == 'error_node'
else:
assert first.children[0].type == 'number'
def test_python3_octal():
module = parse('0o660')
if py_version >= 30:
assert module.children[0].children[0].type == 'number'
else:
assert module.children[0].type == 'error_node'
def test_load_newer_grammar():
# This version shouldn't be out for a while, but if we somehow get this, it
# should just take the latest Python grammar.
load_grammar('15.8')
# The same is true for very old grammars (even though this is probably not
# going to be an issue).
load_grammar('1.5')
@pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar'])
def test_open_string_literal(code):
def test_open_string_literal(each_version, code):
"""
Testing mostly if removing the last newline works.
"""
lines = splitlines(code, keepends=True)
lines = split_lines(code, keepends=True)
end_pos = (len(lines), len(lines[-1]))
module = parse(code)
module = parse(code, version=each_version)
assert module.get_code() == code
assert module.end_pos == end_pos == module.children[1].end_pos
@@ -181,3 +163,29 @@ def test_open_string_literal(code):
def test_too_many_params():
with pytest.raises(TypeError):
parse('asdf', hello=3)
def test_dedent_at_end(each_version):
code = dedent('''
for foobar in [1]:
foobar''')
module = parse(code, version=each_version)
assert module.get_code() == code
suite = module.children[0].children[-1]
foobar = suite.children[-1]
assert foobar.type == 'name'
def test_no_error_nodes(each_version):
def check(node):
assert node.type not in ('error_leaf', 'error_node')
try:
children = node.children
except AttributeError:
pass
else:
for child in children:
check(child)
check(parse("if foo:\n bar", version=each_version))


@@ -4,7 +4,7 @@ from textwrap import dedent
import pytest
from parso.python import parse
from parso import parse
from parso.python import tree
@@ -26,7 +26,7 @@ class TestsFunctionAndLambdaParsing(object):
@pytest.fixture(params=FIXTURES)
def node(self, request):
parsed = parse(dedent(request.param[0]))
parsed = parse(dedent(request.param[0]), version='3.5')
request.keywords['expected'] = request.param[1]
child = parsed.children[0]
if child.type == 'simple_stmt':
@@ -43,16 +43,14 @@ class TestsFunctionAndLambdaParsing(object):
assert node.name.value == expected['name']
def test_params(self, node, expected):
assert isinstance(node.params, list)
assert all(isinstance(x, tree.Param) for x in node.params)
assert [str(x.name.value) for x in node.params] == [x for x in expected['params']]
assert isinstance(node.get_params(), list)
assert all(isinstance(x, tree.Param) for x in node.get_params())
assert [str(x.name.value) for x in node.get_params()] == [x for x in expected['params']]
def test_is_generator(self, node, expected):
assert node.is_generator() is expected.get('is_generator', False)
def test_yields(self, node, expected):
# TODO: There's a comment in the code noting that the current
# implementation is incorrect.
assert node.is_generator() == expected.get('yields', False)
def test_annotation(self, node, expected):
@@ -61,3 +59,93 @@ class TestsFunctionAndLambdaParsing(object):
assert node.annotation is None
else:
assert node.annotation.value == expected_annotation
def test_end_pos_line(each_version):
# jedi issue #150
s = "x()\nx( )\nx( )\nx ( )\n"
module = parse(s, version=each_version)
for i, simple_stmt in enumerate(module.children[:-1]):
expr_stmt = simple_stmt.children[0]
assert expr_stmt.end_pos == (i + 1, i + 3)
def test_default_param(each_version):
func = parse('def x(foo=42): pass', version=each_version).children[0]
param, = func.get_params()
assert param.default.value == '42'
assert param.annotation is None
assert not param.star_count
def test_annotation_param(each_py3_version):
func = parse('def x(foo: 3): pass', version=each_py3_version).children[0]
param, = func.get_params()
assert param.default is None
assert param.annotation.value == '3'
assert not param.star_count
def test_annotation_params(each_py3_version):
func = parse('def x(foo: 3, bar: 4): pass', version=each_py3_version).children[0]
param1, param2 = func.get_params()
assert param1.default is None
assert param1.annotation.value == '3'
assert not param1.star_count
assert param2.default is None
assert param2.annotation.value == '4'
assert not param2.star_count
def test_default_and_annotation_param(each_py3_version):
func = parse('def x(foo:3=42): pass', version=each_py3_version).children[0]
param, = func.get_params()
assert param.default.value == '42'
assert param.annotation.value == '3'
assert not param.star_count
def test_ellipsis_py2(each_py2_version):
module = parse('[0][...]', version=each_py2_version, error_recovery=False)
expr = module.children[0]
trailer = expr.children[-1]
subscript = trailer.children[1]
assert subscript.type == 'subscript'
assert [leaf.value for leaf in subscript.children] == ['.', '.', '.']
def get_yield_exprs(code, version):
return list(parse(code, version=version).children[0].iter_yield_exprs())
def get_return_stmts(code):
return list(parse(code).children[0].iter_return_stmts())
def test_yields(each_version):
y, = get_yield_exprs('def x(): yield', each_version)
assert y.value == 'yield'
assert y.type == 'keyword'
y, = get_yield_exprs('def x(): (yield 1)', each_version)
assert y.type == 'yield_expr'
y, = get_yield_exprs('def x(): [1, (yield)]', each_version)
assert y.type == 'keyword'
def test_yield_from():
y, = get_yield_exprs('def x(): (yield from 1)', '3.3')
assert y.type == 'yield_expr'
def test_returns():
r, = get_return_stmts('def x(): return')
assert r.value == 'return'
assert r.type == 'keyword'
r, = get_return_stmts('def x(): return 1')
assert r.type == 'return_stmt'

test/test_pep8.py

@@ -0,0 +1,39 @@
import parso
def issues(code):
grammar = parso.load_grammar()
module = parso.parse(code)
return grammar._get_normalizer_issues(module)
def test_eof_newline():
def assert_issue(code):
found = issues(code)
assert len(found) == 1
issue, = found
assert issue.code == 292
assert not issues('asdf = 1\n')
assert_issue('asdf = 1')
assert_issue('asdf = 1\n# foo')
assert_issue('# foobar')
assert_issue('')
assert_issue('foo = 1 # comment')
def test_eof_blankline():
def assert_issue(code):
found = issues(code)
assert len(found) == 1
issue, = found
assert issue.code == 391
assert_issue('asdf = 1\n\n')
assert_issue('# foobar\n\n')
assert_issue('\n\n')
def test_shebang():
assert not issues('#!\n')
assert not issues('#!/foo\n')
assert not issues('#! python\n')
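The issues() helper above wraps a private API that can be pointed at any snippet; a short hedged usage example (grammar._get_normalizer_issues is non-public and may change):

import parso

grammar = parso.load_grammar()
module = grammar.parse('asdf = 1')  # no newline at end of file
codes = [issue.code for issue in grammar._get_normalizer_issues(module)]
assert 292 in codes  # W292, as asserted by test_eof_newline above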


@@ -8,110 +8,81 @@ test_grammar.py files from both Python 2 and Python 3.
from textwrap import dedent
from parso._compatibility import py_version
from parso.python import parse as _parse, load_grammar
from parso import ParserSyntaxError
import pytest
from parso import load_grammar
from parso import ParserSyntaxError
def parse(code, version='3.4'):
def _parse(code, version=None):
code = dedent(code) + "\n\n"
grammar = load_grammar(version=version)
return _parse(code, grammar=grammar, error_recovery=False)
return grammar.parse(code, error_recovery=False)
def test_formfeed():
s = """print 1\n\x0Cprint 2\n"""
t = parse(s, '2.7')
def _invalid_syntax(code, version=None, **kwargs):
with pytest.raises(ParserSyntaxError):
module = _parse(code, version=version, **kwargs)
# For debugging
print(module.children)
def test_formfeed(each_py2_version):
s = u"""print 1\n\x0Cprint 2\n"""
t = _parse(s, each_py2_version)
assert t.children[0].children[0].type == 'print_stmt'
assert t.children[1].children[0].type == 'print_stmt'
s = """1\n\x0C\x0C2\n"""
t = parse(s, '2.7')
s = u"""1\n\x0C\x0C2\n"""
t = _parse(s, each_py2_version)
def _invalid_syntax(code, **kwargs):
try:
parse(code, **kwargs)
except ParserSyntaxError:
pass
else:
raise AssertionError("Syntax shouldn't have been valid")
def test_matrix_multiplication_operator(works_ge_py35):
works_ge_py35.parse("a @ b")
works_ge_py35.parse("a @= b")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
def test_matrix_multiplication_operator():
parse("a @ b", "3.5")
parse("a @= b", "3.5")
def test_yield_from(works_ge_py3, each_version):
works_ge_py3.parse("yield from x")
works_ge_py3.parse("(yield from x) + y")
_invalid_syntax("yield from", each_version)
def test_yield_from():
parse("yield from x")
parse("(yield from x) + y")
_invalid_syntax("yield from")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
def test_await_expr():
parse("""async def foo():
def test_await_expr(works_ge_py35):
works_ge_py35.parse("""async def foo():
await x
""", "3.5")
""")
parse("""async def foo():
works_ge_py35.parse("""async def foo():
def foo(): pass
def foo(): pass
await x
""", "3.5")
""")
parse("""async def foo(): return await a""", "3.5")
works_ge_py35.parse("""async def foo(): return await a""")
parse("""def foo():
works_ge_py35.parse("""def foo():
def foo(): pass
async def foo(): await x
""", "3.5")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
@pytest.mark.xfail(reason="acting like python 3.7")
def test_await_expr_invalid():
_invalid_syntax("await x", version="3.5")
_invalid_syntax("""def foo():
await x""", version="3.5")
_invalid_syntax("""def foo():
def foo(): pass
async def foo(): pass
await x
""", version="3.5")
""")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
@pytest.mark.xfail(reason="acting like python 3.7")
def test_async_var():
parse("""async = 1""", "3.5")
parse("""await = 1""", "3.5")
parse("""def async(): pass""", "3.5")
_parse("""async = 1""", "3.5")
_parse("""await = 1""", "3.5")
_parse("""def async(): pass""", "3.5")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
def test_async_for():
parse("""async def foo():
async for a in b: pass""", "3.5")
def test_async_for(works_ge_py35):
works_ge_py35.parse("async def foo():\n async for a in b: pass")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
@pytest.mark.xfail(reason="acting like python 3.7")
def test_async_for_invalid():
_invalid_syntax("""def foo():
async for a in b: pass""", version="3.5")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
def test_async_with():
parse("""async def foo():
async with a: pass""", "3.5")
def test_async_with(works_ge_py35):
works_ge_py35.parse("async def foo():\n async with a: pass")
@pytest.mark.skipif('sys.version_info[:2] < (3, 5)')
@pytest.mark.xfail(reason="acting like python 3.7")
@@ -120,151 +91,182 @@ def test_async_with():
async with a: pass""", version="3.5")
def test_raise_2x_style_1():
parse("raise")
def test_raise_3x_style_1(each_version):
_parse("raise", each_version)
def test_raise_2x_style_2():
parse("raise E, V", version='2.7')
def test_raise_2x_style_3():
parse("raise E, V, T", version='2.7')
def test_raise_2x_style_2(works_in_py2):
works_in_py2.parse("raise E, V")
def test_raise_2x_style_invalid_1():
_invalid_syntax("raise E, V, T, Z", version='2.7')
def test_raise_2x_style_3(works_in_py2):
works_in_py2.parse("raise E, V, T")
def test_raise_3x_style():
parse("raise E1 from E2")
def test_raise_2x_style_invalid_1(each_version):
_invalid_syntax("raise E, V, T, Z", version=each_version)
def test_raise_3x_style_invalid_1():
_invalid_syntax("raise E, V from E1")
def test_raise_3x_style(works_ge_py3):
works_ge_py3.parse("raise E1 from E2")
def test_raise_3x_style_invalid_2():
_invalid_syntax("raise E from E1, E2")
def test_raise_3x_style_invalid_1(each_version):
_invalid_syntax("raise E, V from E1", each_version)
def test_raise_3x_style_invalid_3():
_invalid_syntax("raise from E1, E2")
def test_raise_3x_style_invalid_2(each_version):
_invalid_syntax("raise E from E1, E2", each_version)
def test_raise_3x_style_invalid_4():
_invalid_syntax("raise E from")
def test_raise_3x_style_invalid_3(each_version):
_invalid_syntax("raise from E1, E2", each_version)
def test_raise_3x_style_invalid_4(each_version):
_invalid_syntax("raise E from", each_version)
# Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testFuncdef
def test_annotation_1():
parse("""def f(x) -> list: pass""")
def test_annotation_1(works_ge_py3):
works_ge_py3.parse("""def f(x) -> list: pass""")
def test_annotation_2():
parse("""def f(x:int): pass""")
def test_annotation_2(works_ge_py3):
works_ge_py3.parse("""def f(x:int): pass""")
def test_annotation_3():
parse("""def f(*x:str): pass""")
def test_annotation_3(works_ge_py3):
works_ge_py3.parse("""def f(*x:str): pass""")
def test_annotation_4():
parse("""def f(**x:float): pass""")
def test_annotation_4(works_ge_py3):
works_ge_py3.parse("""def f(**x:float): pass""")
def test_annotation_5():
parse("""def f(x, y:1+2): pass""")
def test_annotation_5(works_ge_py3):
works_ge_py3.parse("""def f(x, y:1+2): pass""")
def test_annotation_6():
_invalid_syntax("""def f(a, (b:1, c:2, d)): pass""")
def test_annotation_6(each_py3_version):
_invalid_syntax("""def f(a, (b:1, c:2, d)): pass""", each_py3_version)
def test_annotation_7():
_invalid_syntax("""def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6): pass""")
def test_annotation_7(each_py3_version):
_invalid_syntax("""def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6): pass""", each_py3_version)
def test_annotation_8():
def test_annotation_8(each_py3_version):
s = """def f(a, (b:1, c:2, d), e:3=4, f=5,
*g:6, h:7, i=8, j:9=10, **k:11) -> 12: pass"""
_invalid_syntax(s)
_invalid_syntax(s, each_py3_version)
def test_except_new():
s = """
def test_except_new(each_version):
s = dedent("""
try:
x
except E as N:
y"""
parse(s)
y""")
_parse(s, each_version)
def test_except_old():
s = """
def test_except_old(works_in_py2):
s = dedent("""
try:
x
except E, N:
y"""
parse(s, version='2.7')
y""")
works_in_py2.parse(s)
# Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testAtoms
def test_set_literal_1():
parse("""x = {'one'}""")
def test_set_literal_1(works_ge_py27):
works_ge_py27.parse("""x = {'one'}""")
def test_set_literal_2():
parse("""x = {'one', 1,}""")
def test_set_literal_2(works_ge_py27):
works_ge_py27.parse("""x = {'one', 1,}""")
def test_set_literal_3():
parse("""x = {'one', 'two', 'three'}""")
def test_set_literal_3(works_ge_py27):
works_ge_py27.parse("""x = {'one', 'two', 'three'}""")
def test_set_literal_4():
parse("""x = {2, 3, 4,}""")
def test_set_literal_4(works_ge_py27):
works_ge_py27.parse("""x = {2, 3, 4,}""")
def test_new_octal_notation():
code = """0o7777777777777"""
if py_version >= 30:
parse(code)
else:
_invalid_syntax(code)
_invalid_syntax("""0o7324528887""")
def test_new_binary_notation():
parse("""0b101010""")
_invalid_syntax("""0b0101021""")
def test_new_octal_notation(each_version):
_parse("""0o7777777777777""", each_version)
_invalid_syntax("""0o7324528887""", each_version)
def test_class_new_syntax():
parse("class B(t=7): pass")
parse("class B(t, *args): pass")
parse("class B(t, **kwargs): pass")
parse("class B(t, *args, **kwargs): pass")
parse("class B(t, y=9, *args, **kwargs): pass")
def test_old_octal_notation(works_in_py2):
works_in_py2.parse("07")
def test_parser_idempotency_extended_unpacking():
def test_new_binary_notation(each_version):
_parse("""0b101010""", each_version)
_invalid_syntax("""0b0101021""", each_version)
def test_class_new_syntax(works_ge_py3):
works_ge_py3.parse("class B(t=7): pass")
works_ge_py3.parse("class B(t, *args): pass")
works_ge_py3.parse("class B(t, **kwargs): pass")
works_ge_py3.parse("class B(t, *args, **kwargs): pass")
works_ge_py3.parse("class B(t, y=9, *args, **kwargs): pass")
def test_parser_idempotency_extended_unpacking(works_ge_py3):
"""A cut-down version of pytree_idempotency.py."""
parse("a, *b, c = x\n")
parse("[*a, b] = x\n")
parse("(z, *y, w) = m\n")
parse("for *z, m in d: pass\n")
works_ge_py3.parse("a, *b, c = x\n")
works_ge_py3.parse("[*a, b] = x\n")
works_ge_py3.parse("(z, *y, w) = m\n")
works_ge_py3.parse("for *z, m in d: pass\n")
@pytest.mark.skipif('sys.version_info[0] < 3')
def test_multiline_bytes_literals():
def test_multiline_bytes_literals(each_version):
"""
It's not possible to get the same result when using \xaa in Python 2/3,
because it's treated differently.
"""
s = """
s = u"""
md5test(b"\xaa" * 80,
(b"Test Using Larger Than Block-Size Key "
b"and Larger Than One Block-Size Data"),
"6f630fad67cda0ee1fb1f562db3aa53e")
"""
parse(s)
_parse(s, each_version)
def test_multiline_bytes_tripquote_literals():
def test_multiline_bytes_tripquote_literals(each_version):
s = '''
b"""
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN">
"""
'''
parse(s)
_parse(s, each_version)
@pytest.mark.skipif('sys.version_info[0] < 3')
def test_multiline_str_literals():
s = """
def test_ellipsis(works_ge_py3, each_version):
works_ge_py3.parse("...")
_parse("[0][...]", version=each_version)
def test_dict_unpacking(works_ge_py35):
works_ge_py35.parse("{**dict(a=3), foo:2}")
def test_multiline_str_literals(each_version):
s = u"""
md5test("\xaa" * 80,
("Test Using Larger Than Block-Size Key "
"and Larger Than One Block-Size Data"),
"6f630fad67cda0ee1fb1f562db3aa53e")
"""
parse(s)
_parse(s, each_version)
def test_py2_backticks(works_in_py2):
works_in_py2.parse("`1`")
def test_py2_string_prefixes(works_in_py2):
works_in_py2.parse("ur'1'")
works_in_py2.parse("Ur'1'")
works_in_py2.parse("UR'1'")
_invalid_syntax("ru'1'", works_in_py2.version)
def py_br(each_version):
_parse('br""', each_version)
def test_py3_rb(works_ge_py3):
works_ge_py3.parse("rb'1'")
works_ge_py3.parse("RB'1'")

test/test_prefix.py

@@ -0,0 +1,79 @@
try:
    from itertools import zip_longest
except ImportError:
    # Python 2
    from itertools import izip_longest as zip_longest

from codecs import BOM_UTF8

import pytest

import parso

unicode_bom = BOM_UTF8.decode('utf-8')


@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
    ('#', ['#', '']),
    (' # ', ['# ', '']),
    (' # \n', ['# ', '\n', '']),
    (' # \f\n', ['# ', '\f', '\n', '']),
    (' \n', ['\n', '']),
    (' \n ', ['\n', ' ']),
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):
    tree = parso.parse(string)
    leaf = tree.children[0]
    assert leaf.type == 'endmarker'

    parsed_tokens = list(leaf._split_prefix())
    start_pos = (1, 0)
    for pt, expected in zip_longest(parsed_tokens, tokens):
        assert pt.value == expected

        # Calculate the estimated end_pos
        if expected.endswith('\n'):
            end_pos = start_pos[0] + 1, 0
        else:
            end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)

        #assert start_pos == pt.start_pos
        assert end_pos == pt.end_pos
        start_pos = end_pos


@pytest.mark.parametrize(('string', 'types'), [
    ('# ', ['comment', 'spacing']),
    ('\r\n', ['newline', 'spacing']),
    ('\f', ['formfeed', 'spacing']),
    ('\\\n', ['backslash', 'spacing']),
    (' \t', ['spacing']),
    (' \t ', ['spacing']),
    (unicode_bom + ' # ', ['bom', 'comment', 'spacing']),
])
def test_prefix_splitting_types(string, types):
    tree = parso.parse(string)
    leaf = tree.children[0]
    assert leaf.type == 'endmarker'
    parsed_tokens = list(leaf._split_prefix())
    assert [t.type for t in parsed_tokens] == types


def test_utf8_bom():
    tree = parso.parse(unicode_bom + 'a = 1')
    expr_stmt = tree.children[0]
    assert expr_stmt.start_pos == (1, 0)

    tree = parso.parse(unicode_bom + '\n')
    endmarker = tree.children[0]
    parts = list(endmarker._split_prefix())
    assert [p.type for p in parts] == ['bom', 'newline', 'spacing']
    assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)]
    assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)]
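The behaviour pinned down above is that comments, whitespace, form feeds and the BOM are not nodes of their own; they travel in the prefix of the following leaf, and only the private _split_prefix() helper breaks them apart. Through the public API alone this looks like:

    import parso

    tree = parso.parse('# only a comment\n')
    endmarker = tree.children[0]
    # With no actual code, all trivia ends up in the endmarker's prefix.
    assert endmarker.type == 'endmarker'
    assert endmarker.prefix == '# only a comment\n'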

test/test_python_errors.py (new file, 262 lines)

@@ -0,0 +1,262 @@
"""
Testing if parso finds syntax errors and indentation errors.
"""
import sys
import warnings
import pytest
import parso
from parso._compatibility import is_pypy
from .failing_examples import FAILING_EXAMPLES, indent, build_nested
if is_pypy:
# The errors in PyPy might be different. Just skip the module for now.
pytestmark = pytest.mark.skip()
def _get_error_list(code, version=None):
grammar = parso.load_grammar(version=version)
tree = grammar.parse(code)
return list(grammar.iter_errors(tree))
def assert_comparison(code, error_code, positions):
errors = [(error.start_pos, error.code) for error in _get_error_list(code)]
assert [(pos, error_code) for pos in positions] == errors
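The error objects that iter_errors yields carry a start position, a numeric code and a message, which is all assert_comparison relies on. A standalone sketch of the same flow:

    import parso

    grammar = parso.load_grammar(version='3.6')
    tree = grammar.parse('1 +\n2 +')
    for error in grammar.iter_errors(tree):
        # Prints something like: (1, 3) 901 SyntaxError: invalid syntax
        print(error.start_pos, error.code, error.message)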
@pytest.mark.parametrize('code', FAILING_EXAMPLES)
def test_python_exception_matches(code):
    wanted, line_nr = _get_actual_exception(code)

    errors = _get_error_list(code)
    actual = None
    if errors:
        error, = errors
        actual = error.message
    assert actual in wanted
    # Somehow in Python 3.3 the SyntaxError().lineno is sometimes None
    assert line_nr is None or line_nr == error.start_pos[0]


@pytest.mark.parametrize(
    ('code', 'positions'), [
        ('1 +', [(1, 3)]),
        ('1 +\n', [(1, 3)]),
        ('1 +\n2 +', [(1, 3), (2, 3)]),
        ('x + 2', []),
        ('[\n', [(2, 0)]),
        ('[\ndef x(): pass', [(2, 0)]),
        ('[\nif 1: pass', [(2, 0)]),
        ('1+?', [(1, 2)]),
        ('?', [(1, 0)]),
        ('??', [(1, 0)]),
        ('? ?', [(1, 0)]),
        ('?\n?', [(1, 0), (2, 0)]),
        ('? * ?', [(1, 0)]),
        ('1 + * * 2', [(1, 4)]),
        ('?\n1\n?', [(1, 0), (3, 0)]),
    ]
)
def test_syntax_errors(code, positions):
    assert_comparison(code, 901, positions)


@pytest.mark.parametrize(
    ('code', 'positions'), [
        (' 1', [(1, 0)]),
        ('def x():\n 1\n 2', [(3, 0)]),
        ('def x():\n 1\n 2', [(3, 0)]),
        ('def x():\n1', [(2, 0)]),
    ]
)
def test_indentation_errors(code, positions):
    assert_comparison(code, 903, positions)
def _get_actual_exception(code):
    with warnings.catch_warnings():
        # We don't care about warnings where locals/globals misbehave here.
        # All we care about is whether an error is raised or not.
        warnings.filterwarnings('ignore', category=SyntaxWarning)
        try:
            compile(code, '<unknown>', 'exec')
        except (SyntaxError, IndentationError) as e:
            wanted = e.__class__.__name__ + ': ' + e.msg
            line_nr = e.lineno
        except ValueError as e:
            # The ValueError comes from byte literals in Python 2 like '\x'
            # that are oddly enough not SyntaxErrors.
            wanted = 'SyntaxError: (value error) ' + str(e)
            line_nr = None
        else:
            assert False, "The piece of code should raise an exception."

    # SyntaxError
    # Python 2.6 has slightly different error messages here, so skip it.
    if sys.version_info[:2] == (2, 6) and wanted == 'SyntaxError: unexpected EOF while parsing':
        wanted = 'SyntaxError: invalid syntax'

    if wanted == 'SyntaxError: non-keyword arg after keyword arg':
        # The Python 3.5+ way, a bit nicer.
        wanted = 'SyntaxError: positional argument follows keyword argument'
    elif wanted == 'SyntaxError: assignment to keyword':
        return [wanted, "SyntaxError: can't assign to keyword"], line_nr
    elif wanted == 'SyntaxError: assignment to None':
        # Python 2.6 has a slightly different error.
        wanted = 'SyntaxError: cannot assign to None'
    elif wanted == 'SyntaxError: can not assign to __debug__':
        # Python 2.6 has a slightly different error.
        wanted = 'SyntaxError: cannot assign to __debug__'
    elif wanted == 'SyntaxError: can use starred expression only as assignment target':
        # Python 3.3/3.4 have a slightly different warning than 3.5/3.6 in
        # certain places. But in others this error makes sense.
        return [wanted, "SyntaxError: can't use starred expression here"], line_nr
    return [wanted], line_nr
def test_default_except_error_position():
    # For this error the position seemed to be one line off, but that doesn't
    # really matter.
    code = 'try: pass\nexcept: pass\nexcept X: pass'
    wanted, line_nr = _get_actual_exception(code)
    error, = _get_error_list(code)
    assert error.message in wanted
    assert line_nr != error.start_pos[0]
    # I think this is the better position.
    assert error.start_pos[0] == 2
def test_statically_nested_blocks():
    def build(code, depth):
        if depth == 0:
            return code

        new_code = 'if 1:\n' + indent(code)
        return build(new_code, depth - 1)

    def get_error(depth, add_func=False):
        code = build('foo', depth)
        if add_func:
            code = 'def bar():\n' + indent(code)
        errors = _get_error_list(code)
        if errors:
            assert errors[0].message == 'SyntaxError: too many statically nested blocks'
            return errors[0]
        return None

    assert get_error(19) is None
    assert get_error(19, add_func=True) is None

    assert get_error(20)
    assert get_error(20, add_func=True)
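For intuition, build('foo', 2) expands to the snippet below; parso accepts this up to 19 levels and reports the error from 20 on, matching CPython's compile-time limit:

    if 1:
     if 1:
      foo

(The exact indentation width depends on the imported indent helper; the nesting depth is what triggers the error.)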
def test_future_import_first():
    def is_issue(code, *args):
        code = code % args
        return bool(_get_error_list(code))

    i1 = 'from __future__ import division'
    i2 = 'from __future__ import absolute_import'
    assert not is_issue(i1)
    assert not is_issue(i1 + ';' + i2)
    assert not is_issue(i1 + '\n' + i2)
    assert not is_issue('"";' + i1)
    assert not is_issue('"";' + i1)
    assert not is_issue('""\n' + i1)
    assert not is_issue('""\n%s\n%s', i1, i2)
    assert not is_issue('""\n%s;%s', i1, i2)
    assert not is_issue('"";%s;%s ', i1, i2)
    assert not is_issue('"";%s\n%s ', i1, i2)
    assert is_issue('1;' + i1)
    assert is_issue('1\n' + i1)
    assert is_issue('"";1\n' + i1)
    assert is_issue('""\n%s\nfrom x import a\n%s', i1, i2)
    assert is_issue('%s\n""\n%s', i1, i2)
def test_named_argument_issues(works_not_in_py):
    message = works_not_in_py.get_error_message('def foo(*, **dict): pass')
    message = works_not_in_py.get_error_message('def foo(*): pass')
    if works_not_in_py.version.startswith('2'):
        assert message == 'SyntaxError: invalid syntax'
    else:
        assert message == 'SyntaxError: named arguments must follow bare *'

    works_not_in_py.assert_no_error_in_passing('def foo(*, name): pass')
    works_not_in_py.assert_no_error_in_passing('def foo(bar, *, name=1): pass')
    works_not_in_py.assert_no_error_in_passing('def foo(bar, *, name=1, **dct): pass')
def test_escape_decode_literals(each_version):
    """
    We are using internal functions to make sure that unicode/bytes escaping
    works without syntax errors. Here we do a bit of quality assurance that
    this holds across versions, because the internal function might change
    over time.
    """
    def get_msg(end, to=1):
        base = "SyntaxError: (unicode error) 'unicodeescape' " \
               "codec can't decode bytes in position 0-%s: " % to
        return base + end

    def get_msgs(escape):
        return (get_msg('end of string in escape sequence'),
                get_msg(r"truncated %s escape" % escape))

    error, = _get_error_list(r'u"\x"', version=each_version)
    assert error.message in get_msgs(r'\xXX')

    error, = _get_error_list(r'u"\u"', version=each_version)
    assert error.message in get_msgs(r'\uXXXX')

    error, = _get_error_list(r'u"\U"', version=each_version)
    assert error.message in get_msgs(r'\UXXXXXXXX')

    error, = _get_error_list(r'u"\N{}"', version=each_version)
    assert error.message == get_msg(r'malformed \N character escape', to=2)

    error, = _get_error_list(r'u"\N{foo}"', version=each_version)
    assert error.message == get_msg(r'unknown Unicode character name', to=6)

    # Finally bytes.
    error, = _get_error_list(r'b"\x"', version=each_version)
    wanted = r'SyntaxError: (value error) invalid \x escape'
    if sys.version_info >= (3, 0):
        # The positioning information is only available in Python 3.
        wanted += ' at position 0'
    assert error.message == wanted
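The messages being matched here ultimately come from Python's own escape decoding, so the same kind of failure can be reproduced directly with the standard codec machinery (illustrative only, not part of the test suite):

    import codecs

    try:
        codecs.decode(b'\\u', 'unicode_escape')
    except UnicodeDecodeError as e:
        # e.g. 'unicodeescape' codec can't decode bytes in position 0-1:
        # truncated \uXXXX escape
        print(e)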
def test_too_many_levels_of_indentation():
    assert not _get_error_list(build_nested('pass', 99))
    assert _get_error_list(build_nested('pass', 100))
    base = 'def x():\n if x:\n'
    assert not _get_error_list(build_nested('pass', 49, base=base))
    assert _get_error_list(build_nested('pass', 50, base=base))
@pytest.mark.parametrize(
    'code', [
        "f'{*args,}'",
    ]
)
def test_valid_fstrings(code):
    assert not _get_error_list(code, version='3.6')


@pytest.mark.parametrize(
    ('code', 'message'), [
        ("f'{1+}'", 'invalid syntax'),
    ]
)
def test_invalid_fstrings(code, message):
    """
    Some fstring errors are handled differently in 3.6 and other versions.
    Therefore check specifically for these errors here.
    """
    error, = _get_error_list(code, version='3.6')
    assert message in error.message

test/test_tokenize.py

@@ -2,15 +2,21 @@
 from textwrap import dedent

 import pytest

 from parso._compatibility import py_version
-from parso.utils import splitlines
-from parso.token import NAME, OP, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
-from parso import tokenize
-from parso.python import parse
-from parso.tokenize import TokenInfo
+from parso.utils import split_lines, parse_version_string
+from parso.python.token import (
+    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
+from parso.python import tokenize
+from parso import parse
+from parso.python.tokenize import PythonToken


 def _get_token_list(string):
-    return list(tokenize.source_tokens(string))
+    # Load the current version.
+    version_info = parse_version_string()
+    return list(tokenize.tokenize(string, version_info))
 def test_end_pos_one_line():
@@ -37,8 +43,7 @@ def test_end_pos_multi_line():

 def test_simple_no_whitespace():
     # Test a simple one line string, no preceding whitespace
     simple_docstring = '"""simple one line docstring"""'
-    tokens = tokenize.source_tokens(simple_docstring)
-    token_list = list(tokens)
+    token_list = _get_token_list(simple_docstring)
     _, value, _, prefix = token_list[0]
     assert prefix == ''
     assert value == '"""simple one line docstring"""'
@@ -47,8 +52,7 @@ def test_simple_no_whitespace():

 def test_simple_with_whitespace():
     # Test a simple one line string with preceding whitespace and newline
     simple_docstring = ' """simple one line docstring""" \r\n'
-    tokens = tokenize.source_tokens(simple_docstring)
-    token_list = list(tokens)
+    token_list = _get_token_list(simple_docstring)
     assert token_list[0][0] == INDENT
     typ, value, start_pos, prefix = token_list[1]
     assert prefix == ' '
@@ -67,8 +71,7 @@ def test_function_whitespace():
     if x > 0:
         print(True)
     ''')
-    tokens = tokenize.source_tokens(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     for _, value, _, prefix in token_list:
         if value == 'test_whitespace':
             assert prefix == ' '
@@ -88,30 +91,27 @@ def test_tokenize_multiline_I():
     # Make sure a multiline string with newlines has the end marker on the
     # next line.
     fundef = '''""""\n'''
-    tokens = tokenize.source_tokens(fundef)
-    token_list = list(tokens)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (2, 0), '')]
+    token_list = _get_token_list(fundef)
+    assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (2, 0), '')]


 def test_tokenize_multiline_II():
     # Make sure a multiline string with no newlines has the end marker on
     # the same line.
     fundef = '''""""'''
-    tokens = tokenize.source_tokens(fundef)
-    token_list = list(tokens)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (1, 4), '')]
+    token_list = _get_token_list(fundef)
+    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (1, 4), '')]


 def test_tokenize_multiline_III():
     # Make sure a multiline string with newlines has the end marker on the
     # next line, even if there are several newlines.
     fundef = '''""""\n\n'''
-    tokens = tokenize.source_tokens(fundef)
-    token_list = list(tokens)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (3, 0), '')]
+    token_list = _get_token_list(fundef)
+    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (3, 0), '')]
 def test_identifier_contains_unicode():
@@ -119,15 +119,14 @@ def test_identifier_contains_unicode():
     def 我あφ():
         pass
     ''')
-    tokens = tokenize.source_tokens(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     unicode_token = token_list[1]
     if py_version >= 30:
         assert unicode_token[0] == NAME
     else:
         # Unicode tokens in Python 2 seem to be identified as operators.
         # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == OP
+        assert unicode_token[0] == tokenize.ERRORTOKEN
 def test_quoted_strings():
@@ -173,9 +172,9 @@ def test_ur_literals():
     check('Ur""', is_literal=not py_version >= 30)
     check('UR""', is_literal=not py_version >= 30)
     check('bR""')
-    # Starting with Python 3.3 this ordering is also possible, but we just
-    # enable it for all versions. It doesn't hurt.
-    check('Rb""')
+    # Starting with Python 3.3 this ordering is also possible.
+    if py_version >= 33:
+        check('Rb""')
     # Starting with Python 3.6 format strings were introduced.
     check('fr""', is_literal=py_version >= 36)
     check('rF""', is_literal=py_version >= 36)
@@ -202,7 +201,7 @@ def test_error_literal():

 def test_endmarker_end_pos():
     def check(code):
         tokens = _get_token_list(code)
-        lines = splitlines(code)
+        lines = split_lines(code)
         assert tokens[-1].end_pos == (len(lines), len(lines[-1]))

     check('#c')
@@ -211,3 +210,17 @@ def test_endmarker_end_pos():
     check('a')
     check(r'a\\n')
     check('a\\')
+
+
+@pytest.mark.parametrize(
+    ('code', 'types'), [
+        (' foo', [INDENT, NAME, DEDENT]),
+        (' foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
+        (' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
+                               NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
+        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
+    ]
+)
+def test_indentation(code, types):
+    actual_types = [t.type for t in _get_token_list(code)]
+    assert actual_types == types + [ENDMARKER]
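Taken together, the new entry point can be driven directly as below; this assumes, as _get_token_list above does, that parse_version_string() with no argument picks up the running interpreter's version:

    from parso.python import tokenize
    from parso.utils import parse_version_string

    version_info = parse_version_string()
    # Each PythonToken is a (type, value, start_pos, prefix) tuple.
    for typ, value, start_pos, prefix in tokenize.tokenize('a = 1\n', version_info):
        print(typ, repr(value), start_pos, repr(prefix))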

test/test_utils.py

@@ -1,27 +1,44 @@
-from parso.utils import splitlines, source_to_unicode
+from codecs import BOM_UTF8
+
+from parso.utils import split_lines, python_bytes_to_unicode
+import parso


-def test_splitlines_no_keepends():
-    assert splitlines('asd\r\n') == ['asd', '']
-    assert splitlines('asd\r\n\f') == ['asd', '\f']
-    assert splitlines('\fasd\r\n') == ['\fasd', '']
-    assert splitlines('') == ['']
-    assert splitlines('\n') == ['', '']
+def test_split_lines_no_keepends():
+    assert split_lines('asd\r\n') == ['asd', '']
+    assert split_lines('asd\r\n\f') == ['asd', '\f']
+    assert split_lines('\fasd\r\n') == ['\fasd', '']
+    assert split_lines('') == ['']
+    assert split_lines('\n') == ['', '']


-def test_splitlines_keepends():
-    assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
-    assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
-    assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
-    assert splitlines('', keepends=True) == ['']
-    assert splitlines('\n', keepends=True) == ['\n', '']
+def test_split_lines_keepends():
+    assert split_lines('asd\r\n', keepends=True) == ['asd\r\n', '']
+    assert split_lines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
+    assert split_lines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
+    assert split_lines('', keepends=True) == ['']
+    assert split_lines('\n', keepends=True) == ['\n', '']


-def test_source_to_unicode_unicode_text():
+def test_python_bytes_to_unicode_unicode_text():
     source = (
         b"# vim: fileencoding=utf-8\n"
         b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
     )
-    actual = source_to_unicode(source)
+    actual = python_bytes_to_unicode(source)
     expected = source.decode('utf-8')
     assert actual == expected
+
+
+def test_utf8_bom():
+    unicode_bom = BOM_UTF8.decode('utf-8')
+
+    module = parso.parse(unicode_bom)
+    endmarker = module.children[0]
+    assert endmarker.type == 'endmarker'
+    assert unicode_bom == endmarker.prefix
+
+    module = parso.parse(unicode_bom + 'foo = 1')
+    expr_stmt = module.children[0]
+    assert expr_stmt.type == 'expr_stmt'
+    assert unicode_bom == expr_stmt.get_first_leaf().prefix
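The practical difference from str.splitlines shows up in two places: a trailing newline produces a final empty string, and form feeds do not split lines. Both follow directly from the assertions above:

    from parso.utils import split_lines

    assert split_lines('asd\r\n') == ['asd', '']      # keeps the empty last line
    assert 'asd\r\n'.splitlines() == ['asd']          # stdlib drops it
    assert split_lines('asd\r\n\f') == ['asd', '\f']  # \f is not a line break here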

tox.ini (20 lines removed)

@@ -11,26 +11,6 @@ setenv =
     PYTHONDONTWRITEBYTECODE=1
 commands =
     py.test {posargs:parso test}
-[testenv:py26]
-deps =
-    unittest2
-    {[testenv]deps}
-[testenv:py27]
-deps =
-    # for testing the typing module
-    typing
-    {[testenv]deps}
-[testenv:py33]
-deps =
-    typing
-    {[testenv]deps}
-[testenv:py34]
-deps =
-    typing
-    {[testenv]deps}
-[testenv:py35]
-deps =
-    {[testenv]deps}
 [testenv:cov]
 deps =
     coverage