Add an initial version of the code from Jedi.

Still includes imports that have to be removed.
2025-12-08 05:34:51 +08:00 · 2017-05-11 07:13:23 -04:00
parent 36e0c047ae
commit 150fb4c86e
18 changed files with 4371 additions and 0 deletions
--- a/parso/init.py
+++ b/parso/init.py
@@ -0,0 +1,8 @@
+from parso.parser import ParserSyntaxError
+from parso.pgen2.pgen import generate_grammar
+from parso import python
+
+
+def parse(grammar, code):
+    """
+    Parse ``code`` with ``grammar``.
+
+    Placeholder: the top-level entry point is not implemented yet, so every
+    call raises :exc:`NotImplementedError`.
+    """
+    raise NotImplementedError
--- a/parso/cache.py
+++ b/parso/cache.py
@@ -0,0 +1,147 @@
+import time
+import os
+import sys
+import hashlib
+import gc
+import shutil
+import pickle
+import platform
+import errno
+
+from jedi import settings
+from jedi import debug
+from jedi._compatibility import FileNotFoundError
+
+
+_PICKLE_VERSION = 30
+"""
+Version number (integer) for file system cache.
+
+Increment this number when there are any incompatible changes in
+the parser tree classes.  For example, the following changes
+are regarded as incompatible.
+
+- A class name is changed.
+- A class is moved to another module.
+- The ``__slots__`` of a class are changed.
+"""
+
+# "<implementation>-<major><minor>-<pickle version>"
+_VERSION_TAG = '%s-%s%s-%s' % (
+    (platform.python_implementation(),)
+    + tuple(sys.version_info[:2])
+    + (_PICKLE_VERSION,)
+)
+"""
+Short name to distinguish Python implementations and versions.
+
+It's like `sys.implementation.cache_tag` but for Python < 3.3
+we generate something similar.  See:
+http://docs.python.org/3/library/sys.html#sys.implementation
+"""
+
+# for fast_parser, should not be deleted
+parser_cache = {}
+
+
+
+class _NodeCacheItem(object):
+    """
+    Entry of the parser cache: stores a ``node``, its ``lines`` and the
+    ``change_time`` the entry is compared against.
+    """
+
+    def __init__(self, node, lines, change_time=None):
+        self.node = node
+        self.lines = lines
+        # Without an explicit timestamp the entry is stamped with "now".
+        self.change_time = time.time() if change_time is None else change_time
+
+
+def load_module(grammar, path):
+    """
+    Return the cached module for ``path`` or ``None``.
+
+    ``None`` is returned when the source file does not exist, when the
+    in-memory entry is older than the file, or when there is no in-memory
+    entry and the file system cache is disabled or yields nothing.
+    """
+    try:
+        source_mtime = os.path.getmtime(path)
+    except FileNotFoundError:
+        return None
+
+    # TODO Add grammar sha256
+    try:
+        cached = parser_cache[path]
+    except KeyError:
+        # Nothing in memory: optionally fall back to the pickled copy.
+        if settings.use_filesystem_cache:
+            return _load_from_file_system(grammar, path, source_mtime)
+        return None
+
+    if source_mtime <= cached.change_time:
+        return cached.node
+    # The in-memory entry is older than the file on disk.
+    return None
+
+
+def _load_from_file_system(grammar, path, p_time):
+    """
+    Load the pickled cache entry for ``path`` from the cache directory.
+
+    Returns the cached node, or ``None`` when the cache file does not exist
+    or is older than ``p_time`` (the caller passes the modification time of
+    ``path``). On success the entry is also stored in ``parser_cache``.
+
+    NOTE(review): ``pickle.load`` can execute arbitrary code contained in the
+    cache file, so the cache directory must only be writable by trusted
+    users.
+    """
+    cache_path = _get_hashed_path(grammar, path)
+    try:
+        try:
+            if p_time > os.path.getmtime(cache_path):
+                # Cache is outdated
+                return None
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                # In Python 2 instead of an IOError here we get an OSError.
+                # Normalise it so the outer handler treats it as "no cache".
+                raise FileNotFoundError
+            else:
+                raise
+
+        with open(cache_path, 'rb') as f:
+            # The garbage collector is switched off while unpickling and is
+            # always switched back on -- presumably a speed-up for loading
+            # many objects at once; TODO confirm.
+            gc.disable()
+            try:
+                module_cache_item = pickle.load(f)
+            finally:
+                gc.enable()
+    except FileNotFoundError:
+        # Covers both the translated ENOENT above and a failing open().
+        return None
+    else:
+        parser_cache[path] = module_cache_item
+        debug.dbg('pickle loaded: %s', path)
+        return module_cache_item.node
+
+
+def save_module(grammar, path, module, lines, pickling=True):
+    """
+    Store ``module`` in the in-memory cache under ``path`` and, if allowed,
+    pickle it to the file system cache as well.
+
+    Nothing is pickled when ``path`` is ``None``, when its modification time
+    cannot be read, when ``pickling`` is false or when the file system cache
+    is disabled in the settings.
+    """
+    p_time = None
+    if path is not None:
+        try:
+            p_time = os.path.getmtime(path)
+        except OSError:
+            # Without a timestamp the entry could never be validated later.
+            pickling = False
+
+    item = _NodeCacheItem(module, lines, p_time)
+    parser_cache[path] = item
+    if not (settings.use_filesystem_cache and pickling and path is not None):
+        return
+    _save_to_file_system(grammar, path, item)
+
+
+def _save_to_file_system(grammar, path, item):
+    """Pickle ``item`` into the cache file belonging to ``grammar``/``path``."""
+    cache_file = _get_hashed_path(grammar, path)
+    with open(cache_file, 'wb') as handle:
+        pickle.dump(item, handle, pickle.HIGHEST_PROTOCOL)
+
+
+def remove_old_modules(self):
+    """
+    Placeholder; currently does nothing.
+
+    TODO Might want to use such a function to clean up the cache (if it's
+    too old). We could potentially also scan for old files in the
+    directory and delete those.
+
+    NOTE(review): this is a module-level function, so the ``self`` parameter
+    looks like a leftover from a former method -- confirm before changing.
+    """
+
+
+def clear_cache(self):
+    """
+    Delete the cache directory from disk, then empty the in-memory cache.
+    """
+    cache_root = settings.cache_directory
+    shutil.rmtree(cache_root)
+    parser_cache.clear()
+
+
+def _get_hashed_path(grammar, path):
+    """
+    Return the cache file for ``path``:
+    ``<cache directory>/<grammar.sha256>-<sha256 of path>.pkl``.
+    """
+    path_digest = hashlib.sha256(path.encode("utf-8")).hexdigest()
+    cache_dir = _get_cache_directory_path()
+    file_name = '%s-%s.pkl' % (grammar.sha256, path_digest)
+    return os.path.join(cache_dir, file_name)
+
+
+def _get_cache_directory_path():
+    """
+    Return the version specific cache directory, creating it if necessary.
+
+    The directory is ``settings.cache_directory`` joined with
+    ``_VERSION_TAG``.
+    """
+    directory = os.path.join(settings.cache_directory, _VERSION_TAG)
+    try:
+        os.makedirs(directory)
+    except OSError as e:
+        # Checking os.path.exists() first would race with another thread or
+        # process creating the directory in between. ``exist_ok`` does not
+        # exist on Python 2, so inspect errno instead.
+        if e.errno != errno.EEXIST:
+            raise
+    return directory
--- a/parso/parser.py
+++ b/parso/parser.py
@@ -0,0 +1,77 @@
+"""
+The ``Parser`` tries to convert the available Python code into an easy to read
+format, something like an abstract syntax tree. The classes that represent this
+tree live in the :mod:`parso.tree` module.
+
+The Python module ``tokenize`` is a very important part of the ``Parser``,
+because it splits the code into different words (tokens).  Sometimes it looks a
+bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
+module for this?" Well, ``ast`` does a very good job understanding proper Python
+code, but fails to work as soon as there's a single line of broken code.
+
+There's one important optimization that needs to be known: Statements are not
+being parsed completely. ``Statement`` is just a representation of the tokens
+within the statement. This lowers memory usage and cpu time and reduces the
+complexity of the ``Parser`` (there's another parser sitting inside
+``Statement``, which produces ``Array`` and ``Call``).
+"""
+from parso import tree
+from parso.pgen2.parse import PgenParser
+
+
+class ParserSyntaxError(Exception):
+    """
+    Syntax error information produced by the parser.
+
+    Stores a ``message`` and the ``position`` it refers to. May be raised as
+    an exception.
+    """
+    def __init__(self, message, position):
+        self.message, self.position = message, position
+
+
+class BaseParser(object):
+    """
+    Wrapper around :class:`PgenParser` that builds tree objects from the
+    pgen callbacks.
+
+    Subclasses customise the resulting tree through ``node_map`` and
+    ``leaf_map`` (lookup tables of constructors) and through the
+    ``default_node`` / ``default_leaf`` fallbacks.
+    """
+    # Maps a grammar symbol name to the callable that builds its node.
+    node_map = {}
+    default_node = tree.Node
+
+    # Maps a token type to the callable that builds its leaf.
+    leaf_map = {
+    }
+    default_leaf = tree.Leaf
+
+    def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
+        self._grammar = grammar
+        self._start_symbol = start_symbol
+        self._error_recovery = error_recovery
+
+    def parse(self, tokens):
+        """Parse ``tokens`` and return the root node of the tree."""
+        start_number = self._grammar.symbol2number[self._start_symbol]
+        self.pgen_parser = PgenParser(
+            self._grammar, self.convert_node, self.convert_leaf,
+            self.error_recovery, start_number
+        )
+
+        node = self.pgen_parser.parse(tokens)
+        # The stack is empty now, we don't need it anymore.
+        del self.pgen_parser
+        return node
+
+    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
+                       add_token_callback):
+        """
+        Callback for the pgen parser when a token cannot be handled.
+
+        Raises NotImplementedError if error recovery was requested (it is not
+        implemented here) and ParserSyntaxError otherwise.
+        """
+        if self._error_recovery:
+            raise NotImplementedError("Error Recovery is not implemented")
+        else:
+            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
+
+    def convert_node(self, grammar, type_, children):
+        """Build the node for symbol number ``type_`` from ``children``."""
+        # TODO REMOVE symbol, we don't want type here.
+        symbol = grammar.number2symbol[type_]
+        # Only the lookup may fall back to the default class; a KeyError
+        # raised inside a node constructor must not be swallowed.
+        try:
+            node_class = self.node_map[symbol]
+        except KeyError:
+            return self.default_node(symbol, children)
+        return node_class(children)
+
+    def convert_leaf(self, grammar, type_, value, prefix, start_pos):
+        """Build the leaf for a token of type ``type_``."""
+        # Same reasoning as in convert_node: keep the try limited to the
+        # lookup so constructor errors propagate.
+        try:
+            leaf_class = self.leaf_map[type_]
+        except KeyError:
+            return self.default_leaf(value, start_pos, prefix)
+        return leaf_class(value, start_pos, prefix)
--- a/parso/pgen2/init.py
+++ b/parso/pgen2/init.py
@@ -0,0 +1,8 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+# Copyright 2014 David Halter. Integration into Jedi.
+# Modifications are dual-licensed: MIT and PSF.
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -0,0 +1,127 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2014 David Halter. Integration into Jedi.
+# Modifications are dual-licensed: MIT and PSF.
+
+"""This module defines the data structures used to represent a grammar.
+
+These are a bit arcane because they are derived from the data
+structures used by Python's 'pgen' parser generator.
+
+There's also a table here mapping operators to their names in the
+token module; the Python tokenize module reports all operators as the
+fallback token code OP, but the parser needs the actual token code.
+
+"""
+
+import pickle
+import hashlib
+
+
+
+class Grammar(object):
+    """Pgen parsing tables conversion class.
+
+    Once initialized, this class supplies the grammar tables for the
+    parsing engine implemented by parse.py.  The parsing engine
+    accesses the instance variables directly.  The class here does not
+    provide initialization of the tables; several subclasses exist to
+    do this (see the conv and pgen modules).
+
+    The load() method reads the tables from a pickle file, which is
+    much faster than the other ways offered by subclasses.  The pickle
+    file is written by calling dump() (after loading the grammar
+    tables using a subclass).  The report() method prints a readable
+    representation of the tables to stdout, for debugging.
+
+    The instance variables are as follows:
+
+    symbol2number -- a dict mapping symbol names to numbers.  Symbol
+                     numbers are always 256 or higher, to distinguish
+                     them from token numbers, which are between 0 and
+                     255 (inclusive).
+
+    number2symbol -- a dict mapping numbers to symbol names;
+                     these two are each other's inverse.
+
+    states        -- a list of DFAs, where each DFA is a list of
+                     states, each state is a list of arcs, and each
+                     arc is a (i, j) pair where i is a label and j is
+                     a state number.  The DFA number is the index into
+                     this list.  (This name is slightly confusing.)
+                     Final states are represented by a special arc of
+                     the form (0, j) where j is its own state number.
+
+    dfas          -- a dict mapping symbol numbers to (DFA, first)
+                     pairs, where DFA is an item from the states list
+                     above, and first is a set of tokens that can
+                     begin this grammar rule (represented by a dict
+                     whose values are always 1).
+
+    labels        -- a list of (x, y) pairs where x is either a token
+                     number or a symbol number, and y is either None
+                     or a string; the strings are keywords.  The label
+                     number is the index in this list; label numbers
+                     are used to mark state transitions (arcs) in the
+                     DFAs.
+
+    start         -- the number of the grammar's start symbol.
+
+    keywords      -- a dict mapping keyword strings to arc labels.
+
+    tokens        -- a dict mapping token numbers to arc labels.
+
+    symbol2label  -- a dict mapping symbol names to arc labels.
+
+    sha256        -- hex digest of the UTF-8 encoded BNF text the
+                     grammar was created from.
+
+    """
+
+    def __init__(self, bnf_text):
+        self.symbol2number = {}
+        self.number2symbol = {}
+        self.states = []
+        self.dfas = {}
+        self.labels = [(0, "EMPTY")]
+        self.keywords = {}
+        self.tokens = {}
+        self.symbol2label = {}
+        self.start = 256
+        self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
+
+    def dump(self, filename):
+        """Dump the grammar tables to a pickle file."""
+        with open(filename, "wb") as f:
+            pickle.dump(self.__dict__, f, 2)
+
+    def load(self, filename):
+        """Load the grammar tables from a pickle file.
+
+        NOTE(review): unpickling can execute arbitrary code; only load
+        files from a trusted location.
+        """
+        with open(filename, "rb") as f:
+            d = pickle.load(f)
+        self.__dict__.update(d)
+
+    def copy(self):
+        """
+        Copy the grammar.
+
+        The table containers themselves are duplicated (one level deep), so
+        adding or removing entries on the copy leaves the original untouched.
+        """
+        # __init__ needs the BNF text, which is not kept on the instance, so
+        # the copy is created without calling it. Updating __dict__ carries
+        # over ``start``, ``sha256`` and any attribute added by subclasses.
+        cls = self.__class__
+        new = cls.__new__(cls)
+        new.__dict__.update(self.__dict__)
+        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
+                          "tokens", "symbol2label"):
+            setattr(new, dict_attr, getattr(self, dict_attr).copy())
+        new.labels = self.labels[:]
+        new.states = self.states[:]
+        return new
+
+    def report(self):
+        """Dump the grammar tables to standard output, for debugging."""
+        from pprint import pprint
+        print("s2n")
+        pprint(self.symbol2number)
+        print("n2s")
+        pprint(self.number2symbol)
+        print("states")
+        pprint(self.states)
+        print("dfas")
+        pprint(self.dfas)
+        print("labels")
+        pprint(self.labels)
+        print("start", self.start)
--- a/parso/pgen2/parse.py
+++ b/parso/pgen2/parse.py
@@ -0,0 +1,217 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2014 David Halter. Integration into Jedi.
+# Modifications are dual-licensed: MIT and PSF.
+
+"""
+Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+"""
+
+from parso import tokenize
+
+
+class InternalParseError(Exception):
+    """
+    Raised when the parser is stuck and error recovery could not help.
+
+    This is not supposed to happen; seeing it means that something is
+    seriously wrong.
+    """
+
+    def __init__(self, msg, type, value, start_pos):
+        description = "%s: type=%r, value=%r, start_pos=%r" % (
+            msg, tokenize.tok_name[type], value, start_pos)
+        super(InternalParseError, self).__init__(description)
+        self.msg, self.type = msg, type
+        self.value, self.start_pos = value, start_pos
+
+
+def token_to_ilabel(grammar, type_, value):
+    """
+    Map a token to its arc label in ``grammar``.
+
+    A ``NAME`` token whose value is a keyword gets the keyword's label; every
+    other token is looked up by its type. Returns ``None`` if the grammar has
+    no label for the token.
+    """
+    if type_ == tokenize.NAME and value in grammar.keywords:
+        # Reserved word (keyword)
+        return grammar.keywords[value]
+    return grammar.tokens.get(type_)
+
+
+class PgenParser(object):
+    """Parser engine.
+
+    The proper usage sequence is:
+
+    p = PgenParser(grammar, convert_node, convert_leaf, error_recovery, start)
+    root = p.parse(tokens)
+
+    or, when feeding the tokens manually:
+
+    <for each input token>:
+        if p.addtoken(...):           # parse a token
+            break
+    root = p.rootnode                 # root of the syntax tree
+
+    A parser instance contains state pertaining to the current token
+    sequence, and should not be used concurrently by different threads
+    to parse separate token sequences.
+
+    Parsing is complete when addtoken() returns True; the root of the
+    syntax tree can then be retrieved from the rootnode instance
+    variable.  When a token cannot be handled, the error_recovery
+    callback passed to the constructor is called.
+
+    """
+
+    def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
+        """Constructor.
+
+        The grammar argument is a grammar.Grammar instance; see the
+        grammar module for more information.
+
+        convert_node is called as convert_node(grammar, type, children)
+        when a nonterminal is completed, unless it has exactly one
+        child, in which case that child is used directly.
+
+        convert_leaf is called as convert_leaf(grammar, type, value,
+        prefix, start_pos) for every shifted token.
+
+        error_recovery is called as error_recovery(grammar, stack,
+        arcs, type, value, start_pos, prefix, add_token_callback) when
+        a token fits none of the arcs of the current state and that
+        state is not an accepting one.
+
+        start is the number of the start symbol.  The stack is set up
+        with the DFA of that symbol, so the parser can receive tokens
+        right after construction.
+
+        """
+        self.grammar = grammar
+        self.convert_node = convert_node
+        self.convert_leaf = convert_leaf
+
+        # Each stack entry is a tuple: (dfa, state, node).
+        # A node is a tuple: (type, children),
+        # where children is a list of nodes or None
+        newnode = (start, [])
+        stackentry = (self.grammar.dfas[start], 0, newnode)
+        self.stack = [stackentry]
+        self.rootnode = None
+        self.error_recovery = error_recovery
+
+    def parse(self, tokens):
+        """
+        Feed all ``tokens`` (4-tuples of type, value, start_pos, prefix) to
+        the parser and return the root node.
+
+        Raises InternalParseError if the tokens run out while the stack is
+        not empty.
+        """
+        for type_, value, start_pos, prefix in tokens:
+            if self.addtoken(type_, value, start_pos, prefix):
+                break
+        else:
+            # We never broke out -- EOF is too soon -- Unfinished statement.
+            # However, the error recovery might have added the token again, if
+            # the stack is empty, we're fine.
+            # NOTE(review): if ``tokens`` yields nothing at all, type_, value
+            # and start_pos are unbound at this point.
+            if self.stack:
+                raise InternalParseError("incomplete input", type_, value, start_pos)
+        return self.rootnode
+
+    def addtoken(self, type_, value, start_pos, prefix):
+        """Add a token; return True if this is the end of the program."""
+        ilabel = token_to_ilabel(self.grammar, type_, value)
+
+        # Loop until the token is shifted; may raise exceptions
+        # Attributes and bound methods are cached in locals for the loop.
+        _gram = self.grammar
+        _labels = _gram.labels
+        _push = self._push
+        _pop = self._pop
+        _shift = self._shift
+        while True:
+            dfa, state, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                t, v = _labels[i]
+                if ilabel == i:
+                    # Look it up in the list of labels
+                    assert t < 256
+                    # Shift a token; we're done with it
+                    _shift(type_, value, newstate, prefix, start_pos)
+                    # Pop while we are in an accept-only state
+                    state = newstate
+                    while states[state] == [(0, state)]:
+                        _pop()
+                        if not self.stack:
+                            # Done parsing!
+                            return True
+                        dfa, state, node = self.stack[-1]
+                        states, first = dfa
+                    # Done with this token
+                    return False
+                elif t >= 256:
+                    # See if it's a symbol and if we're in its first set
+                    itsdfa = _gram.dfas[t]
+                    itsstates, itsfirst = itsdfa
+                    if ilabel in itsfirst:
+                        # Push a symbol
+                        _push(t, itsdfa, newstate)
+                        break  # To continue the outer while loop
+            else:
+                # No arc matched the token.
+                if (0, state) in arcs:
+                    # An accepting state, pop it and try something else
+                    _pop()
+                    if not self.stack:
+                        # Done parsing, but another token is input
+                        raise InternalParseError("too much input", type_, value, start_pos)
+                else:
+                    self.error_recovery(self.grammar, self.stack, arcs, type_,
+                                        value, start_pos, prefix, self.addtoken)
+                    # Leaves the while loop; the method then returns None.
+                    break
+
+    def _shift(self, type_, value, newstate, prefix, start_pos):
+        """Shift a token.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
+        # node is (type, children); append the new leaf to the children.
+        node[-1].append(newnode)
+        self.stack[-1] = (dfa, newstate, node)
+
+    def _push(self, type_, newdfa, newstate):
+        """Push a nonterminal.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type_, [])
+        # The current entry moves on to newstate before the new entry for
+        # the nonterminal is put on top.
+        self.stack[-1] = (dfa, newstate, node)
+        self.stack.append((newdfa, 0, newnode))
+
+    def _pop(self):
+        """Pop a nonterminal.  (Internal)"""
+        popdfa, popstate, (type_, children) = self.stack.pop()
+        # If there's exactly one child, return that child instead of creating a
+        # new node.  We still create expr_stmt and file_input though, because a
+        # lot of Jedi depends on its logic.
+        if len(children) == 1:
+            newnode = children[0]
+        else:
+            newnode = self.convert_node(self.grammar, type_, children)
+
+        try:
+            # Equal to:
+            # dfa, state, node = self.stack[-1]
+            # symbol, children = node
+            self.stack[-1][2][1].append(newnode)
+        except IndexError:
+            # Stack is empty, set the rootnode.
+            self.rootnode = newnode
--- a/parso/pgen2/pgen.py
+++ b/parso/pgen2/pgen.py
@@ -0,0 +1,394 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2014 David Halter. Integration into Jedi.
+# Modifications are dual-licensed: MIT and PSF.
+
+from parso.pgen2 import grammar
+from parso import token
+from parso import tokenize
+
+
+class ParserGenerator(object):
+    def __init__(self, bnf_text):
+        """
+        Tokenize and parse the grammar text ``bnf_text`` into DFAs and
+        compute the first sets of all rules.
+
+        The statements below depend on each other: parse() needs the
+        lookahead set up by gettoken(), and addfirstsets() needs both
+        ``self.dfas`` and the empty ``self.first``.
+        """
+        self._bnf_text = bnf_text
+        self.generator = tokenize.source_tokens(bnf_text)
+        self.gettoken()  # Initialize lookahead
+        self.dfas, self.startsymbol = self.parse()
+        self.first = {}  # map from symbol name to set of tokens
+        self.addfirstsets()
+
+    def make_grammar(self):
+        """
+        Build a ``grammar.Grammar`` from the parsed DFAs.
+
+        Symbols are numbered in alphabetical order, except that the start
+        symbol always comes first.
+        """
+        result = grammar.Grammar(self._bnf_text)
+        ordered = sorted(self.dfas)
+        ordered.remove(self.startsymbol)
+        ordered.insert(0, self.startsymbol)
+
+        # First pass: every symbol gets its number (256 and upwards).
+        for symbol in ordered:
+            number = 256 + len(result.symbol2number)
+            result.symbol2number[symbol] = number
+            result.number2symbol[number] = symbol
+
+        # Second pass: translate the DFA of every symbol into arc lists.
+        for symbol in ordered:
+            dfa = self.dfas[symbol]
+            states = []
+            for dfa_state in dfa:
+                arcs = [
+                    (self.make_label(result, label), dfa.index(target))
+                    for label, target in dfa_state.arcs.items()
+                ]
+                if dfa_state.isfinal:
+                    # Final states carry an arc (0, <own state number>).
+                    arcs.append((0, dfa.index(dfa_state)))
+                states.append(arcs)
+            result.states.append(states)
+            first = self.make_first(result, symbol)
+            result.dfas[result.symbol2number[symbol]] = (states, first)
+        result.start = result.symbol2number[self.startsymbol]
+        return result
+
+    def make_first(self, c, name):
+        """
+        Translate the first set of rule ``name`` into arc labels of ``c``.
+
+        Returns a dict used as a set: the keys are labels, the values are
+        always 1.
+        """
+        # XXX Asserting that every ilabel is new failed on <> ... !=, so
+        # duplicates are simply merged.
+        return dict((self.make_label(c, raw), 1) for raw in self.first[name])
+
+    def make_label(self, c, label):
+        """
+        Return the arc label (an index into ``c.labels``) for ``label``.
+
+        A new entry is registered in ``c`` the first time a label is seen.
+        ``label`` is either a bare name (a grammar symbol or a named token
+        such as NAME) or a quoted string (a keyword or an operator).
+        """
+        # XXX Maybe this should be a method on a subclass of converter?
+        new_ilabel = len(c.labels)
+        if label[0].isalpha():
+            # Either a symbol name or a named token
+            if label in c.symbol2number:
+                # A symbol name (a non-terminal)
+                if label in c.symbol2label:
+                    return c.symbol2label[label]
+                c.labels.append((c.symbol2number[label], None))
+                c.symbol2label[label] = new_ilabel
+                return new_ilabel
+
+            # A named token (NAME, NUMBER, STRING)
+            itoken = getattr(token, label, None)
+            assert isinstance(itoken, int), label
+            assert itoken in token.tok_name, label
+            if itoken in c.tokens:
+                return c.tokens[itoken]
+            c.labels.append((itoken, None))
+            c.tokens[itoken] = new_ilabel
+            return new_ilabel
+
+        # Either a keyword or an operator
+        assert label[0] in ('"', "'"), label
+        value = eval(label)
+        if value[0].isalpha():
+            # A keyword
+            if value in c.keywords:
+                return c.keywords[value]
+            c.labels.append((token.NAME, value))
+            c.keywords[value] = new_ilabel
+            return new_ilabel
+
+        # An operator (any non-numeric token)
+        itoken = token.opmap[value]  # Fails if unknown token
+        if itoken in c.tokens:
+            return c.tokens[itoken]
+        c.labels.append((itoken, None))
+        c.tokens[itoken] = new_ilabel
+        return new_ilabel
+
+    def addfirstsets(self):
+        """Compute the first set of every rule that does not have one yet."""
+        for name in sorted(self.dfas):
+            if name in self.first:
+                # Already computed while resolving another rule.
+                continue
+            self.calcfirst(name)
+
+    def calcfirst(self, name):
+        """
+        Compute ``self.first[name]``, the set (a dict with values 1) of
+        labels rule ``name`` can start with, recursing into referenced rules.
+
+        Raises ValueError for left recursive rules and for rules in which a
+        label could start more than one alternative.
+        """
+        dfa = self.dfas[name]
+        self.first[name] = None  # dummy to detect left recursion
+        start_state = dfa[0]
+        combined = {}
+        per_label = {}
+        for label in start_state.arcs:
+            if label not in self.dfas:
+                # Not a rule: the label only starts with itself.
+                combined[label] = 1
+                per_label[label] = {label: 1}
+                continue
+            if label not in self.first:
+                self.calcfirst(label)
+            label_first = self.first[label]
+            if label_first is None:
+                # The referenced rule is still being computed further up.
+                raise ValueError("recursion for rule %r" % name)
+            combined.update(label_first)
+            per_label[label] = label_first
+
+        # Every symbol may appear in the first set of one alternative only.
+        owner = {}
+        for label, label_first in per_label.items():
+            for symbol in label_first:
+                if symbol in owner:
+                    raise ValueError("rule %s is ambiguous; %s is in the"
+                                     " first sets of %s as well as %s" %
+                                     (name, symbol, label, owner[symbol]))
+                owner[symbol] = label
+        self.first[name] = combined
+
+    def parse(self):
+        """
+        Parse the whole grammar text and return ``(dfas, startsymbol)``.
+
+        ``dfas`` maps each rule name to its simplified DFA (the list returned
+        by make_dfa); ``startsymbol`` is the name of the first rule in the
+        text.
+        """
+        dfas = {}
+        startsymbol = None
+        # MSTART: (NEWLINE | RULE)* ENDMARKER
+        while self.type != token.ENDMARKER:
+            while self.type == token.NEWLINE:
+                self.gettoken()
+            # RULE: NAME ':' RHS NEWLINE
+            name = self.expect(token.NAME)
+            self.expect(token.OP, ":")
+            a, z = self.parse_rhs()
+            self.expect(token.NEWLINE)
+            #self.dump_nfa(name, a, z)
+            dfa = self.make_dfa(a, z)
+            #self.dump_dfa(name, dfa)
+            # oldlen = len(dfa)
+            self.simplify_dfa(dfa)
+            # newlen = len(dfa)
+            dfas[name] = dfa
+            #print name, oldlen, newlen
+            # The first rule of the grammar is the start symbol.
+            if startsymbol is None:
+                startsymbol = name
+        return dfas, startsymbol
+
+    def make_dfa(self, start, finish):
+        """
+        Convert the NFA delimited by ``start`` and ``finish`` into a DFA.
+
+        Returns a list of DFAState instances; the first one is the start
+        state.
+        """
+        # To turn an NFA into a DFA, we define the states of the DFA
+        # to correspond to *sets* of states of the NFA.  Then do some
+        # state reduction.  Let's represent sets as dicts with 1 for
+        # values.
+        assert isinstance(start, NFAState)
+        assert isinstance(finish, NFAState)
+
+        def closure(state):
+            # The set of NFA states reachable from ``state`` through
+            # unlabeled arcs only (including ``state`` itself).
+            base = {}
+            addclosure(state, base)
+            return base
+
+        def addclosure(state, base):
+            # Add ``state`` and everything reachable from it through
+            # unlabeled arcs to ``base``.
+            assert isinstance(state, NFAState)
+            if state in base:
+                return
+            base[state] = 1
+            for label, next in state.arcs:
+                if label is None:
+                    addclosure(next, base)
+
+        states = [DFAState(closure(start), finish)]
+        for state in states:  # NB states grows while we're iterating
+            # Collect, per label, the set of NFA states reachable from any
+            # NFA state of this DFA state.
+            arcs = {}
+            for nfastate in state.nfaset:
+                for label, next in nfastate.arcs:
+                    if label is not None:
+                        addclosure(next, arcs.setdefault(label, {}))
+            for label, nfaset in arcs.items():
+                # Reuse the DFA state with the same NFA set or create one.
+                for st in states:
+                    if st.nfaset == nfaset:
+                        break
+                else:
+                    st = DFAState(nfaset, finish)
+                    states.append(st)
+                state.addarc(st, label)
+        return states  # List of DFAState instances; first one is start
+
+    def dump_nfa(self, name, start, finish):
+        """Print the NFA reachable from ``start`` to stdout, for debugging."""
+        print("Dump of NFA for", name)
+        # States are numbered in order of discovery; the list grows while it
+        # is being iterated.
+        pending = [start]
+        for index, state in enumerate(pending):
+            print("  State", index, "(final)" if state is finish else "")
+            for label, target in state.arcs:
+                if target not in pending:
+                    pending.append(target)
+                target_index = pending.index(target)
+                if label is None:
+                    print("    -> %d" % target_index)
+                else:
+                    print("    %s -> %d" % (label, target_index))
+
+    def dump_dfa(self, name, dfa):
+        """Print the states and arcs of ``dfa`` to stdout, for debugging."""
+        print("Dump of DFA for", name)
+        for index, state in enumerate(dfa):
+            marker = "(final)" if state.isfinal else ""
+            print("  State", index, marker)
+            for label, target in state.arcs.items():
+                print("    %s -> %d" % (label, dfa.index(target)))
+
+    def simplify_dfa(self, dfa):
+        """Merge equal states of ``dfa`` in place."""
+        # This is not theoretically optimal, but works well enough.
+        # Algorithm: repeatedly look for two states that have the same
+        # set of arcs (same labels pointing to the same nodes) and
+        # unify them, until things stop changing.
+
+        # dfa is a list of DFAState instances
+        changes = True
+        while changes:
+            changes = False
+            for i, state_i in enumerate(dfa):
+                for j in range(i + 1, len(dfa)):
+                    state_j = dfa[j]
+                    if state_i == state_j:
+                        #print "  unify", i, j
+                        del dfa[j]
+                        # unifystate is defined outside this view; presumably
+                        # it redirects arcs from state_j to state_i.
+                        for state in dfa:
+                            state.unifystate(state_j, state_i)
+                        changes = True
+                        # The list just shrank, so the remaining j values are
+                        # stale; leave the inner loop. The while loop runs
+                        # another pass because ``changes`` is set.
+                        break
+
+    def parse_rhs(self):
+        """
+        Parse ``RHS: ALT ('|' ALT)*`` and return its (start, end) NFA states.
+        """
+        first, last = self.parse_alt()
+        if self.value != "|":
+            # A single alternative needs no extra states.
+            return first, last
+
+        # Hang all alternatives between a common start and end state.
+        common_start = NFAState()
+        common_end = NFAState()
+        common_start.addarc(first)
+        last.addarc(common_end)
+        while self.value == "|":
+            self.gettoken()
+            first, last = self.parse_alt()
+            common_start.addarc(first)
+            last.addarc(common_end)
+        return common_start, common_end
+
+    def parse_alt(self):
+        """
+        Parse ``ALT: ITEM+`` and return its (start, end) NFA states; the items
+        are chained one after the other.
+        """
+        start, end = self.parse_item()
+        while (self.value in ("(", "[") or
+               self.type in (token.NAME, token.STRING)):
+            item_start, item_end = self.parse_item()
+            # Link the previous item to the one just parsed.
+            end.addarc(item_start)
+            end = item_end
+        return start, end
+
+    def parse_item(self):
+        """
+        Parse ``ITEM: '[' RHS ']' | ATOM ['+' | '*']`` and return its
+        (start, end) NFA states.
+        """
+        if self.value == "[":
+            self.gettoken()
+            start, end = self.parse_rhs()
+            self.expect(token.OP, "]")
+            # Optional part: it may be skipped entirely.
+            start.addarc(end)
+            return start, end
+
+        start, end = self.parse_atom()
+        repeat = self.value
+        if repeat not in ("+", "*"):
+            return start, end
+        self.gettoken()
+        # Repetition: loop from the end back to the start.
+        end.addarc(start)
+        return (start, end) if repeat == "+" else (start, start)
+
+    def parse_atom(self):
+        # ATOM: '(' RHS ')' | NAME | STRING
+        if self.value == "(":
+            self.gettoken()
+            a, z = self.parse_rhs()
+            self.expect(token.OP, ")")
+            return a, z
+        elif self.type in (token.NAME, token.STRING):
+            a = NFAState()
+            z = NFAState()
+            a.addarc(z, self.value)
+            self.gettoken()
+            return a, z
+        else:
+            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
+                             self.type, self.value)
+
+    def expect(self, type, value=None):
+        if self.type != type or (value is not None and self.value != value):
+            self.raise_error("expected %s/%s, got %s/%s",
+                             type, value, self.type, self.value)
+        value = self.value
+        self.gettoken()
+        return value
+
+    def gettoken(self):
+        tup = next(self.generator)
+        while tup[0] in (token.COMMENT, token.NL):
+            tup = next(self.generator)
+        self.type, self.value, self.begin, prefix = tup
+        #print tokenize.tok_name[self.type], repr(self.value)
+
+    def raise_error(self, msg, *args):
+        if args:
+            try:
+                msg = msg % args
+            except:
+                msg = " ".join([msg] + list(map(str, args)))
+        line = open(self.filename).readlines()[self.begin[0]]
+        raise SyntaxError(msg, (self.filename, self.begin[0],
+                                self.begin[1], line))
+
+
+class NFAState(object):
+    def __init__(self):
+        self.arcs = []  # list of (label, NFAState) pairs
+
+    def addarc(self, next, label=None):
+        assert label is None or isinstance(label, str)
+        assert isinstance(next, NFAState)
+        self.arcs.append((label, next))
+
+
+class DFAState(object):
+    def __init__(self, nfaset, final):
+        assert isinstance(nfaset, dict)
+        assert isinstance(next(iter(nfaset)), NFAState)
+        assert isinstance(final, NFAState)
+        self.nfaset = nfaset
+        self.isfinal = final in nfaset
+        self.arcs = {}  # map from label to DFAState
+
+    def addarc(self, next, label):
+        assert isinstance(label, str)
+        assert label not in self.arcs
+        assert isinstance(next, DFAState)
+        self.arcs[label] = next
+
+    def unifystate(self, old, new):
+        for label, next in self.arcs.items():
+            if next is old:
+                self.arcs[label] = new
+
+    def __eq__(self, other):
+        # Equality test -- ignore the nfaset instance variable
+        assert isinstance(other, DFAState)
+        if self.isfinal != other.isfinal:
+            return False
+        # Can't just return self.arcs == other.arcs, because that
+        # would invoke this method recursively, with cycles...
+        if len(self.arcs) != len(other.arcs):
+            return False
+        for label, next in self.arcs.items():
+            if next is not other.arcs.get(label):
+                return False
+        return True
+
+    __hash__ = None  # For Py3 compatibility.
+
+
+def generate_grammar(bnf_text):
+    """
+    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
+    at-least-once repetition, [] for optional parts, | for alternatives and ()
+    for grouping).
+
+    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
+    own parser.
+    """
+    p = ParserGenerator(bnf_text)
+    return p.make_grammar()
--- a/parso/python/init.py
+++ b/parso/python/init.py
@@ -0,0 +1,124 @@
+"""
+Parsers for Python
+"""
+import os
+
+from jedi import settings
+from jedi.common import splitlines, source_to_unicode
+from jedi._compatibility import FileNotFoundError
+from parso.pgen2.pgen import generate_grammar
+from parso.python.parser import Parser, _remove_last_newline
+from parso.python.diff import DiffParser
+from parso.tokenize import generate_tokens
+from parso.cache import parser_cache, load_module, save_module
+
+
+# Grammars that were already generated, keyed by the path of their file.
+_loaded_grammars = {}
+
+
+def load_grammar(version=None):
+    """
+    Loads a Python grammar. The default version is always the latest.
+
+    If you need support for a specific version, please use e.g.
+    `version='3.3'`.
+    """
+    if version is None:
+        version = '3.6'
+
+    if version in ('3.2', '3.3'):
+        version = '3.4'
+    elif version == '2.6':
+        version = '2.7'
+
+    file = 'grammar' + version + '.txt'
+
+    global _loaded_grammars
+    path = os.path.join(os.path.dirname(__file__), file)
+    try:
+        return _loaded_grammars[path]
+    except KeyError:
+        try:
+            with open(path) as f:
+                bnf_text = f.read()
+            grammar = generate_grammar(bnf_text)
+            return _loaded_grammars.setdefault(path, grammar)
+        except FileNotFoundError:
+            # Just load the default if the file does not exist.
+            return load_grammar()
+
+
+def parse(code=None, path=None, grammar=None, error_recovery=True,
+          start_symbol='file_input', cache=False, diff_cache=False):
+    """
+    If you want to parse a Python file you want to start here, most likely.
+
+    If you need finer grained control over the parsed instance, there will be
+    other ways to access it.
+
+    :param code: A unicode string that contains Python code. If it is None,
+        the code is read from ``path``.
+    :param path: The path to the file you want to open. Only needed for caching.
+    :param grammar: A Python grammar file, created with load_grammar. You may
+        not specify it. In that case ``load_grammar()`` is called without a
+        version.
+    :param error_recovery: If enabled, any code will be returned. If it is
+        invalid, it will be returned as an error node. If disabled, you will
+        get a ParseError when encountering syntax errors in your code.
+    :param start_symbol: The grammar symbol that you want to parse. Only
+        allowed to be used when error_recovery is disabled.
+    :param cache: If enabled, ``code`` is empty or None and a ``path`` is
+        given, the module is first looked up with ``load_module``. A freshly
+        parsed module is stored with ``save_module(..., pickling=cache)``.
+    :param diff_cache: If enabled (and ``settings.fast_parser`` is set), a
+        module that is found in ``parser_cache`` for ``path`` is updated
+        with the ``DiffParser`` instead of being parsed from scratch.
+
+    :return: A syntax tree node. Typically the module.
+    :raises TypeError: If neither ``code`` nor ``path`` is given.
+    """
+    if code is None and path is None:
+        raise TypeError("Please provide either code or a path.")
+
+    if grammar is None:
+        grammar = load_grammar()
+
+    if cache and not code and path is not None:
+        # In this case we do actual caching. We just try to load it.
+        module_node = load_module(grammar, path)
+        if module_node is not None:
+            return module_node
+
+    if code is None:
+        with open(path, 'rb') as f:
+            code = source_to_unicode(f.read())
+
+    if diff_cache and settings.fast_parser:
+        try:
+            module_cache_item = parser_cache[path]
+        except KeyError:
+            # Nothing to diff against, fall through to a normal parse.
+            pass
+        else:
+            lines = splitlines(code, keepends=True)
+            module_node = module_cache_item.node
+            old_lines = module_cache_item.lines
+            if old_lines == lines:
+                # Nothing changed, the cached module can be used as it is.
+                save_module(grammar, path, module_node, lines, pickling=False)
+                return module_node
+
+            new_node = DiffParser(grammar, module_node).update(
+                old_lines=old_lines,
+                new_lines=lines
+            )
+            save_module(grammar, path, new_node, lines, pickling=cache)
+            return new_node
+
+    # The code that is tokenized always ends with a newline. If one has to
+    # be added here, it is taken out of the tree again after parsing.
+    added_newline = not code.endswith('\n')
+    lines = tokenize_lines = splitlines(code, keepends=True)
+    if added_newline:
+        code += '\n'
+        # Copy the list, so that ``lines`` (which is saved in the cache
+        # below) keeps the original content.
+        tokenize_lines = list(tokenize_lines)
+        tokenize_lines[-1] += '\n'
+        tokenize_lines.append('')
+
+    tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
+
+    p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
+    root_node = p.parse(tokens=tokens)
+    if added_newline:
+        _remove_last_newline(root_node)
+
+    if cache or diff_cache:
+        save_module(grammar, path, root_node, lines, pickling=cache)
+    return root_node
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -0,0 +1,603 @@
+"""
+Basically a parser that is faster, because it tries to parse only parts and
+if anything changes, it only reparses the changed parts.
+
+It works with a simple diff in the beginning and will try to reuse old parser
+fragments.
+"""
+import re
+import difflib
+from collections import namedtuple
+
+from jedi.common import splitlines
+from jedi import debug
+from parso.python.parser import Parser, _remove_last_newline
+from parso.python.tree import EndMarker
+from parso.tokenize import (generate_tokens, NEWLINE, TokenInfo,
+                            ENDMARKER, INDENT, DEDENT)
+
+
+def _get_last_line(node_or_leaf):
+    last_leaf = node_or_leaf.get_last_leaf()
+    if _ends_with_newline(last_leaf):
+        return last_leaf.start_pos[0]
+    else:
+        return last_leaf.end_pos[0]
+
+
+def _ends_with_newline(leaf, suffix=''):
+    if leaf.type == 'error_leaf':
+        typ = leaf.original_type
+    else:
+        typ = leaf.type
+
+    return typ == 'newline' or suffix.endswith('\n')
+
+
+def _flows_finished(grammar, stack):
+    """
+    if, while, for and try might not be finished, because another part might
+    still be parsed.
+    """
+    for dfa, newstate, (symbol_number, nodes) in stack:
+        if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
+                                                    'for_stmt', 'try_stmt'):
+            return False
+    return True
+
+
+def suite_or_file_input_is_valid(grammar, stack):
+    if not _flows_finished(grammar, stack):
+        return False
+
+    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
+        if grammar.number2symbol[symbol_number] == 'suite':
+            # If only newline is in the suite, the suite is not valid, yet.
+            return len(nodes) > 1
+    # Not reaching a suite means that we're dealing with file_input levels
+    # where there's no need for a valid statement in it. It can also be empty.
+    return True
+
+
+def _is_flow_node(node):
+    try:
+        value = node.children[0].value
+    except AttributeError:
+        return False
+    return value in ('if', 'for', 'while', 'try')
+
+
+class _PositionUpdatingFinished(Exception):
+    pass
+
+
+def _update_positions(nodes, line_offset, last_leaf):
+    for node in nodes:
+        try:
+            children = node.children
+        except AttributeError:
+            # Is a leaf
+            node.line += line_offset
+            if node is last_leaf:
+                raise _PositionUpdatingFinished
+        else:
+            _update_positions(children, line_offset, last_leaf)
+
+
+class DiffParser(object):
+    """
+    An advanced form of parsing a file faster. Unfortunately comes with huge
+    side effects. It changes the given module.
+    """
+    def __init__(self, grammar, module):
+        self._grammar = grammar
+        self._module = module
+
+    def _reset(self):
+        self._copy_count = 0
+        self._parser_count = 0
+
+        self._nodes_stack = _NodesStack(self._module)
+
+    def update(self, old_lines, new_lines):
+        '''
+        The algorithm works as follows:
+
+        Equal:
+            - Assure that the start is a newline, otherwise parse until we get
+              one.
+            - Copy from parsed_until_line + 1 to max(i2 + 1)
+            - Make sure that the indentation is correct (e.g. add DEDENT)
+            - Add old and change positions
+        Insert:
+            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
+              much more.
+
+        Returns the new module node.
+        '''
+        debug.speed('diff parser start')
+        # Reset the used names cache so they get regenerated.
+        self._module._used_names = None
+
+        self._parser_lines_new = new_lines
+        self._added_newline = False
+        if new_lines[-1] != '':
+            # The Python grammar needs a newline at the end of a file, but for
+            # everything else we keep working with new_lines here.
+            self._parser_lines_new = list(new_lines)
+            self._parser_lines_new[-1] += '\n'
+            self._parser_lines_new.append('')
+            self._added_newline = True
+
+        self._reset()
+
+        line_length = len(new_lines)
+        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
+        opcodes = sm.get_opcodes()
+        debug.speed('diff parser calculated')
+        debug.dbg('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
+
+        for operation, i1, i2, j1, j2 in opcodes:
+            debug.dbg('diff %s old[%s:%s] new[%s:%s]',
+                      operation, i1 + 1, i2, j1 + 1, j2)
+
+            if j2 == line_length + int(self._added_newline):
+                # The empty part after the last newline is not relevant.
+                j2 -= 1
+
+            if operation == 'equal':
+                line_offset = j1 - i1
+                self._copy_from_old_parser(line_offset, i2, j2)
+            elif operation == 'replace':
+                self._parse(until_line=j2)
+            elif operation == 'insert':
+                self._parse(until_line=j2)
+            else:
+                assert operation == 'delete'
+
+        # With this action all change will finally be applied and we have a
+        # changed module.
+        self._nodes_stack.close()
+
+        if self._added_newline:
+            _remove_last_newline(self._module)
+
+        # Good for debugging.
+        if debug.debug_function:
+            self._enabled_debugging(old_lines, new_lines)
+        last_pos = self._module.end_pos[0]
+        if last_pos != line_length:
+            current_lines = splitlines(self._module.get_code(), keepends=True)
+            diff = difflib.unified_diff(current_lines, new_lines)
+            raise Exception(
+                "There's an issue (%s != %s) with the diff parser. Please report:\n%s"
+                % (last_pos, line_length, ''.join(diff))
+            )
+
+        debug.speed('diff parser end')
+        return self._module
+
+    def _enabled_debugging(self, old_lines, lines_new):
+        if self._module.get_code() != ''.join(lines_new):
+            debug.warning('parser issue:\n%s\n%s', ''.join(old_lines),
+                          ''.join(lines_new))
+
+    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
+        copied_nodes = [None]
+
+        last_until_line = -1
+        while until_line_new > self._nodes_stack.parsed_until_line:
+            parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
+            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
+            if line_stmt is None:
+                # Parse 1 line at least. We don't need more, because we just
+                # want to get into a state where the old parser has statements
+                # again that can be copied (e.g. not lines within parentheses).
+                self._parse(self._nodes_stack.parsed_until_line + 1)
+            elif not copied_nodes:
+                # We have copied as much as possible (but definitely not too
+                # much). Therefore we just parse the rest.
+                # We might not reach the end, because there's a statement
+                # that is not finished.
+                self._parse(until_line_new)
+            else:
+                p_children = line_stmt.parent.children
+                index = p_children.index(line_stmt)
+
+                copied_nodes = self._nodes_stack.copy_nodes(
+                    p_children[index:],
+                    until_line_old,
+                    line_offset
+                )
+                # Match all the nodes that are in the wanted range.
+                if copied_nodes:
+                    self._copy_count += 1
+
+                    from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
+                    to = self._nodes_stack.parsed_until_line
+
+                    debug.dbg('diff actually copy %s to %s', from_, to)
+            # Since there are potential bugs that might loop here endlessly, we
+            # just stop here.
+            assert last_until_line != self._nodes_stack.parsed_until_line \
+                or not copied_nodes, last_until_line
+            last_until_line = self._nodes_stack.parsed_until_line
+
+    def _get_old_line_stmt(self, old_line):
+        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
+
+        if _ends_with_newline(leaf):
+            leaf = leaf.get_next_leaf()
+        if leaf.get_start_pos_of_prefix()[0] == old_line:
+            node = leaf
+            while node.parent.type not in ('file_input', 'suite'):
+                node = node.parent
+            return node
+        # Must be on the same line. Otherwise we need to parse that bit.
+        return None
+
+    def _get_before_insertion_node(self):
+        if self._nodes_stack.is_empty():
+            return None
+
+        line = self._nodes_stack.parsed_until_line + 1
+        node = self._new_module.get_last_leaf()
+        while True:
+            parent = node.parent
+            if parent.type in ('suite', 'file_input'):
+                assert node.end_pos[0] <= line
+                assert node.end_pos[1] == 0 or '\n' in self._prefix
+                return node
+            node = parent
+
+    def _parse(self, until_line):
+        """
+        Parses at least until the given line, but might just parse more until a
+        valid state is reached.
+        """
+        last_until_line = 0
+        while until_line > self._nodes_stack.parsed_until_line:
+            node = self._try_parse_part(until_line)
+            nodes = self._get_children_nodes(node)
+            #self._insert_nodes(nodes)
+
+            self._nodes_stack.add_parsed_nodes(nodes)
+            debug.dbg(
+                'parse part %s to %s (to %s in parser)',
+                nodes[0].get_start_pos_of_prefix()[0],
+                self._nodes_stack.parsed_until_line,
+                node.end_pos[0] - 1
+            )
+            # Since the tokenizer sometimes has bugs, we cannot be sure that
+            # this loop terminates. Therefore assert that there's always a
+            # change.
+            assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
+            last_until_line = self._nodes_stack.parsed_until_line
+
+    def _get_children_nodes(self, node):
+        nodes = node.children
+        first_element = nodes[0]
+        # TODO this looks very strange...
+        if first_element.type == 'error_leaf' and \
+                first_element.original_type == 'indent':
+            assert False, str(nodes)
+
+        return nodes
+
+    def _try_parse_part(self, until_line):
+        """
+        Sets up a normal parser that uses a spezialized tokenizer to only parse
+        until a certain position (or a bit longer if the statement hasn't
+        ended.
+        """
+        self._parser_count += 1
+        # TODO speed up, shouldn't copy the whole list all the time.
+        # memoryview?
+        parsed_until_line = self._nodes_stack.parsed_until_line
+        lines_after = self._parser_lines_new[parsed_until_line:]
+        #print('parse_content', parsed_until_line, lines_after, until_line)
+        tokens = self._diff_tokenize(
+            lines_after,
+            until_line,
+            line_offset=parsed_until_line
+        )
+        self._active_parser = Parser(
+            self._grammar,
+            error_recovery=True
+        )
+        return self._active_parser.parse(tokens=tokens)
+
+    def _diff_tokenize(self, lines, until_line, line_offset=0):
+        is_first_token = True
+        omitted_first_indent = False
+        indents = []
+        tokens = generate_tokens(lines, use_exact_op_types=True)
+        stack = self._active_parser.pgen_parser.stack
+        for typ, string, start_pos, prefix in tokens:
+            start_pos = start_pos[0] + line_offset, start_pos[1]
+            if typ == INDENT:
+                indents.append(start_pos[1])
+                if is_first_token:
+                    omitted_first_indent = True
+                    # We want to get rid of indents that are only here because
+                    # we only parse part of the file. These indents would only
+                    # get parsed as error leafs, which doesn't make any sense.
+                    is_first_token = False
+                    continue
+            is_first_token = False
+
+            if typ == DEDENT:
+                indents.pop()
+                if omitted_first_indent and not indents:
+                    # We are done here, only thing that can come now is an
+                    # endmarker or another dedented code block.
+                    typ, string, start_pos, prefix = next(tokens)
+                    if '\n' in prefix:
+                        prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
+                    else:
+                        prefix = ''
+                    yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    break
+            elif typ == NEWLINE and start_pos[0] >= until_line:
+                yield TokenInfo(typ, string, start_pos, prefix)
+                # Check if the parser is actually in a valid suite state.
+                if suite_or_file_input_is_valid(self._grammar, stack):
+                    start_pos = start_pos[0] + 1, 0
+                    while len(indents) > int(omitted_first_indent):
+                        indents.pop()
+                        yield TokenInfo(DEDENT, '', start_pos, '')
+
+                    yield TokenInfo(ENDMARKER, '', start_pos, '')
+                    break
+                else:
+                    continue
+
+            yield TokenInfo(typ, string, start_pos, prefix)
+
+
+class _NodesStackNode(object):
+    """
+    Collects the future children of one tree node (``tree_node``) as groups
+    of nodes. The children are only set on the tree node in ``close``.
+    """
+    # One group of children that was added together. ``line_offset`` is
+    # applied to the leafs of ``children`` up to ``last_line_offset_leaf``.
+    ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')
+
+    def __init__(self, tree_node, parent=None):
+        self.tree_node = tree_node
+        self.children_groups = []
+        self.parent = parent
+
+    def close(self):
+        """
+        Applies the line offsets of all groups, makes the collected nodes
+        the children of ``tree_node`` and sets their parent accordingly.
+        """
+        children = []
+        for children_part, line_offset, last_line_offset_leaf in self.children_groups:
+            if line_offset != 0:
+                try:
+                    _update_positions(
+                        children_part, line_offset, last_line_offset_leaf)
+                except _PositionUpdatingFinished:
+                    # Reached the last leaf the offset is meant for.
+                    pass
+            children += children_part
+        self.tree_node.children = children
+        # Reset the parents
+        for node in children:
+            node.parent = self.tree_node
+
+    def add(self, children, line_offset=0, last_line_offset_leaf=None):
+        """Appends a group of children. Positions are changed in ``close``."""
+        group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
+        self.children_groups.append(group)
+
+    def get_last_line(self, suffix):
+        """
+        Returns the last line that is covered by the children added so far
+        (0 if there are none) plus the number of newlines in ``suffix``.
+        """
+        line = 0
+        if self.children_groups:
+            children_group = self.children_groups[-1]
+            last_leaf = children_group.children[-1].get_last_leaf()
+            line = last_leaf.end_pos[0]
+
+            # Calculate the line offsets
+            offset = children_group.line_offset
+            if offset:
+                # In case the line_offset is not applied to this specific leaf,
+                # just ignore it.
+                if last_leaf.line <= children_group.last_line_offset_leaf.line:
+                    line += children_group.line_offset
+
+            # Newlines end on the next line, which means that they would cover
+            # the next line. That line is not fully parsed at this point.
+            if _ends_with_newline(last_leaf, suffix):
+                line -= 1
+        line += suffix.count('\n')
+        return line
+
+
+class _NodesStack(object):
+    """
+    A stack of ``_NodesStackNode`` objects with the module at its base. New
+    nodes (parsed or copied) are added to the node on the stack that matches
+    their indentation. ``close`` finally writes everything to the module.
+    """
+    endmarker_type = 'endmarker'
+
+    def __init__(self, module):
+        # Top of stack
+        self._tos = self._base_node = _NodesStackNode(module)
+        self._module = module
+        # Whitespace after the last newline of a removed endmarker prefix.
+        self._last_prefix = ''
+        # Prefix of a removed endmarker, belongs to whatever is added next.
+        self.prefix = ''
+
+    def is_empty(self):
+        # NOTE(review): ``_NodesStackNode.__init__`` sets ``tree_node``,
+        # ``children_groups`` and ``parent``, but no ``children`` - confirm
+        # that this attribute access can work.
+        return not self._base_node.children
+
+    @property
+    def parsed_until_line(self):
+        """The last line covered by the nodes that were added so far."""
+        return self._tos.get_last_line(self.prefix)
+
+    def _get_insertion_node(self, indentation_node):
+        """
+        Closes stack nodes until one is found that ``indentation_node`` can
+        be added to and returns it: a suite that is not indented more than
+        the node, or the file_input.
+        """
+        indentation = indentation_node.start_pos[1]
+
+        # find insertion node
+        node = self._tos
+        while True:
+            tree_node = node.tree_node
+            if tree_node.type == 'suite':
+                # A suite starts with NEWLINE, ...
+                node_indentation = tree_node.children[1].start_pos[1]
+
+                if indentation >= node_indentation:  # Not a Dedent
+                    # We might be at the most outer layer: modules. We
+                    # don't want to depend on the first statement
+                    # having the right indentation.
+                    return node
+
+            elif tree_node.type == 'file_input':
+                return node
+
+            node = self._close_tos()
+
+    def _close_tos(self):
+        """Closes the top of the stack and returns the new top (its parent)."""
+        self._tos.close()
+        self._tos = self._tos.parent
+        return self._tos
+
+    def add_parsed_nodes(self, tree_nodes):
+        """
+        Adds freshly parsed nodes at the position that fits the indentation
+        of the first one. A trailing endmarker is removed before.
+        """
+        tree_nodes = self._remove_endmarker(tree_nodes)
+        if not tree_nodes:
+            return
+
+        assert tree_nodes[0].type != 'newline'
+
+        node = self._get_insertion_node(tree_nodes[0])
+        assert node.tree_node.type in ('suite', 'file_input')
+        node.add(tree_nodes)
+        self._update_tos(tree_nodes[-1])
+
+    def _remove_endmarker(self, tree_nodes):
+        """
+        Helps cleaning up the tree nodes that get inserted.
+
+        Prepends the stored prefix to the first leaf. If the last leaf is an
+        endmarker, its prefix is stored for later and the last element of
+        ``tree_nodes`` is left out of the returned list.
+        """
+        last_leaf = tree_nodes[-1].get_last_leaf()
+        is_endmarker = last_leaf.type == self.endmarker_type
+        self._last_prefix = ''
+        if is_endmarker:
+            try:
+                separation = last_leaf.prefix.rindex('\n')
+            except ValueError:
+                # No newline in the prefix, nothing to split off.
+                pass
+            else:
+                # Remove the whitespace part of the prefix after a newline.
+                # That is not relevant if parentheses were opened. Always parse
+                # until the end of a line.
+                last_leaf.prefix, self._last_prefix = \
+                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
+
+        first_leaf = tree_nodes[0].get_first_leaf()
+        first_leaf.prefix = self.prefix + first_leaf.prefix
+        self.prefix = ''
+
+        if is_endmarker:
+            self.prefix = last_leaf.prefix
+
+            tree_nodes = tree_nodes[:-1]
+
+        return tree_nodes
+
+    def copy_nodes(self, tree_nodes, until_line, line_offset):
+        """
+        Copies tree nodes from the old parser tree.
+
+        Returns the list of tree nodes that were copied (might be empty).
+        """
+        tos = self._get_insertion_node(tree_nodes[0])
+
+        new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
+        return new_nodes
+
+    def _copy_nodes(self, tos, nodes, until_line, line_offset):
+        """
+        Adds the nodes of ``nodes`` that end until ``until_line`` to ``tos``.
+        A classdef or funcdef that reaches beyond that line is copied
+        partially by copying the start of its suite recursively.
+
+        Returns a tuple of the copied nodes and the new top of stack.
+        """
+        new_nodes = []
+
+        new_tos = tos
+        for node in nodes:
+            if node.type == 'endmarker':
+                # Endmarkers just distort all the checks below. Remove them.
+                break
+
+            if node.start_pos[0] > until_line:
+                break
+            # TODO this check might take a bit of time for large files. We
+            # might want to change this to do more intelligent guessing or
+            # binary search.
+            if _get_last_line(node) > until_line:
+                # We can split up functions and classes later.
+                if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite':
+                    new_nodes.append(node)
+                break
+
+            new_nodes.append(node)
+
+        if not new_nodes:
+            return [], tos
+
+        last_node = new_nodes[-1]
+        # Index of the node in new_nodes whose last leaf is the last one
+        # that the line offset of this group gets applied to.
+        line_offset_index = -1
+        if last_node.type in ('classdef', 'funcdef'):
+            suite = last_node.children[-1]
+            if suite.type == 'suite':
+                suite_tos = _NodesStackNode(suite)
+                # Don't need to pass line_offset here, it's already done by the
+                # parent.
+                suite_nodes, recursive_tos = self._copy_nodes(
+                    suite_tos, suite.children, until_line, line_offset)
+                if len(suite_nodes) < 2:
+                    # A suite only with newline is not valid.
+                    new_nodes.pop()
+                else:
+                    suite_tos.parent = tos
+                    new_tos = recursive_tos
+                    # The leafs of the suite are handled by the recursive
+                    # call, this group ends with the node before.
+                    line_offset_index = -2
+
+        elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
+                          _is_flow_node(new_nodes[-1])):
+            # Error leafs/nodes don't have a defined start/end. Error
+            # nodes might not end with a newline (e.g. if there's an
+            # open `(`). Therefore ignore all of them unless they are
+            # succeeded with valid parser state.
+            # If we copy flows at the end, they might be continued
+            # after the copy limit (in the new parser).
+            # In this while loop we try to remove until we find a newline.
+            new_nodes.pop()
+            while new_nodes:
+                last_node = new_nodes[-1]
+                if last_node.get_last_leaf().type == 'newline':
+                    break
+                new_nodes.pop()
+
+        if new_nodes:
+            try:
+                last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
+            except IndexError:
+                line_offset = 0
+                # In this case we don't have to calculate an offset, because
+                # there's no children to be managed.
+                last_line_offset_leaf = None
+            tos.add(new_nodes, line_offset, last_line_offset_leaf)
+        return new_nodes, new_tos
+
+    def _update_tos(self, tree_node):
+        """
+        Pushes the suites at the end of ``tree_node`` on the stack, so that
+        nodes added later can continue the innermost one.
+        """
+        if tree_node.type in ('suite', 'file_input'):
+            self._tos = _NodesStackNode(tree_node, self._tos)
+            self._tos.add(list(tree_node.children))
+            self._update_tos(tree_node.children[-1])
+        elif tree_node.type in ('classdef', 'funcdef'):
+            self._update_tos(tree_node.children[-1])
+
+    def close(self):
+        """
+        Closes all nodes that are still on the stack and appends a new
+        endmarker that holds the remaining prefix to the module.
+        """
+        while self._tos is not None:
+            self._close_tos()
+
+        # Add an endmarker.
+        try:
+            last_leaf = self._module.get_last_leaf()
+            end_pos = list(last_leaf.end_pos)
+        except IndexError:
+            # No leaf to start from, the endmarker starts the file.
+            end_pos = [1, 0]
+        lines = splitlines(self.prefix)
+        assert len(lines) > 0
+        if len(lines) == 1:
+            end_pos[1] += len(lines[0])
+        else:
+            end_pos[0] += len(lines) - 1
+            end_pos[1] = len(lines[-1])
+
+        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
+        endmarker.parent = self._module
+        self._module.children.append(endmarker)
--- a/parso/python/grammar2.7.txt
+++ b/parso/python/grammar2.7.txt
@@ -0,0 +1,152 @@
+# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
+
+# Note:  Changing the grammar specified in this file will most likely
+#        require corresponding changes in the parser module
+#        (../Modules/parsermodule.c).  If you can't make the changes to
+#        that module yourself, please co-ordinate the required changes
+#        with someone who can; ask around on python-dev for help.  Fred
+#        Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed in PEP 306,
+# "How to Change Python's Grammar"
+
+
+# Start symbols for the grammar:
+#	    file_input is a module or sequence of commands read from an input file;
+#	    single_input is a single interactive statement;
+#	    eval_input is the input for the eval() and input() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef)
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: ((tfpdef ['=' test] ',')*
+                ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
+                | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+tname: NAME [':' test]
+tfpdef: tname | '(' tfplist ')'
+tfplist: tfpdef (',' tfpdef)* [',']
+varargslist: ((vfpdef ['=' test] ',')*
+              ('*' [vname] (',' vname ['=' test])*  [',' '**' vname] | '**' vname)
+              | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+vname: NAME
+vfpdef: vname | '(' vfplist ')'
+vfplist: vfpdef (',' vfpdef)* [',']
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | exec_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+                      '>>' test [ (',' test)+ [','] ] )
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test [',' test [',' test]]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+	        ['else' ':' suite]
+	        ['finally' ':' suite] |
+	       'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+with_var: 'as' expr
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [(',' | 'as') test]]
+# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
+# classes and functions to be empty, which is beneficial for autocompletion.
+suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
+
+# Backward compatibility cruft to support:
+# [ x for x in lambda: True, lambda: False if x() ]
+# even while also allowing:
+# lambda x: 5 if x else 2
+# (But not a mix of the two)
+testlist_safe: old_test [(',' old_test)+ [',']]
+old_test: or_test | old_lambdef
+old_lambdef: 'lambda' [varargslist] ':' old_test
+
+test: or_test ['if' or_test 'else' test] | lambdef
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       '`' testlist1 '`' |
+       NAME | NUMBER | STRING+ | '.' '.' '.')
+# Modification by David Halter, remove `testlist_gexp` and `listmaker`
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+lambdef: 'lambda' [varargslist] ':' test
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+# Modification by David Halter, dictsetmaker -> dictorsetmaker (so that it's
+# the same as in the 3.4 grammar).
+dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+                (test (comp_for | (',' test)* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: (argument ',')* (argument [',']
+                         |'*' test (',' argument)* [',' '**' test] 
+                         |'**' test)
+argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
+comp_if: 'if' old_test [comp_iter]
+
+testlist1: test (',' test)*
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [testlist]
--- a/parso/python/grammar3.4.txt
+++ b/parso/python/grammar3.4.txt
@@ -0,0 +1,135 @@
+# Grammar for Python
+
+# Note:  Changing the grammar specified in this file will most likely
+#        require corresponding changes in the parser module
+#        (../Modules/parsermodule.c).  If you can't make the changes to
+#        that module yourself, please co-ordinate the required changes
+#        with someone who can; ask around on python-dev for help.  Fred
+#        Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed in PEP 306,
+# "How to Change Python's Grammar"
+
+# Start symbols for the grammar:
+#       single_input is a single interactive statement;
+#       file_input is a module or sequence of commands read from an input file;
+#       eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef)
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
+       ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
+     |  '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
+tfpdef: NAME [':' test]
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
+       ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
+     |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
+vfpdef: NAME
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
+nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+            ['else' ':' suite]
+            ['finally' ':' suite] |
+           'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test ['as' NAME]]
+# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
+# classes and functions to be empty, which is beneficial for autocompletion.
+suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
+
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+                  (test (comp_for | (',' test)* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: (argument ',')* (argument [',']
+                         |'*' test (',' argument)* [',' '**' test] 
+                         |'**' test)
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
--- a/parso/python/grammar3.5.txt
+++ b/parso/python/grammar3.5.txt
@@ -0,0 +1,154 @@
+# Grammar for Python
+
+# Note:  Changing the grammar specified in this file will most likely
+#        require corresponding changes in the parser module
+#        (../Modules/parsermodule.c).  If you can't make the changes to
+#        that module yourself, please co-ordinate the required changes
+#        with someone who can; ask around on python-dev for help.  Fred
+#        Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed at
+# https://docs.python.org/devguide/grammar.html
+
+# Start symbols for the grammar:
+#       single_input is a single interactive statement;
+#       file_input is a module or sequence of commands read from an input file;
+#       eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef)
+
+# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens
+# skipping python3.5 compatibility, in favour of 3.7 solution
+async_funcdef: 'async' funcdef
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+
+parameters: '(' [typedargslist] ')'
+typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
+       ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
+     |  '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
+tfpdef: NAME [':' test]
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
+       ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
+     |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
+vfpdef: NAME
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
+nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
+async_stmt: 'async' (funcdef | with_stmt | for_stmt)
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+            ['else' ':' suite]
+            ['finally' ':' suite] |
+           'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test ['as' NAME]]
+# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
+# classes and functions to be empty, which is beneficial for autocompletion.
+suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
+
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401 (which really works :-)
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'@'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom_expr ['**' factor]
+atom_expr: ['await'] atom trailer*
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( ((test ':' test | '**' expr)
+                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+                  ((test | star_expr)
+                   (comp_for | (',' (test | star_expr))* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: argument (',' argument)*  [',']
+
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+# "test '=' test" is really "keyword '=' test", but we have no such token.
+# These need to be in a single rule to avoid grammar that is ambiguous
+# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
+# we explicitly match '*' here, too, to give it proper precedence.
+# Illegal combinations and orderings are blocked in ast.c:
+# multiple (test comp_for) arguments are blocked; keyword unpackings
+# that precede iterable unpackings are blocked; etc.
+argument: ( test [comp_for] |
+            test '=' test |
+            '**' test |
+            '*' test )
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
--- a/parso/python/grammar3.6.txt
+++ b/parso/python/grammar3.6.txt
@@ -0,0 +1,161 @@
+# Grammar for Python
+
+# Note:  Changing the grammar specified in this file will most likely
+#        require corresponding changes in the parser module
+#        (../Modules/parsermodule.c).  If you can't make the changes to
+#        that module yourself, please co-ordinate the required changes
+#        with someone who can; ask around on python-dev for help.  Fred
+#        Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed at
+# https://docs.python.org/devguide/grammar.html
+
+# Start symbols for the grammar:
+#       file_input is a module or sequence of commands read from an input file;
+#       single_input is a single interactive statement;
+#       eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef)
+
+# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
+# tokens, skipping python3.5+ compatibility, in favour of 3.7 solution
+async_funcdef: 'async' funcdef
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+
+parameters: '(' [typedargslist] ')'
+typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
+        '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
+      | '**' tfpdef [',']]]
+  | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
+  | '**' tfpdef [','])
+tfpdef: NAME [':' test]
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
+        '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+      | '**' vfpdef [',']]]
+  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+  | '**' vfpdef [',']
+)
+vfpdef: NAME
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+annassign: ':' test ['=' test]
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal and annotated assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
+nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
+async_stmt: 'async' (funcdef | with_stmt | for_stmt)
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+            ['else' ':' suite]
+            ['finally' ':' suite] |
+           'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test ['as' NAME]]
+# Edit by Francisco Souza/David Halter: The stmt is now optional. This reflects
+# how Jedi allows classes and functions to be empty, which is beneficial for
+# autocompletion.
+suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
+
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401 (which really works :-)
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'@'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom_expr ['**' factor]
+atom_expr: ['await'] atom trailer*
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( ((test ':' test | '**' expr)
+                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+                  ((test | star_expr)
+                   (comp_for | (',' (test | star_expr))* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: argument (',' argument)*  [',']
+
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+# "test '=' test" is really "keyword '=' test", but we have no such token.
+# These need to be in a single rule to avoid grammar that is ambiguous
+# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
+# we explicitly match '*' here, too, to give it proper precedence.
+# Illegal combinations and orderings are blocked in ast.c:
+# multiple (test comp_for) arguments are blocked; keyword unpackings
+# that precede iterable unpackings are blocked; etc.
+argument: ( test [comp_for] |
+            test '=' test |
+            '**' test |
+            '*' test )
+
+comp_iter: comp_for | comp_if
+comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -0,0 +1,232 @@
+from parso.python import tree
+from parso import tokenize
+from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
+                               STRING, tok_name)
+from parso.parser import BaseParser
+from jedi.common import splitlines
+
+
+class Parser(BaseParser):
+    """
+    This class is used to parse a Python file, it then divides them into a
+    class structure of different scopes.
+
+    :param grammar: The grammar object of pgen2. Loaded by load_grammar.
+    :param error_recovery: Forwarded to ``BaseParser``.
+    :param start_symbol: Forwarded to ``BaseParser``; ``parse`` only supports
+        ``'file_input'`` while error recovery is active.
+    """
+
+    # Grammar symbol name -> node class used by convert_node(). Symbols that
+    # are not listed here are wrapped in ``default_node``.
+    node_map = {
+        'expr_stmt': tree.ExprStmt,
+        'classdef': tree.Class,
+        'funcdef': tree.Function,
+        'file_input': tree.Module,
+        'import_name': tree.ImportName,
+        'import_from': tree.ImportFrom,
+        'break_stmt': tree.KeywordStatement,
+        'continue_stmt': tree.KeywordStatement,
+        'return_stmt': tree.ReturnStmt,
+        'raise_stmt': tree.KeywordStatement,
+        'yield_expr': tree.YieldExpr,
+        'del_stmt': tree.KeywordStatement,
+        'pass_stmt': tree.KeywordStatement,
+        'global_stmt': tree.GlobalStmt,
+        'nonlocal_stmt': tree.KeywordStatement,
+        'print_stmt': tree.KeywordStatement,
+        'assert_stmt': tree.AssertStmt,
+        'if_stmt': tree.IfStmt,
+        'with_stmt': tree.WithStmt,
+        'for_stmt': tree.ForStmt,
+        'while_stmt': tree.WhileStmt,
+        'try_stmt': tree.TryStmt,
+        'comp_for': tree.CompFor,
+        'decorator': tree.Decorator,
+        'lambdef': tree.Lambda,
+        'old_lambdef': tree.Lambda,
+        'lambdef_nocond': tree.Lambda,
+    }
+    default_node = tree.PythonNode
+
+    def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
+        super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
+
+        self.syntax_errors = []
+        # Indent levels recorded by error_recovery() whose matching DEDENT
+        # has to be dropped by _recovery_tokenize().
+        self._omit_dedent_list = []
+        # Current indentation depth as counted by _recovery_tokenize().
+        self._indent_counter = 0
+
+        # TODO do print absolute import detection here.
+        # try:
+        #     del python_grammar_no_print_statement.keywords["print"]
+        # except KeyError:
+        #     pass  # Doesn't exist in the Python 3 grammar.
+
+        # if self.options["print_function"]:
+        #     python_grammar = pygram.python_grammar_no_print_statement
+        # else:
+
+    def parse(self, tokens):
+        """
+        Parse ``tokens`` and return the resulting tree node.
+
+        With error recovery enabled only the ``file_input`` start symbol is
+        supported (anything else raises ``NotImplementedError``) and the
+        tokens are routed through ``_recovery_tokenize`` first.
+        """
+        if self._error_recovery:
+            if self._start_symbol != 'file_input':
+                raise NotImplementedError
+
+            tokens = self._recovery_tokenize(tokens)
+
+        node = super(Parser, self).parse(tokens)
+
+        if self._start_symbol == 'file_input' != node.type:
+            # If there's only one statement, we get back a non-module. That's
+            # not what we want, we want a module, so we add it here:
+            node = self.convert_node(
+                self._grammar,
+                self._grammar.symbol2number['file_input'],
+                [node]
+            )
+
+        return node
+
+    def convert_node(self, grammar, type, children):
+        """
+        Convert raw node information to a PythonBaseNode instance.
+
+        This is passed to the parser driver which calls it whenever a reduction of a
+        grammar rule produces a new complete node, so that the tree is built
+        strictly bottom-up.
+        """
+        # TODO REMOVE symbol, we don't want type here.
+        symbol = grammar.number2symbol[type]
+        try:
+            return self.node_map[symbol](children)
+        except KeyError:
+            if symbol == 'suite':
+                # We don't want the INDENT/DEDENT in our parser tree. Those
+                # leaves are just cancer. They are virtual leaves and not real
+                # ones and therefore have pseudo start/end positions and no
+                # prefixes. Just ignore them.
+                children = [children[0]] + children[2:-1]
+            return self.default_node(symbol, children)
+
+    def convert_leaf(self, grammar, type, value, prefix, start_pos):
+        """
+        Create the tree leaf for a single token.
+
+        NAME tokens become a ``Keyword`` when ``value`` is one of
+        ``grammar.keywords`` and a ``Name`` otherwise; STRING, NUMBER, NEWLINE
+        and ENDMARKER map to their dedicated leaf classes and every other
+        token type becomes an ``Operator``.
+        """
+        # print('leaf', repr(value), token.tok_name[type])
+        if type == tokenize.NAME:
+            if value in grammar.keywords:
+                return tree.Keyword(value, start_pos, prefix)
+            else:
+                return tree.Name(value, start_pos, prefix)
+        elif type == STRING:
+            return tree.String(value, start_pos, prefix)
+        elif type == NUMBER:
+            return tree.Number(value, start_pos, prefix)
+        elif type == NEWLINE:
+            return tree.Newline(value, start_pos, prefix)
+        elif type == ENDMARKER:
+            return tree.EndMarker(value, start_pos, prefix)
+        else:
+            return tree.Operator(value, start_pos, prefix)
+
+    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
+                       add_token_callback):
+        """
+        This parser is written in a dynamic way, meaning that this parser
+        allows using different grammars (even non-Python). However, error
+        recovery is purely written for Python.
+        """
+        if not self._error_recovery:
+            return super(Parser, self).error_recovery(
+                grammar, stack, arcs, typ, value, start_pos, prefix,
+                add_token_callback)
+
+        def current_suite(stack):
+            # For now just discard everything that is not a suite or
+            # file_input, if we detect an error.
+            for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
+                # `suite` can sometimes be only simple_stmt, not stmt.
+                symbol = grammar.number2symbol[type_]
+                if symbol == 'file_input':
+                    break
+                elif symbol == 'suite' and len(nodes) > 1:
+                    # suites without an indent in them get discarded.
+                    break
+            return index, symbol, nodes
+
+        index, symbol, nodes = current_suite(stack)
+
+        # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
+        if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos):
+            # Nodes were moved into an error node: pass the current token on
+            # to add_token_callback().
+            add_token_callback(typ, value, start_pos, prefix)
+        else:
+            # Nothing was removed from the stack: the token itself is either
+            # dropped (INDENT) or stored as an error leaf.
+            if typ == INDENT:
+                # For every deleted INDENT we have to delete a DEDENT as well.
+                # Otherwise the parser will get into trouble and DEDENT too early.
+                self._omit_dedent_list.append(self._indent_counter)
+            else:
+                error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
+                stack[-1][2][1].append(error_leaf)
+
+    def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
+        """
+        Cut the parser stack back to ``start_index``.
+
+        Beginning with the first removed stack entry that holds nodes, the
+        nodes of all removed entries are collected into one
+        ``PythonErrorNode`` which is appended to the node list of the entry
+        right before ``start_index``. ``arcs``, ``value`` and ``start_pos``
+        are not used here.
+
+        :return: The list of removed ``(symbol, nodes)`` pairs; it is empty
+            (falsy) when none of the removed entries contained nodes.
+        """
+        failed_stack = []
+        found = False
+        all_nodes = []
+        for dfa, state, (typ, nodes) in stack[start_index:]:
+            if nodes:
+                found = True
+            if found:
+                symbol = grammar.number2symbol[typ]
+                failed_stack.append((symbol, nodes))
+                all_nodes += nodes
+        if failed_stack:
+            stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
+
+        stack[start_index:] = []
+        return failed_stack
+
+    def _recovery_tokenize(self, tokens):
+        """
+        Yield the ``(typ, value, start_pos, prefix)`` tuples of ``tokens``
+        while keeping ``_indent_counter`` up to date.
+
+        A DEDENT is swallowed (and not counted) when the last entry of
+        ``_omit_dedent_list`` equals the current indent counter.
+        """
+        for typ, value, start_pos, prefix in tokens:
+            # print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
+            if typ == DEDENT:
+                # We need to count indents, because if we just omit any DEDENT,
+                # we might omit them in the wrong place.
+                o = self._omit_dedent_list
+                if o and o[-1] == self._indent_counter:
+                    o.pop()
+                    continue
+
+                self._indent_counter -= 1
+            elif typ == INDENT:
+                self._indent_counter += 1
+
+            yield typ, value, start_pos, prefix
+
+
+def _remove_last_newline(node):
+    """
+    Strip the final newline character from ``node`` in place.
+
+    The newline is taken from the prefix of the last child of ``node`` (used
+    as the end marker) when that prefix is non-empty, otherwise from the value
+    of the leaf preceding it. The start position of the end marker is updated
+    to match the shortened text.
+
+    :raises ValueError: If the prefix is empty and there is no previous leaf,
+        or if the inspected text does not end with a newline.
+    """
+    endmarker = node.children[-1]
+    # The newline is either in the endmarker as a prefix or the previous
+    # leaf as a newline token.
+    prefix = endmarker.prefix
+    leaf = endmarker.get_previous_leaf()
+    if prefix:
+        text = prefix
+    else:
+        if leaf is None:
+            raise ValueError("You're trying to remove a newline from an empty module.")
+
+        text = leaf.value
+
+    if not text.endswith('\n'):
+        raise ValueError("There's no newline at the end, cannot remove it.")
+
+    text = text[:-1]
+    if prefix:
+        endmarker.prefix = text
+
+        # The end marker starts where the shortened prefix ends; the prefix
+        # itself begins at the end of the previous leaf, or at (1, 0) when
+        # there is none.
+        if leaf is None:
+            end_pos = (1, 0)
+        else:
+            end_pos = leaf.end_pos
+
+        lines = splitlines(text, keepends=True)
+        if len(lines) == 1:
+            # Same line: advance the column by the prefix length.
+            end_pos = end_pos[0], end_pos[1] + len(lines[0])
+        else:
+            # The prefix spans several lines: the column restarts on the last one.
+            end_pos = end_pos[0] + len(lines) - 1,  len(lines[-1])
+        endmarker.start_pos = end_pos
+    else:
+        leaf.value = text
+        endmarker.start_pos = leaf.end_pos
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
--- a/parso/token.py
+++ b/parso/token.py
@@ -0,0 +1,90 @@
+from __future__ import absolute_import
+
+from jedi._compatibility import is_py3, is_py35
+from token import *
+
+
+COMMENT = N_TOKENS
+tok_name[COMMENT] = 'COMMENT'
+N_TOKENS += 1
+
+NL = N_TOKENS
+tok_name[NL] = 'NL'
+N_TOKENS += 1
+
+if is_py3:
+    BACKQUOTE = N_TOKENS
+    tok_name[BACKQUOTE] = 'BACKQUOTE'
+    N_TOKENS += 1
+else:
+    RARROW = N_TOKENS
+    tok_name[RARROW] = 'RARROW'
+    N_TOKENS += 1
+    ELLIPSIS = N_TOKENS
+    tok_name[ELLIPSIS] = 'ELLIPSIS'
+    N_TOKENS += 1
+
+if not is_py35:
+    ATEQUAL = N_TOKENS
+    tok_name[ATEQUAL] = 'ATEQUAL'
+    N_TOKENS += 1
+
+
+
+# Map from operator to number (since tokenize doesn't do this)
+
+opmap_raw = """\
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
++ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
++= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+@= ATEQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+... ELLIPSIS
+"""
+
+# Every row of opmap_raw is "<operator> <TOKEN_NAME>". The name is looked up
+# in this module's namespace, so each row must consist of exactly two fields.
+opmap = {}
+for line in opmap_raw.splitlines():
+    op, name = line.split()
+    opmap[op] = globals()[name]
--- a/parso/tokenize.py
+++ b/parso/tokenize.py
@@ -0,0 +1,369 @@
+# -*- coding: utf-8 -*-
+"""
+This tokenizer has been copied from the ``tokenize.py`` standard library
+tokenizer. The reason was simple: The standard library tokenizer fails
+if the indentation is not right. The fast parser of jedi however requires
+"wrong" indentation.
+
+Basically this is a stripped down version of the standard library module, so
+you can read the documentation there. Additionally we included some speed and
+memory optimizations here.
+"""
+from __future__ import absolute_import
+
+import string
+import re
+from collections import namedtuple
+import itertools as _itertools
+
+from parso.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
+                               NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
+from jedi._compatibility import is_py3, py_version, u
+from jedi.common import splitlines
+
+
+# Matches a source encoding declaration such as "coding: utf-8" or
+# "coding=latin-1" and captures the encoding name. A raw string is used so
+# that ``\s`` and ``\w`` reach the regex engine without relying on invalid
+# string escape sequences.
+cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
+
+
+if is_py3:
+    # Python 3 has str.isidentifier() to check if a char is a valid identifier
+    is_identifier = str.isidentifier
+else:
+    namechars = string.ascii_letters + '_'
+    is_identifier = lambda s: s in namechars
+
+
+# NOTE(review): parso.token already registers a COMMENT token and increments
+# N_TOKENS afterwards. The N_TOKENS imported above is presumably that final
+# count, so this binds COMMENT to a second, different number -- confirm that
+# the duplicate registration is intended.
+COMMENT = N_TOKENS
+tok_name[COMMENT] = 'COMMENT'
+
+
+def group(*choices, **kwargs):
+    """
+    Join the regex fragments ``choices`` with ``|`` inside a group.
+
+    The group is non-capturing unless ``capture=True`` is passed.
+    """
+    # ``capture`` is fetched from **kwargs because Python 2 lacks
+    # keyword-only arguments.
+    capture = kwargs.pop('capture', False)
+    assert not kwargs
+
+    opening = '(' if capture else '(?:'
+    return opening + '|'.join(choices) + ')'
+
+def any(*choices):
+    """Return a group of ``choices`` that may repeat zero or more times."""
+    # NOTE: this name shadows the builtin ``any`` inside this module.
+    alternatives = group(*choices)
+    return alternatives + '*'
+
+def maybe(*choices):
+    """Return a group of ``choices`` that is optional (zero or one match)."""
+    alternatives = group(*choices)
+    return alternatives + '?'
+
+# Note: we use unicode matching for names ("\w") but ascii matching for
+# number literals.
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+Name = r'\w+'
+
+if py_version >= 36:
+    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+    Binnumber = r'0[bB](?:_?[01])+'
+    Octnumber = r'0[oO](?:_?[0-7])+'
+    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
+    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
+    Floatnumber = group(Pointfloat, Expfloat)
+    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
+else:
+    Hexnumber = r'0[xX][0-9a-fA-F]+'
+    Binnumber = r'0[bB][01]+'
+    if is_py3:
+        Octnumber = r'0[oO][0-7]+'
+    else:
+        Octnumber = '0[0-7]+'
+    Decnumber = r'(?:0+|[1-9][0-9]*)'
+    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+    Exponent = r'[eE][-+]?[0-9]+'
+    Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
+    Expfloat = r'[0-9]+' + Exponent
+    Floatnumber = group(Pointfloat, Expfloat)
+    Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# Return the empty string, plus all of the valid string prefixes.
+def _all_string_prefixes():
+    """
+    Build the set of accepted string prefixes, the empty string included.
+
+    Only the lower case base prefixes are listed below, each in a single
+    character order (e.g. 'fr' but not 'rf'); every permutation and every
+    upper/lower case combination of them is generated.
+    """
+    base_prefixes = ['b', 'r', 'u', 'br']
+    if py_version >= 36:
+        base_prefixes += ['f', 'fr']
+    if py_version <= 27:
+        # TODO this is actually not 100% valid. ur is valid in Python 2.7,
+        # while ru is not.
+        base_prefixes.append('ur')
+
+    # if we add binary f-strings, add: ['fb', 'fbr']
+    prefixes = set([''])
+    for base in base_prefixes:
+        for ordering in _itertools.permutations(base):
+            # For each character offer its given and its upper case form.
+            case_choices = [(char, char.upper()) for char in ordering]
+            for combination in _itertools.product(*case_choices):
+                prefixes.add(''.join(combination))
+    return prefixes
+
+def _compile(expr):
+    """Compile ``expr`` into a pattern object with ``re.UNICODE`` set."""
+    unicode_flag = re.UNICODE
+    return re.compile(expr, unicode_flag)
+
+# Note that since _all_string_prefixes includes the empty string,
+#  StringPrefix can be the empty string (making it optional).
+StringPrefix = group(*_all_string_prefixes())
+
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+Triple = group(StringPrefix + "'''", StringPrefix + '"""')
+
+# Because of leftmost-then-longest match semantics, be sure to put the
+# longest operators first (e.g., if = came before ==, == would get
+# recognized as two instances of =).
+Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
+                 r"//=?", r"->",
+                 r"[+\-*/%&@|^=<>]=?",
+                 r"~")
+
+Bracket = '[][(){}]'
+Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
+Funny = group(Operator, Bracket, Special)
+
+PlainToken = group(Number, Funny, Name, capture=True)
+
+# First (or only) line of ' or " string.
+ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                group("'", r'\\\r?\n'),
+                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                group('"', r'\\\r?\n'))
+PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+PseudoToken = group(Whitespace, capture=True) + \
+    group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
+
+# For a given string prefix plus quotes, endpats maps it to a regex
+#  to match the remainder of that string. _prefix can be empty, for
+#  a normal single or triple quoted string (with no prefix).
+endpats = {}
+for _prefix in _all_string_prefixes():
+    endpats[_prefix + "'"] = _compile(Single)
+    endpats[_prefix + '"'] = _compile(Double)
+    endpats[_prefix + "'''"] = _compile(Single3)
+    endpats[_prefix + '"""'] = _compile(Double3)
+
+# A set of all of the single and triple quoted string prefixes,
+#  including the opening quotes.
+single_quoted = set()
+triple_quoted = set()
+for t in _all_string_prefixes():
+    for p in (t + '"', t + "'"):
+        single_quoted.add(p)
+    for p in (t + '"""', t + "'''"):
+        triple_quoted.add(p)
+
+
+# TODO add with?
+ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
+                       'finally', 'while', 'return')
+pseudo_token_compiled = _compile(PseudoToken)
+
+
+class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+    """A token: its type, its text, where it starts and the prefix before it."""
+
+    def __repr__(self):
+        # Show the readable type name instead of the numeric type.
+        readable = self._replace(type=self.get_type_name())
+        return 'TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' % readable
+
+    def get_type_name(self, exact=True):
+        """Return the ``tok_name`` entry of the exact or the plain type."""
+        return tok_name[self.exact_type if exact else self.type]
+
+    @property
+    def exact_type(self):
+        """The ``opmap`` type for known operators, the plain type otherwise."""
+        if self.type != OP or self.string not in opmap:
+            return self.type
+        return opmap[self.string]
+
+    @property
+    def end_pos(self):
+        """The ``(line, column)`` position right after the token text."""
+        start_line = self.start_pos[0]
+        lines = splitlines(self.string)
+        if len(lines) <= 1:
+            return start_line, self.start_pos[1] + len(self.string)
+        return start_line + len(lines) - 1, 0
+
+
+def source_tokens(source, use_exact_op_types=False):
+    """
+    Generate tokens from the source code (string).
+
+    The source is split into lines (line endings kept) and handed to
+    ``generate_tokens`` together with ``use_exact_op_types``.
+    """
+    lines = splitlines(source, keepends=True)
+    return generate_tokens(lines, use_exact_op_types)
+
+
+def generate_tokens(lines, use_exact_op_types=False):
+    """
+    A heavily modified Python standard library tokenizer.
+
+    Additionally to the default information, yields also the prefix of each
+    token. This idea comes from lib2to3. The prefix contains all information
+    that is irrelevant for the parser like newlines in parentheses or comments.
+
+    :param lines: The source lines to tokenize, in order.
+    :param use_exact_op_types: If true, operators are yielded with their
+        ``opmap`` type instead of the generic ``OP`` type.
+    :return: A generator of ``TokenInfo`` objects, ending with the pending
+        DEDENT tokens and one ENDMARKER.
+    """
+    paren_level = 0  # count parentheses
+    indents = [0]
+    max = 0
+    numchars = '0123456789'
+    contstr = ''
+    contline = None
+    # We start with a newline. This makes indent at the first position
+    # possible. It's not valid Python, but still better than an INDENT in the
+    # second line (and not in the first). This makes quite a few things in
+    # Jedi's fast parser possible.
+    new_line = True
+    prefix = ''  # Should never be required, but here for safety
+    additional_prefix = ''
+    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
+        pos, max = 0, len(line)
+
+        if contstr:                                         # continued string
+            endmatch = endprog.match(line)
+            if endmatch:
+                pos = endmatch.end(0)
+                yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
+                contstr = ''
+                contline = None
+            else:
+                contstr = contstr + line
+                contline = contline + line
+                continue
+
+        while pos < max:
+            pseudomatch = pseudo_token_compiled.match(line, pos)
+            if not pseudomatch:                             # scan for tokens
+                # Nothing matches: the rest of the line becomes one ERRORTOKEN.
+                txt = line[pos:]
+                if txt.endswith('\n'):
+                    new_line = True
+                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
+                break
+
+            # Group 1 is the leading whitespace, group 2 the token itself.
+            prefix = additional_prefix + pseudomatch.group(1)
+            additional_prefix = ''
+            start, pos = pseudomatch.span(2)
+            spos = (lnum, start)
+            token = pseudomatch.group(2)
+            initial = token[0]
+
+            if new_line and initial not in '\r\n#':
+                new_line = False
+                if paren_level == 0:
+                    # Leading form feeds are subtracted from the indentation.
+                    i = 0
+                    while line[i] == '\f':
+                        i += 1
+                        start -= 1
+                    if start > indents[-1]:
+                        yield TokenInfo(INDENT, '', spos, '')
+                        indents.append(start)
+                    while start < indents[-1]:
+                        yield TokenInfo(DEDENT, '', spos, '')
+                        indents.pop()
+
+            if (initial in numchars or                      # ordinary number
+                    (initial == '.' and token != '.' and token != '...')):
+                yield TokenInfo(NUMBER, token, spos, prefix)
+            elif initial in '\r\n':
+                if not new_line and paren_level == 0:
+                    yield TokenInfo(NEWLINE, token, spos, prefix)
+                else:
+                    # Blank lines and newlines inside brackets only extend the
+                    # prefix of the next token.
+                    additional_prefix = prefix + token
+                new_line = True
+            elif initial == '#':  # Comments
+                assert not token.endswith("\n")
+                additional_prefix = prefix + token
+            elif token in triple_quoted:
+                endprog = endpats[token]
+                endmatch = endprog.match(line, pos)
+                if endmatch:                                # all on one line
+                    pos = endmatch.end(0)
+                    token = line[start:pos]
+                    yield TokenInfo(STRING, token, spos, prefix)
+                else:
+                    contstr_start = (lnum, start)           # multiple lines
+                    contstr = line[start:]
+                    contline = line
+                    break
+            elif initial in single_quoted or \
+                    token[:2] in single_quoted or \
+                    token[:3] in single_quoted:
+                if token[-1] == '\n':                       # continued string
+                    contstr_start = lnum, start
+                    endprog = (endpats.get(initial) or endpats.get(token[1])
+                               or endpats.get(token[2]))
+                    contstr = line[start:]
+                    contline = line
+                    break
+                else:                                       # ordinary string
+                    yield TokenInfo(STRING, token, spos, prefix)
+            elif is_identifier(initial):                      # ordinary name
+                if token in ALWAYS_BREAK_TOKENS:
+                    # These tokens reset the bracket level and close every
+                    # indentation level deeper than their own column.
+                    paren_level = 0
+                    while True:
+                        indent = indents.pop()
+                        if indent > start:
+                            yield TokenInfo(DEDENT, '', spos, '')
+                        else:
+                            indents.append(indent)
+                            break
+                yield TokenInfo(NAME, token, spos, prefix)
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
+                additional_prefix += prefix + line[start:]
+                break
+            else:
+                if token in '([{':
+                    paren_level += 1
+                elif token in ')]}':
+                    paren_level -= 1
+
+                try:
+                    # This check is needed in any case to check if it's a valid
+                    # operator or just some random unicode character.
+                    exact_type = opmap[token]
+                except KeyError:
+                    exact_type = typ = ERRORTOKEN
+                # NOTE(review): the ``typ`` bound in the except clause is
+                # always overwritten below, so without use_exact_op_types a
+                # token missing from opmap is yielded as OP -- confirm intent.
+                if use_exact_op_types:
+                    typ = exact_type
+                else:
+                    typ = OP
+                yield TokenInfo(typ, token, spos, prefix)
+
+    if contstr:
+        # The input ended inside an unterminated multi-line string.
+        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
+        if contstr.endswith('\n'):
+            new_line = True
+
+    # NOTE(review): ``lnum`` is only bound by the loop above, so an empty
+    # ``lines`` would fail here -- confirm callers never pass one.
+    end_pos = lnum, max
+    # As the last position we just take the maximally possible position. We
+    # remove -1 for the last new line.
+    for indent in indents[1:]:
+        yield TokenInfo(DEDENT, '', end_pos, '')
+    yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)
+
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) >= 2:
+        path = sys.argv[1]
+        with open(path) as f:
+            code = u(f.read())
+    else:
+        code = u(sys.stdin.read())
+    for token in source_tokens(code, use_exact_op_types=True):
+        print(token)
--- a/parso/tree.py
+++ b/parso/tree.py
@@ -0,0 +1,328 @@
+from abc import abstractmethod, abstractproperty
+from parso._compatibility import utf8_repr, encoding, is_py3
+
+
+def search_ancestor(node, *node_types):
+    """
+    Walk up the parents of ``node`` and return the closest ancestor whose
+    ``type`` is one of ``node_types``.
+
+    :param node: The node whose ancestors are inspected; the node itself is
+        never returned.
+    :param node_types: The type names that are searched for, given as
+        separate positional arguments.
+    :return: The matching ancestor or ``None`` if there is none.
+    """
+    ancestor = node.parent
+    while ancestor is not None and ancestor.type not in node_types:
+        ancestor = ancestor.parent
+    return ancestor
+
+
+class NodeOrLeaf(object):
+    """
+    The base class for nodes and leaves.
+    """
+    __slots__ = ()
+
+    def get_root_node(self):
+        """
+        Returns the root node of a parser tree. The returned node doesn't have
+        a parent node like all the other nodes/leaves.
+        """
+        scope = self
+        while scope.parent is not None:
+            scope = scope.parent
+        return scope
+
+    def get_next_sibling(self):
+        """
+        The node/leaf immediately following the invocant in their parent's
+        children list. If the invocant does not have a next sibling, it is
+        None.
+        """
+        # Can't use index(); we need to test by identity
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                try:
+                    return self.parent.children[i + 1]
+                except IndexError:
+                    return None
+
+    def get_previous_sibling(self):
+        """
+        The node/leaf immediately preceding the invocant in their parent's
+        children list. If the invocant does not have a previous sibling, it is
+        None.
+        """
+        # Can't use index(); we need to test by identity
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                if i == 0:
+                    return None
+                return self.parent.children[i - 1]
+
+    def get_previous_leaf(self):
+        """
+        Returns the previous leaf in the parser tree.
+        Returns `None` if it's the first element in the parser tree.
+        """
+        node = self
+        # Climb until a node with a preceding sibling is found.
+        while True:
+            c = node.parent.children
+            i = c.index(node)
+            if i == 0:
+                node = node.parent
+                if node.parent is None:
+                    return None
+            else:
+                node = c[i - 1]
+                break
+
+        # Descend to the last leaf of that sibling.
+        while True:
+            try:
+                node = node.children[-1]
+            except AttributeError:  # A Leaf doesn't have children.
+                return node
+
+    def get_next_leaf(self):
+        """
+        Returns the next leaf in the parser tree.
+        Returns `None` if it's the last element in the parser tree.
+        """
+        node = self
+        # Climb until a node with a following sibling is found.
+        while True:
+            c = node.parent.children
+            i = c.index(node)
+            if i == len(c) - 1:
+                node = node.parent
+                if node.parent is None:
+                    return None
+            else:
+                node = c[i + 1]
+                break
+
+        # Descend to the first leaf of that sibling.
+        while True:
+            try:
+                node = node.children[0]
+            except AttributeError:  # A Leaf doesn't have children.
+                return node
+
+    @abstractproperty
+    def start_pos(self):
+        """
+        Returns the starting position of the node or leaf (its prefix is not
+        included) as a tuple, e.g. `(3, 4)`.
+
+        :return tuple of int: (line, column)
+        """
+
+    @abstractproperty
+    def end_pos(self):
+        """
+        Returns the end position of the node or leaf as a tuple, e.g. `(3, 4)`.
+
+        :return tuple of int: (line, column)
+        """
+
+    @abstractmethod
+    def get_start_pos_of_prefix(self):
+        """
+        Returns the start_pos of the prefix. This means basically it returns
+        the end_pos of the previous leaf. The `get_start_pos_of_prefix()` of
+        the leaf `+` in `2 + 1` would be `(1, 1)`, while the start_pos is
+        `(1, 2)`.
+
+        :return tuple of int: (line, column)
+        """
+
+    @abstractmethod
+    def get_first_leaf(self):
+        """
+        Returns the first leaf of a node or itself if it's a leaf.
+        """
+
+    @abstractmethod
+    def get_last_leaf(self):
+        """
+        Returns the last leaf of a node or itself if it's a leaf.
+        """
+
+    @abstractmethod
+    def get_code(self, normalized=False, include_prefix=True):
+        """
+        Returns the code that was the input of the parser.
+
+        If `normalized` is set, the returned code will be normalized and may
+        not be equal to the input.
+
+        :param include_prefix: If false, the leading prefix (whitespace and
+            comments) of e.g. a statement is left out.
+        :param normalized: Deprecated. Please don't use. Will be replaced with something more powerful.
+        """
+
+
+class Leaf(NodeOrLeaf):
+    """
+    A terminal of the parser tree: a value plus its position and prefix.
+
+    The position is kept in the ``line`` and ``indent`` slots and exposed as
+    the ``start_pos`` tuple.
+    """
+    __slots__ = ('value', 'parent', 'line', 'indent', 'prefix')
+
+    def __init__(self, value, start_pos, prefix=''):
+        self.value = value
+        self.start_pos = start_pos
+        self.prefix = prefix
+        self.parent = None
+
+    @property
+    def start_pos(self):
+        return self.line, self.indent
+
+    @start_pos.setter
+    def start_pos(self, value):
+        self.line, self.indent = value[0], value[1]
+
+    def get_start_pos_of_prefix(self):
+        predecessor = self.get_previous_leaf()
+        if predecessor is not None:
+            return predecessor.end_pos
+        # It's the first leaf: go back one line per newline in the prefix.
+        return self.line - self.prefix.count('\n'), 0
+
+    def get_first_leaf(self):
+        return self
+
+    def get_last_leaf(self):
+        return self
+
+    def get_code(self, normalized=False, include_prefix=True):
+        if normalized or not include_prefix:
+            return self.value
+        return self.prefix + self.value
+
+    @property
+    def end_pos(self):
+        segments = self.value.split('\n')
+        extra_lines = len(segments) - 1
+        column = len(segments[-1])
+        if not extra_lines:
+            # Single line token: the column continues after the start column.
+            column += self.indent
+        return self.line + extra_lines, column
+
+    @utf8_repr
+    def __repr__(self):
+        details = (type(self).__name__, self.value, self.start_pos)
+        return "<%s: %s start=%s>" % details
+
+
+class BaseNode(NodeOrLeaf):
+    """
+    The super class for all nodes.
+
+    If you create custom nodes, you will probably want to inherit from this
+    ``BaseNode``.
+    """
+    __slots__ = ('children', 'parent')
+    type = None
+
+    def __init__(self, children):
+        """
+        Store ``children`` and make this node the parent of each of them.
+
+        The node itself starts out without a parent.
+        """
+        for c in children:
+            c.parent = self
+        self.children = children
+        self.parent = None
+
+    @property
+    def start_pos(self):
+        return self.children[0].start_pos
+
+    def get_start_pos_of_prefix(self):
+        return self.children[0].get_start_pos_of_prefix()
+
+    @property
+    def end_pos(self):
+        return self.children[-1].end_pos
+
+    def _get_code_for_children(self, children, normalized, include_prefix):
+        """
+        Concatenate the code of ``children``.
+
+        If ``include_prefix`` is false, the first child is asked for its code
+        without prefix; all other children always contribute their prefix.
+        """
+        # TODO implement normalized (depending on context).
+        if not include_prefix:
+            head = children[0].get_code(include_prefix=False)
+            tail = "".join(c.get_code(normalized) for c in children[1:])
+            return head + tail
+        return "".join(c.get_code(normalized) for c in children)
+
+    def get_code(self, normalized=False, include_prefix=True):
+        return self._get_code_for_children(self.children, normalized, include_prefix)
+
+    def get_leaf_for_position(self, position, include_prefixes=False):
+        def binary_search(lower, upper):
+            if lower == upper:
+                element = self.children[lower]
+                if not include_prefixes and position < element.start_pos:
+                    # We're on a prefix.
+                    return None
+                # In case we have prefixes, a leaf always matches
+                try:
+                    return element.get_leaf_for_position(position, include_prefixes)
+                except AttributeError:
+                    return element
+
+
+            index = int((lower + upper) / 2)
+            element = self.children[index]
+            if position <= element.end_pos:
+                return binary_search(lower, index)
+            else:
+                return binary_search(index + 1, upper)
+
+        if not ((1, 0) <= position <= self.children[-1].end_pos):
+            raise ValueError('Please provide a position that exists within this node.')
+        return binary_search(0, len(self.children) - 1)
+
+    def get_first_leaf(self):
+        return self.children[0].get_first_leaf()
+
+    def get_last_leaf(self):
+        return self.children[-1].get_last_leaf()
+
+    @utf8_repr
+    def __repr__(self):
+        code = self.get_code().replace('\n', ' ').strip()
+        if not is_py3:
+            code = code.encode(encoding, 'replace')
+        return "<%s: %s@%s,%s>" % \
+            (type(self).__name__, code, self.start_pos[0], self.start_pos[1])
+
+
+class Node(BaseNode):
+    """Concrete implementation for interior nodes."""
+    __slots__ = ('type',)
+
+    def __init__(self, type, children):
+        super(Node, self).__init__(children)
+        self.type = type
+
+    def __repr__(self):
+        return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children)
+
+
+class ErrorNode(BaseNode):
+    """
+    A node that contains valid nodes/leaves that were followed by a token
+    that was invalid. This basically means that the leaf after this node is
+    where Python would mark a syntax error.
+    """
+    __slots__ = ()
+    type = 'error_node'
+
+
+class ErrorLeaf(Leaf):
+    """
+    A leaf that is either completely invalid in a language (like `$` in Python)
+    or is invalid at that position. Like the star in `1 +* 1`.
+    """
+    __slots__ = ('original_type')
+    type = 'error_leaf'
+
+    def __init__(self, original_type, value, start_pos, prefix=''):
+        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
+        self.original_type = original_type
+
+    def __repr__(self):
+        return "<%s: %s:%s, %s)>" % \
+            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)