Add an initial version of the code from Jedi.

Still includes Jedi imports (`from jedi import ...`) that have to be removed.
This commit is contained in:
Dave Halter
2017-05-11 07:13:23 -04:00
parent 36e0c047ae
commit 150fb4c86e
18 changed files with 4371 additions and 0 deletions

8
parso/__init__.py Normal file

@@ -0,0 +1,8 @@
from parso.parser import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso import python
def parse(grammar, code):
# TODO stub: will eventually construct and use Parser(grammar, code).
raise NotImplementedError

147
parso/cache.py Normal file

@@ -0,0 +1,147 @@
import time
import os
import sys
import hashlib
import gc
import shutil
import pickle
import platform
import errno
from jedi import settings
from jedi import debug
from jedi._compatibility import FileNotFoundError
_PICKLE_VERSION = 30
"""
Version number (integer) for file system cache.
Increment this number when there are any incompatible changes in
the parser tree classes. For example, the following changes
are regarded as incompatible:
- A class name is changed.
- A class is moved to another module.
- The __slots__ of a class are changed.
"""
_VERSION_TAG = '%s-%s%s-%s' % (
platform.python_implementation(),
sys.version_info[0],
sys.version_info[1],
_PICKLE_VERSION
)
"""
Short name for distinguishing Python implementations and versions.
It's like `sys.implementation.cache_tag` but for Python < 3.3
we generate something similar. See:
http://docs.python.org/3/library/sys.html#sys.implementation
"""
# for fast_parser, should not be deleted
parser_cache = {}
class _NodeCacheItem(object):
def __init__(self, node, lines, change_time=None):
self.node = node
self.lines = lines
if change_time is None:
change_time = time.time()
self.change_time = change_time
def load_module(grammar, path):
"""
Returns a module node or None if it fails.
"""
try:
p_time = os.path.getmtime(path)
except FileNotFoundError:
return None
try:
# TODO Add grammar sha256
module_cache_item = parser_cache[path]
if p_time <= module_cache_item.change_time:
return module_cache_item.node
except KeyError:
if not settings.use_filesystem_cache:
return None
return _load_from_file_system(grammar, path, p_time)
def _load_from_file_system(grammar, path, p_time):
cache_path = _get_hashed_path(grammar, path)
try:
try:
if p_time > os.path.getmtime(cache_path):
# Cache is outdated
return None
except OSError as e:
if e.errno == errno.ENOENT:
# In Python 2 instead of an IOError here we get an OSError.
raise FileNotFoundError
else:
raise
with open(cache_path, 'rb') as f:
gc.disable()
try:
module_cache_item = pickle.load(f)
finally:
gc.enable()
except FileNotFoundError:
return None
else:
parser_cache[path] = module_cache_item
debug.dbg('pickle loaded: %s', path)
return module_cache_item.node
def save_module(grammar, path, module, lines, pickling=True):
try:
p_time = None if path is None else os.path.getmtime(path)
except OSError:
p_time = None
pickling = False
item = _NodeCacheItem(module, lines, p_time)
parser_cache[path] = item
if settings.use_filesystem_cache and pickling and path is not None:
_save_to_file_system(grammar, path, item)
def _save_to_file_system(grammar, path, item):
with open(_get_hashed_path(grammar, path), 'wb') as f:
pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)
def remove_old_modules():
"""
# TODO Might want to use such a function to clean up the cache (if it's
# too old). We could potentially also scan for old files in the
# directory and delete those.
"""
def clear_cache():
shutil.rmtree(settings.cache_directory)
parser_cache.clear()
def _get_hashed_path(grammar, path):
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
directory = _get_cache_directory_path()
return os.path.join(directory, '%s-%s.pkl' % (grammar.sha256, file_hash))
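# Sketch of the resulting cache layout (path and hashes hypothetical):
#   <settings.cache_directory>/CPython-36-30/<grammar.sha256>-<sha256(path)>.pkl
# i.e. one subdirectory per interpreter/pickle version, one pickle per source file.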
def _get_cache_directory_path():
directory = os.path.join(settings.cache_directory, _VERSION_TAG)
if not os.path.exists(directory):
os.makedirs(directory)
return directory
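# Usage sketch (hypothetical, showing how the functions above fit together):
#   node = load_module(grammar, path)   # cached module node or None
#   if node is None:
#       node = ...  # parse the file, e.g. via parso.python.parse()
#       save_module(grammar, path, node, lines)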

77
parso/parser.py Normal file

@@ -0,0 +1,77 @@
"""
The ``Parser`` tries to convert the available Python code into an easy-to-read
format, something like an abstract syntax tree. The classes that represent this
tree live in the :mod:`parso.tree` module.
The Python module ``tokenize`` is a very important part of the ``Parser``,
because it splits the code into different words (tokens). Sometimes it looks a
bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
module for this?" Well, ``ast`` does a very good job understanding proper Python
code, but fails to work as soon as there's a single line of broken code.
There's one important optimization that needs to be known: statements are not
parsed completely. ``Statement`` is just a representation of the tokens within
the statement. This lowers memory usage and CPU time and reduces the complexity
of the ``Parser`` (there's another parser sitting inside ``Statement``, which
produces ``Array`` and ``Call``).
"""
from parso import tree
from parso.pgen2.parse import PgenParser
class ParserSyntaxError(Exception):
"""
Contains error information about the parser tree.
May be raised as an exception.
"""
def __init__(self, message, position):
self.message = message
self.position = position
class BaseParser(object):
node_map = {}
default_node = tree.Node
leaf_map = {
}
default_leaf = tree.Leaf
def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
self._grammar = grammar
self._start_symbol = start_symbol
self._error_recovery = error_recovery
def parse(self, tokens):
start_number = self._grammar.symbol2number[self._start_symbol]
self.pgen_parser = PgenParser(
self._grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number
)
node = self.pgen_parser.parse(tokens)
# The stack is empty now, we don't need it anymore.
del self.pgen_parser
return node
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
if self._error_recovery:
raise NotImplementedError("Error Recovery is not implemented")
else:
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
def convert_node(self, grammar, type_, children):
# TODO REMOVE symbol, we don't want type here.
symbol = grammar.number2symbol[type_]
try:
return self.node_map[symbol](children)
except KeyError:
return self.default_node(symbol, children)
def convert_leaf(self, grammar, type_, value, prefix, start_pos):
try:
return self.leaf_map[type_](value, start_pos, prefix)
except KeyError:
return self.default_leaf(value, start_pos, prefix)
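# Sketch (hypothetical subclass): node_map/leaf_map let subclasses plug in their
# own tree classes, e.g.
#   class MyParser(BaseParser):
#       node_map = {'funcdef': MyFunction}   # MyFunction is a made-up class
#       leaf_map = {token.NAME: MyName}      # keyed by token type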

8
parso/pgen2/__init__.py Normal file

@@ -0,0 +1,8 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

127
parso/pgen2/grammar.py Normal file

@@ -0,0 +1,127 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
"""This module defines the data structures used to represent a grammar.
These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.
There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.
"""
import pickle
import hashlib
class Grammar(object):
"""Pgen parsing tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self, bnf_text):
self.symbol2number = {}
self.number2symbol = {}
self.states = []
self.dfas = {}
self.labels = [(0, "EMPTY")]
self.keywords = {}
self.tokens = {}
self.symbol2label = {}
self.start = 256
self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
with open(filename, "wb") as f:
pickle.dump(self.__dict__, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
with open(filename, "rb") as f:
d = pickle.load(f)
self.__dict__.update(d)
def copy(self):
"""
Copy the grammar.
"""
new = self.__class__()
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
"tokens", "symbol2label"):
setattr(new, dict_attr, getattr(self, dict_attr).copy())
new.labels = self.labels[:]
new.states = self.states[:]
new.start = self.start
return new
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")
pprint(self.number2symbol)
print("states")
pprint(self.states)
print("dfas")
pprint(self.dfas)
print("labels")
pprint(self.labels)
print("start", self.start)

217
parso/pgen2/parse.py Normal file

@@ -0,0 +1,217 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
"""
Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from parso import tokenize
class InternalParseError(Exception):
"""
Exception to signal the parser is stuck and error recovery didn't help.
Basically this shouldn't happen. It's a sign that something is really
wrong.
"""
def __init__(self, msg, type, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
(msg, tokenize.tok_name[type], value, start_pos))
self.msg = msg
self.type = type
self.value = value
self.start_pos = start_pos
def token_to_ilabel(grammar, type_, value):
# Map from token to label
if type_ == tokenize.NAME:
# Check for reserved words (keywords)
try:
return grammar.keywords[value]
except KeyError:
pass
try:
return grammar.tokens[type_]
except KeyError:
return None
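# Example: for a NAME token with value 'if' this returns the keyword label
# grammar.keywords['if']; for an ordinary NAME or a NUMBER it falls back to
# grammar.tokens[type_]; unknown token types yield None.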
class PgenParser(object):
"""Parser engine.
The proper usage sequence is:
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.addtoken(...): # parse a token
break
root = p.rootnode # root of abstract syntax tree
A Parser instance may be reused by calling setup() repeatedly.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when addtoken() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, error_recovery()
is called. There is no error recovery; the parser cannot be used
after a syntax error was reported (but it can be reinitialized by
calling setup()).
"""
def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
"""Constructor.
The grammar argument is a grammar.Grammar instance; see the
grammar module for more information.
The parser is not ready yet for parsing; you must call the
setup() method to get it started.
The optional convert argument is a function mapping concrete
syntax tree nodes to abstract syntax tree nodes. If not
given, no conversion is done and the syntax tree produced is
the concrete syntax tree. If given, it must be a function of
two arguments, the first being the grammar (a grammar.Grammar
instance), and the second being the concrete syntax tree node
to be converted. The syntax tree is converted from the bottom
up.
A concrete syntax tree node is a (type, nodes) tuple, where
type is the node type (a token or symbol number) and nodes
is a list of children for symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
up to the converter function.
"""
self.grammar = grammar
self.convert_node = convert_node
self.convert_leaf = convert_leaf
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, children),
# where children is a list of nodes or None
newnode = (start, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = [stackentry]
self.rootnode = None
self.error_recovery = error_recovery
def parse(self, tokens):
for type_, value, start_pos, prefix in tokens:
if self.addtoken(type_, value, start_pos, prefix):
break
else:
# We never broke out -- EOF is too soon -- unfinished statement.
# However, the error recovery might have added the token again; if
# the stack is empty, we're fine.
if self.stack:
raise InternalParseError("incomplete input", type_, value, start_pos)
return self.rootnode
def addtoken(self, type_, value, start_pos, prefix):
"""Add a token; return True if this is the end of the program."""
ilabel = token_to_ilabel(self.grammar, type_, value)
# Loop until the token is shifted; may raise exceptions
_gram = self.grammar
_labels = _gram.labels
_push = self._push
_pop = self._pop
_shift = self._shift
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = _labels[i]
if ilabel == i:
# Look it up in the list of labels
assert t < 256
# Shift a token; we're done with it
_shift(type_, value, newstate, prefix, start_pos)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
_pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = _gram.dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
_push(t, itsdfa, newstate)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
_pop()
if not self.stack:
# Done parsing, but another token is input
raise InternalParseError("too much input", type_, value, start_pos)
else:
self.error_recovery(self.grammar, self.stack, arcs, type_,
value, start_pos, prefix, self.addtoken)
break
def _shift(self, type_, value, newstate, prefix, start_pos):
"""Shift a token. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
def _push(self, type_, newdfa, newstate):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type_, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
def _pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, (type_, children) = self.stack.pop()
# If there's exactly one child, return that child instead of creating a
# new node. We still create expr_stmt and file_input though, because a
# lot of Jedi depends on its logic.
if len(children) == 1:
newnode = children[0]
else:
newnode = self.convert_node(self.grammar, type_, children)
try:
# Equal to:
# dfa, state, node = self.stack[-1]
# symbol, children = node
self.stack[-1][2][1].append(newnode)
except IndexError:
# Stack is empty, set the rootnode.
self.rootnode = newnode

394
parso/pgen2/pgen.py Normal file

@@ -0,0 +1,394 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
from parso.pgen2 import grammar
from parso import token
from parso import tokenize
class ParserGenerator(object):
def __init__(self, bnf_text):
self._bnf_text = bnf_text
self.generator = tokenize.source_tokens(bnf_text)
self.gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self.parse()
self.first = {} # map from symbol name to set of tokens
self.addfirstsets()
def make_grammar(self):
c = grammar.Grammar(self._bnf_text)
names = list(self.dfas.keys())
names.sort()
names.remove(self.startsymbol)
names.insert(0, self.startsymbol)
for name in names:
i = 256 + len(c.symbol2number)
c.symbol2number[name] = i
c.number2symbol[i] = name
for name in names:
dfa = self.dfas[name]
states = []
for state in dfa:
arcs = []
for label, next in state.arcs.items():
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
states.append(arcs)
c.states.append(states)
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
c.start = c.symbol2number[self.startsymbol]
return c
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
for label in rawfirst:
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
return first
def make_label(self, c, label):
# XXX Maybe this should be a method on a subclass of converter?
ilabel = len(c.labels)
if label[0].isalpha():
# Either a symbol name or a named token
if label in c.symbol2number:
# A symbol name (a non-terminal)
if label in c.symbol2label:
return c.symbol2label[label]
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(token, label, None)
assert isinstance(itoken, int), label
assert itoken in token.tok_name, label
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
value = eval(label)
if value[0].isalpha():
# A keyword
if value in c.keywords:
return c.keywords[value]
else:
c.labels.append((token.NAME, value))
c.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = token.opmap[value] # Fails if unknown token
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
def addfirstsets(self):
names = list(self.dfas.keys())
names.sort()
for name in names:
if name not in self.first:
self.calcfirst(name)
#print name, self.first[name].keys()
def calcfirst(self, name):
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset = {}
overlapcheck = {}
for label, next in state.arcs.items():
if label in self.dfas:
if label in self.first:
fset = self.first[label]
if fset is None:
raise ValueError("recursion for rule %r" % name)
else:
self.calcfirst(label)
fset = self.first[label]
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset[label] = 1
overlapcheck[label] = {label: 1}
inverse = {}
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
inverse[symbol] = label
self.first[name] = totalset
def parse(self):
dfas = {}
startsymbol = None
# MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
self.gettoken()
# RULE: NAME ':' RHS NEWLINE
name = self.expect(token.NAME)
self.expect(token.OP, ":")
a, z = self.parse_rhs()
self.expect(token.NEWLINE)
#self.dump_nfa(name, a, z)
dfa = self.make_dfa(a, z)
#self.dump_dfa(name, dfa)
# oldlen = len(dfa)
self.simplify_dfa(dfa)
# newlen = len(dfa)
dfas[name] = dfa
#print name, oldlen, newlen
if startsymbol is None:
startsymbol = name
return dfas, startsymbol
def make_dfa(self, start, finish):
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction. Let's represent sets as dicts with 1 for
# values.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state):
base = {}
addclosure(state, base)
return base
def addclosure(state, base):
assert isinstance(state, NFAState)
if state in base:
return
base[state] = 1
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
for nfastate in state.nfaset:
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
for label, nfaset in arcs.items():
for st in states:
if st.nfaset == nfaset:
break
else:
st = DFAState(nfaset, finish)
states.append(st)
state.addarc(st, label)
return states # List of DFAState instances; first one is start
def dump_nfa(self, name, start, finish):
print("Dump of NFA for", name)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for label, next in state.arcs:
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def dump_dfa(self, name, dfa):
print("Dump of DFA for", name)
for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "")
for label, next in state.arcs.items():
print(" %s -> %d" % (label, dfa.index(next)))
def simplify_dfa(self, dfa):
# This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and
# unify them, until things stop changing.
# dfa is a list of DFAState instances
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfa):
for j in range(i + 1, len(dfa)):
state_j = dfa[j]
if state_i == state_j:
#print " unify", i, j
del dfa[j]
for state in dfa:
state.unifystate(state_j, state_i)
changes = True
break
def parse_rhs(self):
# RHS: ALT ('|' ALT)*
a, z = self.parse_alt()
if self.value != "|":
return a, z
else:
aa = NFAState()
zz = NFAState()
aa.addarc(a)
z.addarc(zz)
while self.value == "|":
self.gettoken()
a, z = self.parse_alt()
aa.addarc(a)
z.addarc(zz)
return aa, zz
def parse_alt(self):
# ALT: ITEM+
a, b = self.parse_item()
while (self.value in ("(", "[") or
self.type in (token.NAME, token.STRING)):
c, d = self.parse_item()
b.addarc(c)
b = d
return a, b
def parse_item(self):
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
if self.value == "[":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, "]")
a.addarc(z)
return a, z
else:
a, z = self.parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self.gettoken()
z.addarc(a)
if value == "+":
return a, z
else:
return a, a
def parse_atom(self):
# ATOM: '(' RHS ')' | NAME | STRING
if self.value == "(":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, ")")
return a, z
elif self.type in (token.NAME, token.STRING):
a = NFAState()
z = NFAState()
a.addarc(z, self.value)
self.gettoken()
return a, z
else:
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def expect(self, type, value=None):
if self.type != type or (value is not None and self.value != value):
self.raise_error("expected %s/%s, got %s/%s",
type, value, self.type, self.value)
value = self.value
self.gettoken()
return value
def gettoken(self):
tup = next(self.generator)
while tup[0] in (token.COMMENT, token.NL):
tup = next(self.generator)
self.type, self.value, self.begin, prefix = tup
#print tokenize.tok_name[self.type], repr(self.value)
def raise_error(self, msg, *args):
if args:
try:
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
line = open(self.filename).readlines()[self.begin[0]]
raise SyntaxError(msg, (self.filename, self.begin[0],
self.begin[1], line))
class NFAState(object):
def __init__(self):
self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next, label=None):
assert label is None or isinstance(label, str)
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState(object):
def __init__(self, nfaset, final):
assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState)
assert isinstance(final, NFAState)
self.nfaset = nfaset
self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState
def addarc(self, next, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next, DFAState)
self.arcs[label] = next
def unifystate(self, old, new):
for label, next in self.arcs.items():
if next is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfaset instance variable
assert isinstance(other, DFAState)
if self.isfinal != other.isfinal:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next in self.arcs.items():
if next is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def generate_grammar(bnf_text):
"""
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
for grouping).
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
p = ParserGenerator(bnf_text)
return p.make_grammar()
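# Example of the dialect (taken from the grammar files added in this commit):
#   file_input: (NEWLINE | stmt)* ENDMARKER
#   decorators: decorator+
#   parameters: '(' [typedargslist] ')'
# i.e. '*'/'+' for repetition, '[]' for optional parts and '|' for alternatives.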

124
parso/python/__init__.py Normal file

@@ -0,0 +1,124 @@
"""
Parsers for Python
"""
import os
from jedi import settings
from jedi.common import splitlines, source_to_unicode
from jedi._compatibility import FileNotFoundError
from parso.pgen2.pgen import generate_grammar
from parso.python.parser import Parser, _remove_last_newline
from parso.python.diff import DiffParser
from parso.tokenize import generate_tokens
from parso.cache import parser_cache, load_module, save_module
_loaded_grammars = {}
def load_grammar(version=None):
"""
Loads a Python grammar. The default version is always the latest.
If you need support for a specific version, please use e.g.
`version='3.3'`.
"""
if version is None:
version = '3.6'
if version in ('3.2', '3.3'):
version = '3.4'
elif version == '2.6':
version = '2.7'
file = 'grammar' + version + '.txt'
global _loaded_grammars
path = os.path.join(os.path.dirname(__file__), file)
try:
return _loaded_grammars[path]
except KeyError:
try:
with open(path) as f:
bnf_text = f.read()
grammar = generate_grammar(bnf_text)
return _loaded_grammars.setdefault(path, grammar)
except FileNotFoundError:
# Just load the default if the file does not exist.
return load_grammar()
def parse(code=None, path=None, grammar=None, error_recovery=True,
start_symbol='file_input', cache=False, diff_cache=False):
"""
If you want to parse a Python file, you most likely want to start here.
If you need finer-grained control over the parsed instance, there will be
other ways to access it.
:param code: A unicode string that contains Python code.
:param path: The path to the file you want to open. Only needed for caching.
:param grammar: A Python grammar, created with load_grammar. If you don't
specify it, the grammar of the current Python version is used.
:param error_recovery: If enabled, any code will be returned. If it is
invalid, it will be returned as an error node. If disabled, you will
get a ParserSyntaxError when encountering syntax errors in your code.
:param start_symbol: The grammar symbol that you want to parse. Only
allowed to be used when error_recovery is disabled.
:return: A syntax tree node. Typically the module.
"""
if code is None and path is None:
raise TypeError("Please provide either code or a path.")
if grammar is None:
grammar = load_grammar()
if cache and not code and path is not None:
# In this case we do actual caching. We just try to load it.
module_node = load_module(grammar, path)
if module_node is not None:
return module_node
if code is None:
with open(path, 'rb') as f:
code = source_to_unicode(f.read())
if diff_cache and settings.fast_parser:
try:
module_cache_item = parser_cache[path]
except KeyError:
pass
else:
lines = splitlines(code, keepends=True)
module_node = module_cache_item.node
old_lines = module_cache_item.lines
if old_lines == lines:
save_module(grammar, path, module_node, lines, pickling=False)
return module_node
new_node = DiffParser(grammar, module_node).update(
old_lines=old_lines,
new_lines=lines
)
save_module(grammar, path, new_node, lines, pickling=cache)
return new_node
added_newline = not code.endswith('\n')
lines = tokenize_lines = splitlines(code, keepends=True)
if added_newline:
code += '\n'
tokenize_lines = list(tokenize_lines)
tokenize_lines[-1] += '\n'
tokenize_lines.append('')
tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
root_node = p.parse(tokens=tokens)
if added_newline:
_remove_last_newline(root_node)
if cache or diff_cache:
save_module(grammar, path, root_node, lines, pickling=cache)
return root_node

603
parso/python/diff.py Normal file

@@ -0,0 +1,603 @@
"""
Basically contains a parser that is faster, because it tries to parse only
parts of the file; if anything changes, it reparses only the changed parts.
It works with a simple diff in the beginning and will try to reuse old parser
fragments.
"""
import re
import difflib
from collections import namedtuple
from jedi.common import splitlines
from jedi import debug
from parso.python.parser import Parser, _remove_last_newline
from parso.python.tree import EndMarker
from parso.tokenize import (generate_tokens, NEWLINE, TokenInfo,
ENDMARKER, INDENT, DEDENT)
def _get_last_line(node_or_leaf):
last_leaf = node_or_leaf.get_last_leaf()
if _ends_with_newline(last_leaf):
return last_leaf.start_pos[0]
else:
return last_leaf.end_pos[0]
def _ends_with_newline(leaf, suffix=''):
if leaf.type == 'error_leaf':
typ = leaf.original_type
else:
typ = leaf.type
return typ == 'newline' or suffix.endswith('\n')
def _flows_finished(grammar, stack):
"""
if, while, for and try might not be finished, because another part might
still be parsed.
"""
for dfa, newstate, (symbol_number, nodes) in stack:
if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
'for_stmt', 'try_stmt'):
return False
return True
def suite_or_file_input_is_valid(grammar, stack):
if not _flows_finished(grammar, stack):
return False
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
if grammar.number2symbol[symbol_number] == 'suite':
# If only a newline is in the suite, the suite is not valid yet.
return len(nodes) > 1
# Not reaching a suite means that we're dealing with file_input levels
# where there's no need for a valid statement in it. It can also be empty.
return True
def _is_flow_node(node):
try:
value = node.children[0].value
except AttributeError:
return False
return value in ('if', 'for', 'while', 'try')
class _PositionUpdatingFinished(Exception):
pass
def _update_positions(nodes, line_offset, last_leaf):
for node in nodes:
try:
children = node.children
except AttributeError:
# Is a leaf
node.line += line_offset
if node is last_leaf:
raise _PositionUpdatingFinished
else:
_update_positions(children, line_offset, last_leaf)
class DiffParser(object):
"""
An advanced form of parsing a file faster. Unfortunately it comes with huge
side effects: it changes the given module.
"""
def __init__(self, grammar, module):
self._grammar = grammar
self._module = module
def _reset(self):
self._copy_count = 0
self._parser_count = 0
self._nodes_stack = _NodesStack(self._module)
def update(self, old_lines, new_lines):
'''
The algorithm works as follows:
Equal:
- Ensure that the start is a newline, otherwise parse until we get
one.
- Copy from parsed_until_line + 1 to max(i2 + 1)
- Make sure that the indentation is correct (e.g. add DEDENT)
- Add old and change positions
Insert:
- Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
much more.
Returns the new module node.
'''
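# Note: the opcodes used below come from the standard library's
# difflib.SequenceMatcher(...).get_opcodes(); each one is a tuple
# ('equal'|'replace'|'insert'|'delete', i1, i2, j1, j2) describing line ranges
# in the old and new file, and it drives the copy/parse decisions below.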
debug.speed('diff parser start')
# Reset the used names cache so they get regenerated.
self._module._used_names = None
self._parser_lines_new = new_lines
self._added_newline = False
if new_lines[-1] != '':
# The Python grammar needs a newline at the end of a file, but for
# everything else we keep working with new_lines here.
self._parser_lines_new = list(new_lines)
self._parser_lines_new[-1] += '\n'
self._parser_lines_new.append('')
self._added_newline = True
self._reset()
line_length = len(new_lines)
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
opcodes = sm.get_opcodes()
debug.speed('diff parser calculated')
debug.dbg('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
for operation, i1, i2, j1, j2 in opcodes:
debug.dbg('diff %s old[%s:%s] new[%s:%s]',
operation, i1 + 1, i2, j1 + 1, j2)
if j2 == line_length + int(self._added_newline):
# The empty part after the last newline is not relevant.
j2 -= 1
if operation == 'equal':
line_offset = j1 - i1
self._copy_from_old_parser(line_offset, i2, j2)
elif operation == 'replace':
self._parse(until_line=j2)
elif operation == 'insert':
self._parse(until_line=j2)
else:
assert operation == 'delete'
# With this action all change will finally be applied and we have a
# changed module.
self._nodes_stack.close()
if self._added_newline:
_remove_last_newline(self._module)
# Good for debugging.
if debug.debug_function:
self._enabled_debugging(old_lines, new_lines)
last_pos = self._module.end_pos[0]
if last_pos != line_length:
current_lines = splitlines(self._module.get_code(), keepends=True)
diff = difflib.unified_diff(current_lines, new_lines)
raise Exception(
"There's an issue (%s != %s) with the diff parser. Please report:\n%s"
% (last_pos, line_length, ''.join(diff))
)
debug.speed('diff parser end')
return self._module
def _enabled_debugging(self, old_lines, lines_new):
if self._module.get_code() != ''.join(lines_new):
debug.warning('parser issue:\n%s\n%s', ''.join(old_lines),
''.join(lines_new))
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
copied_nodes = [None]
last_until_line = -1
while until_line_new > self._nodes_stack.parsed_until_line:
parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
if line_stmt is None:
# Parse 1 line at least. We don't need more, because we just
# want to get into a state where the old parser has statements
# again that can be copied (e.g. not lines within parentheses).
self._parse(self._nodes_stack.parsed_until_line + 1)
elif not copied_nodes:
# We have copied as much as possible (but definitely not too
# much). Therefore we just parse the rest.
# We might not reach the end, because there's a statement
# that is not finished.
self._parse(until_line_new)
else:
p_children = line_stmt.parent.children
index = p_children.index(line_stmt)
copied_nodes = self._nodes_stack.copy_nodes(
p_children[index:],
until_line_old,
line_offset
)
# Match all the nodes that are in the wanted range.
if copied_nodes:
self._copy_count += 1
from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
to = self._nodes_stack.parsed_until_line
debug.dbg('diff actually copy %s to %s', from_, to)
# Since there are potential bugs that might loop here endlessly, we
# just stop here.
assert last_until_line != self._nodes_stack.parsed_until_line \
or not copied_nodes, last_until_line
last_until_line = self._nodes_stack.parsed_until_line
def _get_old_line_stmt(self, old_line):
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
if _ends_with_newline(leaf):
leaf = leaf.get_next_leaf()
if leaf.get_start_pos_of_prefix()[0] == old_line:
node = leaf
while node.parent.type not in ('file_input', 'suite'):
node = node.parent
return node
# Must be on the same line. Otherwise we need to parse that bit.
return None
def _get_before_insertion_node(self):
if self._nodes_stack.is_empty():
return None
line = self._nodes_stack.parsed_until_line + 1
node = self._new_module.get_last_leaf()
while True:
parent = node.parent
if parent.type in ('suite', 'file_input'):
assert node.end_pos[0] <= line
assert node.end_pos[1] == 0 or '\n' in self._prefix
return node
node = parent
def _parse(self, until_line):
"""
Parses at least until the given line, but might just parse more until a
valid state is reached.
"""
last_until_line = 0
while until_line > self._nodes_stack.parsed_until_line:
node = self._try_parse_part(until_line)
nodes = self._get_children_nodes(node)
#self._insert_nodes(nodes)
self._nodes_stack.add_parsed_nodes(nodes)
debug.dbg(
'parse part %s to %s (to %s in parser)',
nodes[0].get_start_pos_of_prefix()[0],
self._nodes_stack.parsed_until_line,
node.end_pos[0] - 1
)
# Since the tokenizer sometimes has bugs, we cannot be sure that
# this loop terminates. Therefore assert that there's always a
# change.
assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
last_until_line = self._nodes_stack.parsed_until_line
def _get_children_nodes(self, node):
nodes = node.children
first_element = nodes[0]
# TODO this looks very strange...
if first_element.type == 'error_leaf' and \
first_element.original_type == 'indent':
assert False, str(nodes)
return nodes
def _try_parse_part(self, until_line):
"""
Sets up a normal parser that uses a specialized tokenizer to parse only
until a certain position (or a bit longer if the statement hasn't
ended).
"""
self._parser_count += 1
# TODO speed up, shouldn't copy the whole list all the time.
# memoryview?
parsed_until_line = self._nodes_stack.parsed_until_line
lines_after = self._parser_lines_new[parsed_until_line:]
#print('parse_content', parsed_until_line, lines_after, until_line)
tokens = self._diff_tokenize(
lines_after,
until_line,
line_offset=parsed_until_line
)
self._active_parser = Parser(
self._grammar,
error_recovery=True
)
return self._active_parser.parse(tokens=tokens)
def _diff_tokenize(self, lines, until_line, line_offset=0):
is_first_token = True
omitted_first_indent = False
indents = []
tokens = generate_tokens(lines, use_exact_op_types=True)
stack = self._active_parser.pgen_parser.stack
for typ, string, start_pos, prefix in tokens:
start_pos = start_pos[0] + line_offset, start_pos[1]
if typ == INDENT:
indents.append(start_pos[1])
if is_first_token:
omitted_first_indent = True
# We want to get rid of indents that are only here because
# we only parse part of the file. These indents would only
# get parsed as error leafs, which doesn't make any sense.
is_first_token = False
continue
is_first_token = False
if typ == DEDENT:
indents.pop()
if omitted_first_indent and not indents:
# We are done here, only thing that can come now is an
# endmarker or another dedented code block.
typ, string, start_pos, prefix = next(tokens)
if '\n' in prefix:
prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
else:
prefix = ''
yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
break
elif typ == NEWLINE and start_pos[0] >= until_line:
yield TokenInfo(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
yield TokenInfo(DEDENT, '', start_pos, '')
yield TokenInfo(ENDMARKER, '', start_pos, '')
break
else:
continue
yield TokenInfo(typ, string, start_pos, prefix)
class _NodesStackNode(object):
ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')
def __init__(self, tree_node, parent=None):
self.tree_node = tree_node
self.children_groups = []
self.parent = parent
def close(self):
children = []
for children_part, line_offset, last_line_offset_leaf in self.children_groups:
if line_offset != 0:
try:
_update_positions(
children_part, line_offset, last_line_offset_leaf)
except _PositionUpdatingFinished:
pass
children += children_part
self.tree_node.children = children
# Reset the parents
for node in children:
node.parent = self.tree_node
def add(self, children, line_offset=0, last_line_offset_leaf=None):
group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
self.children_groups.append(group)
def get_last_line(self, suffix):
line = 0
if self.children_groups:
children_group = self.children_groups[-1]
last_leaf = children_group.children[-1].get_last_leaf()
line = last_leaf.end_pos[0]
# Calculate the line offsets
offset = children_group.line_offset
if offset:
# In case the line_offset is not applied to this specific leaf,
# just ignore it.
if last_leaf.line <= children_group.last_line_offset_leaf.line:
line += children_group.line_offset
# Newlines end on the next line, which means that they would cover
# the next line. That line is not fully parsed at this point.
if _ends_with_newline(last_leaf, suffix):
line -= 1
line += suffix.count('\n')
return line
class _NodesStack(object):
endmarker_type = 'endmarker'
def __init__(self, module):
# Top of stack
self._tos = self._base_node = _NodesStackNode(module)
self._module = module
self._last_prefix = ''
self.prefix = ''
def is_empty(self):
return not self._base_node.children
@property
def parsed_until_line(self):
return self._tos.get_last_line(self.prefix)
def _get_insertion_node(self, indentation_node):
indentation = indentation_node.start_pos[1]
# find insertion node
node = self._tos
while True:
tree_node = node.tree_node
if tree_node.type == 'suite':
# A suite starts with NEWLINE, ...
node_indentation = tree_node.children[1].start_pos[1]
if indentation >= node_indentation: # Not a Dedent
# We might be at the most outer layer: modules. We
# don't want to depend on the first statement
# having the right indentation.
return node
elif tree_node.type == 'file_input':
return node
node = self._close_tos()
def _close_tos(self):
self._tos.close()
self._tos = self._tos.parent
return self._tos
def add_parsed_nodes(self, tree_nodes):
tree_nodes = self._remove_endmarker(tree_nodes)
if not tree_nodes:
return
assert tree_nodes[0].type != 'newline'
node = self._get_insertion_node(tree_nodes[0])
assert node.tree_node.type in ('suite', 'file_input')
node.add(tree_nodes)
self._update_tos(tree_nodes[-1])
def _remove_endmarker(self, tree_nodes):
"""
Helps clean up the tree nodes that get inserted.
"""
last_leaf = tree_nodes[-1].get_last_leaf()
is_endmarker = last_leaf.type == self.endmarker_type
self._last_prefix = ''
if is_endmarker:
try:
separation = last_leaf.prefix.rindex('\n')
except ValueError:
pass
else:
# Remove the whitespace part of the prefix after a newline.
# That is not relevant if parentheses were opened. Always parse
# until the end of a line.
last_leaf.prefix, self._last_prefix = \
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
first_leaf = tree_nodes[0].get_first_leaf()
first_leaf.prefix = self.prefix + first_leaf.prefix
self.prefix = ''
if is_endmarker:
self.prefix = last_leaf.prefix
tree_nodes = tree_nodes[:-1]
return tree_nodes
def copy_nodes(self, tree_nodes, until_line, line_offset):
"""
Copies tree nodes from the old parser tree.
Returns the list of copied tree nodes.
"""
tos = self._get_insertion_node(tree_nodes[0])
new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
return new_nodes
def _copy_nodes(self, tos, nodes, until_line, line_offset):
new_nodes = []
new_tos = tos
for node in nodes:
if node.type == 'endmarker':
# Endmarkers just distort all the checks below. Remove them.
break
if node.start_pos[0] > until_line:
break
# TODO this check might take a bit of time for large files. We
# might want to change this to do more intelligent guessing or
# binary search.
if _get_last_line(node) > until_line:
# We can split up functions and classes later.
if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite':
new_nodes.append(node)
break
new_nodes.append(node)
if not new_nodes:
return [], tos
last_node = new_nodes[-1]
line_offset_index = -1
if last_node.type in ('classdef', 'funcdef'):
suite = last_node.children[-1]
if suite.type == 'suite':
suite_tos = _NodesStackNode(suite)
# Don't need to pass line_offset here, it's already done by the
# parent.
suite_nodes, recursive_tos = self._copy_nodes(
suite_tos, suite.children, until_line, line_offset)
if len(suite_nodes) < 2:
# A suite only with newline is not valid.
new_nodes.pop()
else:
suite_tos.parent = tos
new_tos = recursive_tos
line_offset_index = -2
elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
_is_flow_node(new_nodes[-1])):
# Error leafs/nodes don't have a defined start/end. Error
# nodes might not end with a newline (e.g. if there's an
# open `(`). Therefore ignore all of them unless they are
# succeeded with valid parser state.
# If we copy flows at the end, they might be continued
# after the copy limit (in the new parser).
# In this while loop we try to remove until we find a newline.
new_nodes.pop()
while new_nodes:
last_node = new_nodes[-1]
if last_node.get_last_leaf().type == 'newline':
break
new_nodes.pop()
if new_nodes:
try:
last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
except IndexError:
line_offset = 0
# In this case we don't have to calculate an offset, because
# there's no children to be managed.
last_line_offset_leaf = None
tos.add(new_nodes, line_offset, last_line_offset_leaf)
return new_nodes, new_tos
def _update_tos(self, tree_node):
if tree_node.type in ('suite', 'file_input'):
self._tos = _NodesStackNode(tree_node, self._tos)
self._tos.add(list(tree_node.children))
self._update_tos(tree_node.children[-1])
elif tree_node.type in ('classdef', 'funcdef'):
self._update_tos(tree_node.children[-1])
def close(self):
while self._tos is not None:
self._close_tos()
# Add an endmarker.
try:
last_leaf = self._module.get_last_leaf()
end_pos = list(last_leaf.end_pos)
except IndexError:
end_pos = [1, 0]
lines = splitlines(self.prefix)
assert len(lines) > 0
if len(lines) == 1:
end_pos[1] += len(lines[0])
else:
end_pos[0] += len(lines) - 1
end_pos[1] = len(lines[-1])
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
endmarker.parent = self._module
self._module.children.append(endmarker)

152
parso/python/grammar2.7.txt Normal file

@@ -0,0 +1,152 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: ((tfpdef ['=' test] ',')*
('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
varargslist: ((vfpdef ['=' test] ',')*
('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
'>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test
test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
# Modification by David Halter, remove `testlist_gexp` and `listmaker`
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
# Modification by David Halter, dictsetmaker -> dictorsetmaker (so that it's
# the same as in the 3.4 grammar).
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
argument: test [comp_for] | test '=' test # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
comp_if: 'if' old_test [comp_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [testlist]

135
parso/python/grammar3.4.txt Normal file

@@ -0,0 +1,135 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

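The relaxed `suite` rule above (`stmt*` where CPython's grammar requires `stmt+`) is what lets Jedi keep parsing a class or function that has no body yet. A minimal sketch of the difference, using only the built-in compile() for contrast; nothing here calls parso:

# CPython's grammar wants at least one statement after INDENT, so a
# function without a body is a syntax error for the regular compiler:
incomplete = "def incomplete():\n"
try:
    compile(incomplete, "<example>", "exec")
except SyntaxError as e:
    print("CPython rejects it:", e.msg)  # e.g. "expected an indented block"

# A parser built from the grammar above can instead return a funcdef node
# with an empty suite, which is exactly what autocompletion needs.
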
154
parso/python/grammar3.5.txt Normal file
View File

@@ -0,0 +1,154 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens,
# skipping Python 3.5 compatibility in favour of the 3.7 solution
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

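Compared to the 3.4 file, this grammar adds the `@`/`@=` matrix-multiplication operators, `async`/`await` (handled as keywords here rather than as tokens), and the PEP 448 unpacking generalizations in `dictorsetmaker`, `testlist_comp` and `argument`. A small, hedged sample of source that needs exactly these rules (runnable on Python >= 3.5):

import asyncio

def matmul(a, b):
    return a @ b                    # term: factor (('*'|'@'|'/'|'%'|'//') factor)*

async def pause():                  # async_funcdef
    await asyncio.sleep(0)          # atom_expr: ['await'] atom trailer*

merged = {**{'a': 1}, 'b': 2}       # dictorsetmaker: '**' expr
items = [*range(3), 99]             # star_expr inside testlist_comp
print(merged, items)
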
161
parso/python/grammar3.6.txt Normal file
View File

@@ -0,0 +1,161 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead
# of tokens, skipping Python 3.5+ compatibility in favour of the 3.7 solution
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by Francisco Souza/David Halter: The stmt is now optional. This reflects
# how Jedi allows classes and functions to be empty, which is beneficial for
# autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

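On top of 3.5 this grammar adds variable annotations (`annassign`), asynchronous comprehensions (`comp_for: ['async'] ...`) and trailing commas after `*args`/`**kwargs` in signatures; underscores in numeric literals and f-strings are handled in the tokenizer, not in this file. A hedged sample of the new forms (runnable on Python >= 3.6):

from typing import List

counter: int = 0                        # expr_stmt with annassign
names: List[str]                        # annotation without a value

def accept(*args, **kwargs,):           # trailing comma allowed by typedargslist
    return len(args) + len(kwargs)

async def collect(aiter):
    return [item async for item in aiter]   # ['async'] comp_for

print(counter, accept(1, x=2))
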
232
parso/python/parser.py Normal file
View File

@@ -0,0 +1,232 @@
from parso.python import tree
from parso import tokenize
from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name)
from parso.parser import BaseParser
from jedi.common import splitlines
class Parser(BaseParser):
"""
    This class is used to parse a Python file; it then divides it into a
    class structure of different scopes.
:param grammar: The grammar object of pgen2. Loaded by load_grammar.
"""
node_map = {
'expr_stmt': tree.ExprStmt,
'classdef': tree.Class,
'funcdef': tree.Function,
'file_input': tree.Module,
'import_name': tree.ImportName,
'import_from': tree.ImportFrom,
'break_stmt': tree.KeywordStatement,
'continue_stmt': tree.KeywordStatement,
'return_stmt': tree.ReturnStmt,
'raise_stmt': tree.KeywordStatement,
'yield_expr': tree.YieldExpr,
'del_stmt': tree.KeywordStatement,
'pass_stmt': tree.KeywordStatement,
'global_stmt': tree.GlobalStmt,
'nonlocal_stmt': tree.KeywordStatement,
'print_stmt': tree.KeywordStatement,
'assert_stmt': tree.AssertStmt,
'if_stmt': tree.IfStmt,
'with_stmt': tree.WithStmt,
'for_stmt': tree.ForStmt,
'while_stmt': tree.WhileStmt,
'try_stmt': tree.TryStmt,
'comp_for': tree.CompFor,
'decorator': tree.Decorator,
'lambdef': tree.Lambda,
'old_lambdef': tree.Lambda,
'lambdef_nocond': tree.Lambda,
}
default_node = tree.PythonNode
def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
self.syntax_errors = []
self._omit_dedent_list = []
self._indent_counter = 0
# TODO do print absolute import detection here.
# try:
# del python_grammar_no_print_statement.keywords["print"]
# except KeyError:
# pass # Doesn't exist in the Python 3 grammar.
# if self.options["print_function"]:
# python_grammar = pygram.python_grammar_no_print_statement
# else:
def parse(self, tokens):
if self._error_recovery:
if self._start_symbol != 'file_input':
raise NotImplementedError
tokens = self._recovery_tokenize(tokens)
node = super(Parser, self).parse(tokens)
if self._start_symbol == 'file_input' != node.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
node = self.convert_node(
self._grammar,
self._grammar.symbol2number['file_input'],
[node]
)
return node
def convert_node(self, grammar, type, children):
"""
Convert raw node information to a PythonBaseNode instance.
This is passed to the parser driver which calls it whenever a reduction of a
        grammar rule produces a new complete node, so that the tree is built
strictly bottom-up.
"""
# TODO REMOVE symbol, we don't want type here.
symbol = grammar.number2symbol[type]
try:
return self.node_map[symbol](children)
except KeyError:
if symbol == 'suite':
# We don't want the INDENT/DEDENT in our parser tree. Those
# leaves are just cancer. They are virtual leaves and not real
# ones and therefore have pseudo start/end positions and no
# prefixes. Just ignore them.
children = [children[0]] + children[2:-1]
return self.default_node(symbol, children)
def convert_leaf(self, grammar, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == tokenize.NAME:
if value in grammar.keywords:
return tree.Keyword(value, start_pos, prefix)
else:
return tree.Name(value, start_pos, prefix)
elif type == STRING:
return tree.String(value, start_pos, prefix)
elif type == NUMBER:
return tree.Number(value, start_pos, prefix)
elif type == NEWLINE:
return tree.Newline(value, start_pos, prefix)
elif type == ENDMARKER:
return tree.EndMarker(value, start_pos, prefix)
else:
return tree.Operator(value, start_pos, prefix)
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
"""
        This parser is written in a dynamic way, meaning that it allows using
        different grammars (even non-Python). However, error
recovery is purely written for Python.
"""
if not self._error_recovery:
return super(Parser, self).error_recovery(
grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback)
def current_suite(stack):
# For now just discard everything that is not a suite or
# file_input, if we detect an error.
for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
# `suite` can sometimes be only simple_stmt, not stmt.
symbol = grammar.number2symbol[type_]
if symbol == 'file_input':
break
elif symbol == 'suite' and len(nodes) > 1:
# suites without an indent in them get discarded.
break
return index, symbol, nodes
index, symbol, nodes = current_suite(stack)
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos):
add_token_callback(typ, value, start_pos, prefix)
else:
if typ == INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
self._omit_dedent_list.append(self._indent_counter)
else:
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
stack[-1][2][1].append(error_leaf)
def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
failed_stack = []
found = False
all_nodes = []
for dfa, state, (typ, nodes) in stack[start_index:]:
if nodes:
found = True
if found:
symbol = grammar.number2symbol[typ]
failed_stack.append((symbol, nodes))
all_nodes += nodes
if failed_stack:
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
stack[start_index:] = []
return failed_stack
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.
o = self._omit_dedent_list
if o and o[-1] == self._indent_counter:
o.pop()
continue
self._indent_counter -= 1
elif typ == INDENT:
self._indent_counter += 1
yield typ, value, start_pos, prefix
def _remove_last_newline(node):
endmarker = node.children[-1]
    # The newline is either in the endmarker as a prefix or in the previous
    # leaf as a newline token.
prefix = endmarker.prefix
leaf = endmarker.get_previous_leaf()
if prefix:
text = prefix
else:
if leaf is None:
raise ValueError("You're trying to remove a newline from an empty module.")
text = leaf.value
if not text.endswith('\n'):
raise ValueError("There's no newline at the end, cannot remove it.")
text = text[:-1]
if prefix:
endmarker.prefix = text
if leaf is None:
end_pos = (1, 0)
else:
end_pos = leaf.end_pos
lines = splitlines(text, keepends=True)
if len(lines) == 1:
end_pos = end_pos[0], end_pos[1] + len(lines[0])
else:
end_pos = end_pos[0] + len(lines) - 1, len(lines[-1])
endmarker.start_pos = end_pos
else:
leaf.value = text
endmarker.start_pos = leaf.end_pos

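The `node_map`/`default_node` pair above is a plain dispatch table: `convert_node` looks the reduced grammar symbol up in a dict of specialised tree classes and falls back to a generic node type. A minimal, self-contained sketch of that pattern with stand-in classes (these are not parso's real tree types):

class GenericNode(object):
    def __init__(self, type_, children):
        self.type = type_
        self.children = children

class Funcdef(GenericNode):
    pass

NODE_MAP = {'funcdef': Funcdef}                # stand-in for Parser.node_map

def convert(symbol, children):
    try:
        return NODE_MAP[symbol](symbol, children)
    except KeyError:
        return GenericNode(symbol, children)   # stand-in for default_node

print(type(convert('funcdef', [])).__name__)   # Funcdef
print(type(convert('if_stmt', [])).__name__)   # GenericNode
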
1045
parso/python/tree.py Normal file

File diff suppressed because it is too large Load Diff

90
parso/token.py Normal file
View File

@@ -0,0 +1,90 @@
from __future__ import absolute_import
from jedi._compatibility import is_py3, is_py35
from token import *
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
N_TOKENS += 1
NL = N_TOKENS
tok_name[NL] = 'NL'
N_TOKENS += 1
if is_py3:
BACKQUOTE = N_TOKENS
tok_name[BACKQUOTE] = 'BACKQUOTE'
N_TOKENS += 1
else:
RARROW = N_TOKENS
tok_name[RARROW] = 'RARROW'
N_TOKENS += 1
ELLIPSIS = N_TOKENS
tok_name[ELLIPSIS] = 'ELLIPSIS'
N_TOKENS += 1
if not is_py35:
ATEQUAL = N_TOKENS
tok_name[ATEQUAL] = 'ATEQUAL'
N_TOKENS += 1
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """\
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
@= ATEQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
... ELLIPSIS
"""
opmap = {}
for line in opmap_raw.splitlines():
op, name = line.split()
opmap[op] = globals()[name]

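A hedged usage sketch for the `opmap` built above: it maps an operator string to its exact token number, which `TokenInfo.exact_type` in parso/tokenize.py relies on. This assumes the package from this commit is importable, including the leftover `jedi` imports it still depends on:

from parso.token import opmap, tok_name

for op in ('**=', '->', '...'):
    # prints DOUBLESTAREQUAL, RARROW and ELLIPSIS respectively
    print(op, tok_name[opmap[op]])
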
369
parso/tokenize.py Normal file
View File

@@ -0,0 +1,369 @@
# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. The fast parser of jedi however requires
"wrong" indentation.
Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
"""
from __future__ import absolute_import
import string
import re
from collections import namedtuple
import itertools as _itertools
from parso.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
from jedi._compatibility import is_py3, py_version, u
from jedi.common import splitlines
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
if is_py3:
# Python 3 has str.isidentifier() to check if a char is a valid identifier
is_identifier = str.isidentifier
else:
namechars = string.ascii_letters + '_'
is_identifier = lambda s: s in namechars
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
def group(*choices, **kwargs):
capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :(
assert not kwargs
start = '('
if not capture:
start += '?:'
return start + '|'.join(choices) + ')'
def any(*choices):
return group(*choices) + '*'
def maybe(*choices):
return group(*choices) + '?'
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Name = r'\w+'
if py_version >= 36:
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
else:
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
if is_py3:
Octnumber = r'0[oO][0-7]+'
else:
Octnumber = '0[0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
    # The valid string prefixes. They only contain the lower-case versions
    # and don't contain any permutations (they include 'fr', but not
# 'rf'). The various permutations will be generated.
_valid_string_prefixes = ['b', 'r', 'u', 'br']
if py_version >= 36:
_valid_string_prefixes += ['f', 'fr']
if py_version <= 27:
# TODO this is actually not 100% valid. ur is valid in Python 2.7,
# while ru is not.
_valid_string_prefixes.append('ur')
# if we add binary f-strings, add: ['fb', 'fbr']
result = set([''])
for prefix in _valid_string_prefixes:
for t in _itertools.permutations(prefix):
# create a list with upper and lower versions of each
# character
for u in _itertools.product(*[(c, c.upper()) for c in t]):
result.add(''.join(u))
return result
def _compile(expr):
return re.compile(expr, re.UNICODE)
# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->",
r"[+\-*/%&@|^=<>]=?",
r"~")
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, Name, capture=True)
# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = group(Whitespace, capture=True) + \
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
endpats[_prefix + "'"] = _compile(Single)
endpats[_prefix + '"'] = _compile(Double)
endpats[_prefix + "'''"] = _compile(Single3)
endpats[_prefix + '"""'] = _compile(Double3)
# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
for p in (t + '"', t + "'"):
single_quoted.add(p)
for p in (t + '"""', t + "'''"):
triple_quoted.add(p)
# TODO add with?
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
'finally', 'while', 'return')
pseudo_token_compiled = _compile(PseudoToken)
class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
def __repr__(self):
return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
self._replace(type=self.get_type_name()))
def get_type_name(self, exact=True):
if exact:
typ = self.exact_type
else:
typ = self.type
return tok_name[typ]
@property
def exact_type(self):
if self.type == OP and self.string in opmap:
return opmap[self.string]
else:
return self.type
@property
def end_pos(self):
lines = splitlines(self.string)
if len(lines) > 1:
return self.start_pos[0] + len(lines) - 1, 0
else:
return self.start_pos[0], self.start_pos[1] + len(self.string)
def source_tokens(source, use_exact_op_types=False):
"""Generate tokens from a the source code (string)."""
lines = splitlines(source, keepends=True)
return generate_tokens(lines, use_exact_op_types)
def generate_tokens(lines, use_exact_op_types=False):
"""
A heavily modified Python standard library tokenizer.
    In addition to the default information, it also yields the prefix of each
    token. This idea comes from lib2to3. The prefix contains all the
    information that is irrelevant for the parser, like newlines in
    parentheses or comments.
"""
paren_level = 0 # count parentheses
indents = [0]
max = 0
numchars = '0123456789'
contstr = ''
contline = None
# We start with a newline. This makes indent at the first position
# possible. It's not valid Python, but still better than an INDENT in the
# second line (and not in the first). This makes quite a few things in
# Jedi's fast parser possible.
new_line = True
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
for lnum, line in enumerate(lines, 1): # loop over lines in stream
pos, max = 0, len(line)
if contstr: # continued string
endmatch = endprog.match(line)
if endmatch:
pos = endmatch.end(0)
yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
contstr = ''
contline = None
else:
contstr = contstr + line
contline = contline + line
continue
while pos < max:
pseudomatch = pseudo_token_compiled.match(line, pos)
if not pseudomatch: # scan for tokens
txt = line[pos:]
if txt.endswith('\n'):
new_line = True
yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
break
prefix = additional_prefix + pseudomatch.group(1)
additional_prefix = ''
start, pos = pseudomatch.span(2)
spos = (lnum, start)
token = pseudomatch.group(2)
initial = token[0]
if new_line and initial not in '\r\n#':
new_line = False
if paren_level == 0:
i = 0
while line[i] == '\f':
i += 1
start -= 1
if start > indents[-1]:
yield TokenInfo(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
yield TokenInfo(DEDENT, '', spos, '')
indents.pop()
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, prefix)
elif initial in '\r\n':
if not new_line and paren_level == 0:
yield TokenInfo(NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
new_line = True
elif initial == '#': # Comments
assert not token.endswith("\n")
additional_prefix = prefix + token
elif token in triple_quoted:
endprog = endpats[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield TokenInfo(STRING, token, spos, prefix)
else:
contstr_start = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2]))
contstr = line[start:]
contline = line
break
else: # ordinary string
yield TokenInfo(STRING, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in ALWAYS_BREAK_TOKENS:
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield TokenInfo(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield TokenInfo(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
if token in '([{':
paren_level += 1
elif token in ')]}':
paren_level -= 1
try:
# This check is needed in any case to check if it's a valid
# operator or just some random unicode character.
exact_type = opmap[token]
except KeyError:
exact_type = typ = ERRORTOKEN
if use_exact_op_types:
typ = exact_type
else:
typ = OP
yield TokenInfo(typ, token, spos, prefix)
if contstr:
yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'):
new_line = True
end_pos = lnum, max
# As the last position we just take the maximally possible position. We
# remove -1 for the last new line.
for indent in indents[1:]:
yield TokenInfo(DEDENT, '', end_pos, '')
yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)
if __name__ == "__main__":
import sys
if len(sys.argv) >= 2:
path = sys.argv[1]
with open(path) as f:
code = u(f.read())
else:
code = u(sys.stdin.read())
for token in source_tokens(code, use_exact_op_types=True):
print(token)

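A hedged sketch of the prefix behaviour described in the docstrings above: comments and whitespace are not emitted as separate tokens but are attached as the prefix of the token that follows them. Again, this assumes the module and its remaining jedi dependencies are importable:

from parso.tokenize import source_tokens

code = "# a comment\nx = 1\n"
for token in source_tokens(code):
    print(token.get_type_name(), repr(token.string), repr(token.prefix))
# The NAME token for `x` carries "# a comment\n" in its prefix.
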
328
parso/tree.py Normal file
View File

@@ -0,0 +1,328 @@
from abc import abstractmethod, abstractproperty
from parso._compatibility import utf8_repr, encoding, is_py3
def search_ancestor(node, *node_types):
"""
Recursively looks at the parents of a node and checks if the type names
match.
:param node: The node that is looked at.
:param node_types: A tuple or a string of type names that are
searched for.
"""
while True:
node = node.parent
if node is None or node.type in node_types:
return node
class NodeOrLeaf(object):
"""
The base class for nodes and leaves.
"""
__slots__ = ()
def get_root_node(self):
"""
Returns the root node of a parser tree. The returned node doesn't have
a parent node like all the other nodes/leaves.
"""
scope = self
while scope.parent is not None:
scope = scope.parent
return scope
def get_next_sibling(self):
"""
The node immediately following the invocant in their parent's children
list. If the invocant does not have a next sibling, it is None
"""
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
if child is self:
try:
return self.parent.children[i + 1]
except IndexError:
return None
def get_previous_sibling(self):
"""
The node/leaf immediately preceding the invocant in their parent's
children list. If the invocant does not have a previous sibling, it is
None.
"""
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
if child is self:
if i == 0:
return None
return self.parent.children[i - 1]
def get_previous_leaf(self):
"""
Returns the previous leaf in the parser tree.
        Returns `None` if it's the first element in the parser tree.
"""
node = self
while True:
c = node.parent.children
i = c.index(node)
if i == 0:
node = node.parent
if node.parent is None:
return None
else:
node = c[i - 1]
break
while True:
try:
node = node.children[-1]
except AttributeError: # A Leaf doesn't have children.
return node
def get_next_leaf(self):
"""
Returns the next leaf in the parser tree.
Returns `None` if it's the last element in the parser tree.
"""
node = self
while True:
c = node.parent.children
i = c.index(node)
if i == len(c) - 1:
node = node.parent
if node.parent is None:
return None
else:
node = c[i + 1]
break
while True:
try:
node = node.children[0]
except AttributeError: # A Leaf doesn't have children.
return node
@abstractproperty
def start_pos(self):
"""
        Returns the starting position of the node/leaf as a tuple, e.g.
        `(3, 4)`. The prefix is not part of it.
:return tuple of int: (line, column)
"""
@abstractproperty
def end_pos(self):
"""
        Returns the end position of the node/leaf as a tuple, e.g. `(3, 4)`.
:return tuple of int: (line, column)
"""
@abstractmethod
def get_start_pos_of_prefix(self):
"""
        Returns the start_pos of the prefix. This basically means it returns
        the end_pos of the previous leaf. The `get_start_pos_of_prefix()` of
        the leaf `+` in `2 + 1` would be `(1, 1)`, while its start_pos is
        `(1, 2)`.
:return tuple of int: (line, column)
"""
@abstractmethod
def get_first_leaf(self):
"""
        Returns the first leaf of a node or itself if it's a leaf.
"""
@abstractmethod
def get_last_leaf(self):
"""
        Returns the last leaf of a node or itself if it's a leaf.
"""
@abstractmethod
def get_code(self, normalized=False, include_prefix=True):
"""
Returns the code that was the input of the parser.
If a normalizer is given, the returned code will be normalized and will
not be equal to the input.
        :param include_prefix: If ``False``, the prefix (whitespace and comments) of e.g. a statement is removed.
:param normalized: Deprecated. Please don't use. Will be replaced with something more powerful.
"""
class Leaf(NodeOrLeaf):
__slots__ = ('value', 'parent', 'line', 'indent', 'prefix')
def __init__(self, value, start_pos, prefix=''):
self.value = value
self.start_pos = start_pos
self.prefix = prefix
self.parent = None
@property
def start_pos(self):
return self.line, self.indent
@start_pos.setter
def start_pos(self, value):
self.line = value[0]
self.indent = value[1]
def get_start_pos_of_prefix(self):
previous_leaf = self.get_previous_leaf()
if previous_leaf is None:
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
return previous_leaf.end_pos
def get_first_leaf(self):
return self
def get_last_leaf(self):
return self
def get_code(self, normalized=False, include_prefix=True):
if normalized:
return self.value
if include_prefix:
return self.prefix + self.value
else:
return self.value
@property
def end_pos(self):
lines = self.value.split('\n')
end_pos_line = self.line + len(lines) - 1
# Check for multiline token
if self.line == end_pos_line:
end_pos_indent = self.indent + len(lines[-1])
else:
end_pos_indent = len(lines[-1])
return end_pos_line, end_pos_indent
@utf8_repr
def __repr__(self):
return "<%s: %s start=%s>" % (type(self).__name__, self.value, self.start_pos)
class BaseNode(NodeOrLeaf):
"""
The super class for all nodes.
If you create custom nodes, you will probably want to inherit from this
``BaseNode``.
"""
__slots__ = ('children', 'parent')
type = None
def __init__(self, children):
for c in children:
c.parent = self
self.children = children
self.parent = None
@property
def start_pos(self):
return self.children[0].start_pos
def get_start_pos_of_prefix(self):
return self.children[0].get_start_pos_of_prefix()
@property
def end_pos(self):
return self.children[-1].end_pos
def _get_code_for_children(self, children, normalized, include_prefix):
# TODO implement normalized (depending on context).
if include_prefix:
return "".join(c.get_code(normalized) for c in children)
else:
first = children[0].get_code(include_prefix=False)
return first + "".join(c.get_code(normalized) for c in children[1:])
def get_code(self, normalized=False, include_prefix=True):
return self._get_code_for_children(self.children, normalized, include_prefix)
def get_leaf_for_position(self, position, include_prefixes=False):
def binary_search(lower, upper):
if lower == upper:
element = self.children[lower]
if not include_prefixes and position < element.start_pos:
# We're on a prefix.
return None
# In case we have prefixes, a leaf always matches
try:
return element.get_leaf_for_position(position, include_prefixes)
except AttributeError:
return element
index = int((lower + upper) / 2)
element = self.children[index]
if position <= element.end_pos:
return binary_search(lower, index)
else:
return binary_search(index + 1, upper)
if not ((1, 0) <= position <= self.children[-1].end_pos):
raise ValueError('Please provide a position that exists within this node.')
return binary_search(0, len(self.children) - 1)
def get_first_leaf(self):
return self.children[0].get_first_leaf()
def get_last_leaf(self):
return self.children[-1].get_last_leaf()
@utf8_repr
def __repr__(self):
code = self.get_code().replace('\n', ' ').strip()
if not is_py3:
code = code.encode(encoding, 'replace')
return "<%s: %s@%s,%s>" % \
(type(self).__name__, code, self.start_pos[0], self.start_pos[1])
class Node(BaseNode):
"""Concrete implementation for interior nodes."""
__slots__ = ('type',)
def __init__(self, type, children):
super(Node, self).__init__(children)
self.type = type
def __repr__(self):
return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children)
class ErrorNode(BaseNode):
"""
    A node that contains valid nodes/leaves that were followed by a token that
    was invalid. This basically means that the leaf after this node is where
Python would mark a syntax error.
"""
__slots__ = ()
type = 'error_node'
class ErrorLeaf(Leaf):
"""
A leaf that is either completely invalid in a language (like `$` in Python)
    or is invalid at that position, like the star in `1 +* 1`.
"""
    __slots__ = ('original_type',)
type = 'error_leaf'
def __init__(self, original_type, value, start_pos, prefix=''):
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
self.original_type = original_type
def __repr__(self):
return "<%s: %s:%s, %s)>" % \
(type(self).__name__, self.original_type, repr(self.value), self.start_pos)
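A hedged example of the generic tree API defined here, building a tiny `1 + 2` tree out of plain `Leaf`/`Node` objects; the Python-specific subclasses live in parso/python/tree.py, and this assumes parso._compatibility from this commit is importable. Positions are (line, column) tuples:

from parso.tree import Leaf, Node

one = Leaf('1', (1, 0))
plus = Leaf('+', (1, 2), prefix=' ')
two = Leaf('2', (1, 4), prefix=' ')
expr = Node('arith_expr', [one, plus, two])

print(expr.get_code())                           # '1 + 2'
print(expr.start_pos, expr.end_pos)              # (1, 0) (1, 5)
print(plus.get_start_pos_of_prefix())            # (1, 1), the end of leaf `1`
print(expr.get_leaf_for_position((1, 4)).value)  # '2'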