move all the parser stuff into a separate package

2026-02-23 22:28:27 +08:00 · 2013-09-06 00:58:40 +04:30
parent dd4d0bc619
commit 390442dc3b
3 changed files with 6 additions and 6 deletions
--- a/jedi/parser/init.py
+++ b/jedi/parser/init.py
@@ -0,0 +1,682 @@
+"""
+The ``Parser`` tries to convert the available Python code into an easy to read
+format, something like an abstract syntax tree. The classes that represent
+this tree live in the :mod:`jedi.parser.representation` module.
+
+The Python module ``tokenize`` is a very important part in the ``Parser``,
+because it splits the code into different words (tokens).  Sometimes it looks a
+bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
+module for this?" Well, ``ast`` does a very good job understanding proper Python
+code, but fails to work as soon as there's a single line of broken code.
+
+There's one important optimization that needs to be known: Statements are not
+being parsed completely. ``Statement`` is just a representation of the tokens
+within the statement. This lowers memory usage and cpu time and reduces the
+complexity of the ``Parser`` (there's another parser sitting inside
+``Statement``, which produces ``Array`` and ``Call``).
+"""
+from __future__ import with_statement
+
+import tokenizer as tokenize
+import keyword
+
+from jedi._compatibility import next, StringIO
+from jedi import debug
+from jedi import common
+from jedi.parser import representation as pr
+
+
+class Parser(object):
+    """
+    This class is used to parse a Python file. It divides the file into a
+    class structure of different scopes.
+
+    :param source: The codebase for the parser.
+    :type source: str
+    :param module_path: The path of the module in the file system, may be None.
+    :type module_path: str
+    :param user_position: The line/column, the user is currently on.
+    :type user_position: tuple(int, int)
+    :param no_docstr: If True, a string at the beginning is not a docstr.
+    :param is_fast_parser: Passed on to the tokenizer; used by the fast parser.
+    :param top_module: Use this module as a parent instead of `self.module`.
+    """
+    def __init__(self, source, module_path=None, user_position=None,
+                 no_docstr=False, offset=(0, 0), is_fast_parser=None,
+                 top_module=None):
+        """
+        Tokenize ``source`` and parse it right away into ``self.module``.
+
+        :param offset: ``(line, column)`` offset of the source. The initial
+            position is ``(1 + offset[0], offset[1])``; the offset is also
+            handed to the tokenizer.
+        """
+        self.user_position = user_position
+        self.user_scope = None
+        self.user_stmt = None
+        self.no_docstr = no_docstr
+
+        self.start_pos = self.end_pos = 1 + offset[0], offset[1]
+        # initialize global Scope
+        self.module = pr.SubModule(module_path, self.start_pos, top_module)
+        self._scope = self.module
+        # the (token_type, token) pair that was read last
+        self._current = (None, None)
+
+        source = source + '\n'  # end with \n, because the parser needs it
+        buf = StringIO(source)
+        self._gen = common.NoErrorTokenizer(buf.readline, offset,
+                                            is_fast_parser)
+        self.top_module = top_module or self.module
+        try:
+            self._parse()
+        except (common.MultiLevelStopIteration, StopIteration):
+            # StopIteration needs to be added as well, because python 2 has a
+            # strange way of handling StopIterations.
+            # Sometimes StopIteration isn't caught. Just ignore it.
+            pass
+
+        # clean up unused decorators
+        for d in self._decorators:
+            # set a parent for unused decorators, so that they are not left
+            # without a parent when reached through `self.module.used_names`.
+            d.parent = self.module
+
+        if self._current[0] in (tokenize.NL, tokenize.NEWLINE):
+            # we added a newline before, so we need to "remove" it again.
+            self.end_pos = self._gen.previous[2]
+        elif self._current[0] == tokenize.INDENT:
+            self.end_pos = self._gen.last_previous[2]
+
+        self.start_pos = self.module.start_pos
+        self.module.end_pos = self.end_pos
+        # the tokenizer is not needed anymore after parsing
+        del self._gen
+
+    def __repr__(self):
+        """ Show the class name together with the parsed module. """
+        return "<%s: %s>" % (type(self).__name__, self.module)
+
+    def _check_user_stmt(self, simple):
+        """
+        Register ``simple`` for the names collected so far and, if the user
+        position lies inside it, remember it as ``self.user_stmt``.
+
+        :param simple: The statement or import that was just created. It
+            needs ``start_pos``, ``end_pos`` and ``get_set_vars()``.
+        """
+        # this is not user checking, just update the used_names
+        for tok_name in self.module.temp_used_names:
+            try:
+                self.module.used_names[tok_name].add(simple)
+            except KeyError:
+                self.module.used_names[tok_name] = set([simple])
+        self.module.temp_used_names = []
+
+        if not self.user_position:
+            return
+        # the position is right
+        if simple.start_pos <= self.user_position <= simple.end_pos:
+            if self.user_stmt is not None:
+                # if there is already a user statement (another import,
+                # because imports are split), only replace it when one of
+                # the names of `simple` contains the user position.
+                for n in simple.get_set_vars():
+                    if n.start_pos < self.user_position <= n.end_pos:
+                        self.user_stmt = simple
+            else:
+                self.user_stmt = simple
+
+    def _parse_dot_name(self, pre_used_token=None):
+        """
+        The dot name parser parses a name, variable or function and returns
+        their names.
+
+        :param pre_used_token: A ``(token_type, token)`` pair that was already
+            read and is used as the first token. If it is None, the first
+            token is read from the stream.
+        :return: Tuple of Name, token_type, nexttoken. If the first token is
+            neither a name nor ``*``, the first element is ``[]`` (token read
+            here) or ``None`` (``pre_used_token`` given).
+        :rtype: tuple(Name, int, str)
+        """
+        def append(el):
+            # remember the name part and record it as a used name
+            names.append(el)
+            self.module.temp_used_names.append(el[0])
+
+        names = []
+        if pre_used_token is None:
+            token_type, tok = self.next()
+            if token_type != tokenize.NAME and tok != '*':
+                return [], token_type, tok
+        else:
+            token_type, tok = pre_used_token
+
+        if token_type != tokenize.NAME and tok != '*':
+            # token maybe a name or star
+            return None, token_type, tok
+
+        append((tok, self.start_pos))
+        first_pos = self.start_pos
+        while True:
+            # taken before reading on, so it stays at the end of the last
+            # accepted name part
+            end_pos = self.end_pos
+            token_type, tok = self.next()
+            if tok != '.':
+                break
+            token_type, tok = self.next()
+            if token_type != tokenize.NAME:
+                break
+            append((tok, self.start_pos))
+
+        n = pr.Name(self.module, names, first_pos, end_pos) if names else None
+        return n, token_type, tok
+
+    def _parse_import_list(self):
+        """
+        The parser for the imports. Unlike the class and function parse
+        functions, this returns no Import class, but rather an import list,
+        which is then added later on.
+        The reason why this is not done in the same class lies in the nature
+        of imports. There are two ways to write them:
+
+        - from ... import ...
+        - import ...
+
+        To distinguish, this has to be processed after the parser.
+
+        :return: List of imports, each one a tuple
+            ``(name, alias, defunct)``. ``alias`` is None without an ``as``
+            part, ``defunct`` is True if no name could be parsed.
+        :rtype: list
+        """
+        imports = []
+        brackets = False
+        # tokens that stop the skipping of unknown tokens after an import:
+        # separators, the closing bracket and all keywords except `as`.
+        continue_kw = [",", ";", "\n", ')'] \
+            + list(set(keyword.kwlist) - set(['as']))
+        while True:
+            defunct = False
+            token_type, tok = self.next()
+            if tok == '(':  # python allows only one `(` in the statement.
+                brackets = True
+                token_type, tok = self.next()
+            if brackets and tok == '\n':
+                self.next()
+            i, token_type, tok = self._parse_dot_name(self._current)
+            if not i:
+                defunct = True
+            name2 = None
+            if tok == 'as':
+                name2, token_type, tok = self._parse_dot_name()
+            imports.append((i, name2, defunct))
+            while tok not in continue_kw:
+                token_type, tok = self.next()
+            # only a comma (or a newline within brackets) continues the list
+            if not (tok == "," or brackets and tok == '\n'):
+                break
+        return imports
+
+    def _parse_parentheses(self):
+        """
+        Functions and Classes have params (which means for classes
+        super-classes). They are parsed here and returned as Statements.
+
+        Parsing stops at the first ``)`` or ``:`` that ends a param.
+
+        :return: List of Statements (``pr.Param`` objects), numbered
+            through ``position_nr`` in the order they were accepted.
+        :rtype: list
+        """
+        names = []
+        tok = None
+        pos = 0
+        breaks = [',', ':']
+        while tok not in [')', ':']:
+            param, tok = self._parse_statement(added_breaks=breaks,
+                                               stmt_class=pr.Param)
+            if param and tok == ':':
+                # parse annotations
+                annotation, tok = self._parse_statement(added_breaks=breaks)
+                if annotation:
+                    param.add_annotation(annotation)
+
+            # params without vars are usually syntax errors.
+            if param and (param.get_set_vars()):
+                param.position_nr = pos
+                names.append(param)
+                pos += 1
+
+        return names
+
+    def _parse_function(self):
+        """
+        The parser for a function. Processes the tokens which follow the
+        ``def`` keyword of a function definition.
+
+        :return: Return a Scope representation of the tokens, or None if the
+            name, the opening parenthesis, the ``->`` of a return annotation
+            or the final colon is missing.
+        :rtype: Function
+        """
+        first_pos = self.start_pos
+        token_type, fname = self.next()
+        if token_type != tokenize.NAME:
+            return None
+
+        fname = pr.Name(self.module, [(fname, self.start_pos)], self.start_pos,
+                        self.end_pos)
+
+        token_type, open = self.next()
+        if open != '(':
+            return None
+        params = self._parse_parentheses()
+
+        token_type, colon = self.next()
+        annotation = None
+        if colon in ['-', '->']:
+            # parse annotations
+            if colon == '-':
+                # The Python 2 tokenizer doesn't understand this
+                token_type, colon = self.next()
+                if colon != '>':
+                    return None
+            annotation, colon = self._parse_statement(added_breaks=[':'])
+
+        if colon != ':':
+            return None
+
+        # because of 2 line func param definitions
+        scope = pr.Function(self.module, fname, params, first_pos, annotation)
+        # an already found user scope is replaced by the new function if the
+        # user position is behind the start of the definition.
+        if self.user_scope and scope != self.user_scope \
+                and self.user_position > first_pos:
+            self.user_scope = scope
+        return scope
+
+    def _parse_class(self):
+        """
+        The parser for a class. Processes the tokens which follow the
+        ``class`` keyword of a class definition.
+
+        :return: Return a Scope representation of the tokens, or None (after
+            a debug warning) if the name or the final colon is missing.
+        :rtype: Class
+        """
+        first_pos = self.start_pos
+        token_type, cname = self.next()
+        if token_type != tokenize.NAME:
+            debug.warning("class: syntax err, token is not a name@%s (%s: %s)"
+                          % (self.start_pos[0], tokenize.tok_name[token_type], cname))
+            return None
+
+        cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos,
+                        self.end_pos)
+
+        # the super classes; stays empty if there are no parentheses
+        super = []
+        token_type, _next = self.next()
+        if _next == '(':
+            super = self._parse_parentheses()
+            token_type, _next = self.next()
+
+        if _next != ':':
+            debug.warning("class syntax: %s@%s" % (cname, self.start_pos[0]))
+            return None
+
+        # because of 2 line class initializations
+        scope = pr.Class(self.module, cname, super, first_pos)
+        # an already found user scope is replaced by the new class if the
+        # user position is behind the start of the definition.
+        if self.user_scope and scope != self.user_scope \
+                and self.user_position > first_pos:
+            self.user_scope = scope
+        return scope
+
+    def _parse_statement(self, pre_used_token=None, added_breaks=None,
+                         stmt_class=pr.Statement, names_are_set_vars=False):
+        """
+        Parses statements like::
+
+            a = test(b)
+            a += 3 - 2 or b
+
+        and so on. One line at a time.
+
+        :param pre_used_token: The pre parsed token, a
+            ``(token_type, token)`` pair. If it is not given, the first
+            token is read from the stream.
+        :type pre_used_token: tuple
+        :param added_breaks: Additional tokens that end the statement when
+            they appear outside of brackets.
+        :param stmt_class: The class that is instantiated for the result.
+        :param names_are_set_vars: Handed over to ``stmt_class`` unchanged.
+        :return: Statement + last parsed token. The statement is None if no
+            token was collected or if a lone string was stored as a
+            docstring.
+        :rtype: (Statement, str)
+        """
+        set_vars = []
+        level = 0  # The level of parentheses
+
+        if pre_used_token:
+            token_type, tok = pre_used_token
+        else:
+            token_type, tok = self.next()
+
+        while token_type == tokenize.COMMENT:
+            # remove newline and comment
+            self.next()
+            token_type, tok = self.next()
+
+        first_pos = self.start_pos
+        opening_brackets = ['{', '(', '[']
+        closing_brackets = ['}', ')', ']']
+
+        # the difference between "break" and "always break" is that the latter
+        # will even break in parentheses. This is true for typical flow
+        # commands like def and class and the imports, which will never be used
+        # in a statement.
+        breaks = set(['\n', ':', ')'])
+        always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
+                        'finally', 'while', 'return', 'yield']
+        # these only end a statement if they are not its first token
+        not_first_break = ['del', 'raise']
+        if added_breaks:
+            breaks |= set(added_breaks)
+
+        tok_list = []
+        as_names = []
+        while not (tok in always_break
+                   or tok in not_first_break and not tok_list
+                   or tok in breaks and level <= 0):
+            try:
+                # print 'parse_stmt', tok, tokenize.tok_name[token_type]
+                tok_list.append(self._current + (self.start_pos,))
+                if tok == 'as':
+                    token_type, tok = self.next()
+                    if token_type == tokenize.NAME:
+                        n, token_type, tok = self._parse_dot_name(self._current)
+                        if n:
+                            set_vars.append(n)
+                            as_names.append(n)
+                        tok_list.append(n)
+                    continue
+                elif tok in ['lambda', 'for', 'in']:
+                    # don't parse these keywords, parse later in stmt.
+                    if tok == 'lambda':
+                        # the colon of a lambda must not end the statement
+                        breaks.discard(':')
+                elif token_type == tokenize.NAME:
+                    n, token_type, tok = self._parse_dot_name(self._current)
+                    # removed last entry, because we add Name
+                    tok_list.pop()
+                    if n:
+                        tok_list.append(n)
+                    continue
+                elif tok in opening_brackets:
+                    level += 1
+                elif tok in closing_brackets:
+                    level -= 1
+
+                token_type, tok = self.next()
+            except (StopIteration, common.MultiLevelStopIteration):
+                # comes from tokenizer
+                break
+
+        if not tok_list:
+            return None, tok
+
+        first_tok = tok_list[0]
+        # docstrings
+        if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \
+                and first_tok[0] == tokenize.STRING:
+            # Normal docstring check
+            if self.freshscope and not self.no_docstr:
+                self._scope.add_docstr(first_tok[1])
+                return None, tok
+
+            # Attribute docstring (PEP 224) support (sphinx uses it, e.g.)
+            # If string literal is being parsed...
+            elif first_tok[0] == tokenize.STRING:
+                with common.ignored(IndexError, AttributeError):
+                    # ...then set it as a docstring
+                    self._scope.statements[-1].add_docstr(first_tok[1])
+                    return None, tok
+
+
+        stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos,
+                          as_names=as_names,
+                          names_are_set_vars=names_are_set_vars)
+
+        stmt.parent = self.top_module
+        self._check_user_stmt(stmt)
+
+        # hand keywords that ended the statement back to the tokenizer, so
+        # that the caller reads them again.
+        if tok in always_break + not_first_break:
+            self._gen.push_last_back()
+        return stmt, tok
+
+    def next(self):
+        """ Alias of ``__next__``; returns the next ``(type, token)``. """
+        return self.__next__()
+
+    def __iter__(self):
+        """ The parser is its own token iterator. """
+        return self
+
+    def __next__(self):
+        """
+        Generate the next tokenize pattern.
+
+        Updates ``start_pos``, ``end_pos``, ``parserline`` and ``_current``
+        as a side effect. When the tokenizer is exhausted, the end positions
+        of the open scopes are set and the stop exception is re-raised.
+
+        :return: The pair ``(token_type, token)``.
+        """
+        try:
+            typ, tok, start_pos, end_pos, self.parserline = next(self._gen)
+            # dedents shouldn't change positions
+            if typ != tokenize.DEDENT:
+                self.start_pos = start_pos
+                if typ not in (tokenize.INDENT, tokenize.NEWLINE, tokenize.NL):
+                    self.start_pos, self.end_pos = start_pos, end_pos
+        except (StopIteration, common.MultiLevelStopIteration):
+            # on finish, set end_pos correctly
+            s = self._scope
+            while s is not None:
+                # walk up the parents until a module that is not a SubModule
+                if isinstance(s, pr.Module) \
+                        and not isinstance(s, pr.SubModule):
+                    self.module.end_pos = self.end_pos
+                    break
+                s.end_pos = self.end_pos
+                s = s.parent
+            raise
+
+        # the current scope becomes the user scope on the user's line, or
+        # on the first token at/after that line if none was found yet.
+        if self.user_position and (self.start_pos[0] == self.user_position[0]
+                                   or self.user_scope is None
+                                   and self.start_pos[0] >= self.user_position[0]):
+            debug.dbg('user scope found [%s] = %s' %
+                     (self.parserline.replace('\n', ''), repr(self._scope)))
+            self.user_scope = self._scope
+
+        self._current = typ, tok
+        return self._current
+
+    def _parse(self):
+        """
+        The main part of the program. It consumes the token stream and
+        builds the tree-like scope structure below ``self.module``. For a
+        more detailed description, see the class description.
+
+        It takes no arguments and returns nothing; the tokens come from
+        iterating over ``self``. Stop exceptions raised by the nested
+        ``self.next()`` calls are not handled here, ``__init__`` catches
+        them.
+
+        NOTE(review): an earlier version of this docstring listed
+        ``IndentationError`` as raised; nothing in this method raises it
+        explicitly.
+        """
+        extended_flow = ['else', 'elif', 'except', 'finally']
+        statement_toks = ['{', '[', '(', '`']
+
+        self._decorators = []
+        self.freshscope = True
+        self.iterator = iter(self)
+        # This iterator stuff is not intentional. It grew historically.
+        for token_type, tok in self.iterator:
+            self.module.temp_used_names = []
+            # debug.dbg('main: tok=[%s] type=[%s] indent=[%s]'\
+            #    % (tok, tokenize.tok_name[token_type], start_position[0]))
+
+            # close scopes on dedents: a scope ends if the next token is not
+            # indented deeper than the start of that scope.
+            while token_type == tokenize.DEDENT and self._scope != self.module:
+                token_type, tok = self.next()
+                if self.start_pos[1] <= self._scope.start_pos[1]:
+                    self._scope.end_pos = self.start_pos
+                    self._scope = self._scope.parent
+                    if isinstance(self._scope, pr.Module) \
+                            and not isinstance(self._scope, pr.SubModule):
+                        self._scope = self.module
+
+            # check again for unindented stuff. this is true for syntax
+            # errors. only check for names, because thats relevant here. If
+            # some docstrings are not indented, I don't care.
+            while self.start_pos[1] <= self._scope.start_pos[1] \
+                    and (token_type == tokenize.NAME or tok in ['(', '['])\
+                    and self._scope != self.module:
+                self._scope.end_pos = self.start_pos
+                self._scope = self._scope.parent
+                if isinstance(self._scope, pr.Module) \
+                        and not isinstance(self._scope, pr.SubModule):
+                    self._scope = self.module
+
+            # statements of a SubModule get the top module as their parent
+            use_as_parent_scope = self.top_module if isinstance(self._scope,
+                                                                pr.SubModule) else self._scope
+            first_pos = self.start_pos
+            if tok == 'def':
+                func = self._parse_function()
+                if func is None:
+                    debug.warning("function: syntax error@%s" %
+                                  self.start_pos[0])
+                    continue
+                self.freshscope = True
+                self._scope = self._scope.add_scope(func, self._decorators)
+                self._decorators = []
+            elif tok == 'class':
+                cls = self._parse_class()
+                if cls is None:
+                    debug.warning("class: syntax error@%s" % self.start_pos[0])
+                    continue
+                self.freshscope = True
+                self._scope = self._scope.add_scope(cls, self._decorators)
+                self._decorators = []
+            # import stuff
+            elif tok == 'import':
+                imports = self._parse_import_list()
+                for count, (m, alias, defunct) in enumerate(imports):
+                    # the last import ends at the current position, the
+                    # others at the end of their alias or name.
+                    e = (alias or m or self).end_pos
+                    end_pos = self.end_pos if count + 1 == len(imports) else e
+                    i = pr.Import(self.module, first_pos, end_pos, m,
+                                  alias, defunct=defunct)
+                    self._check_user_stmt(i)
+                    self._scope.add_import(i)
+                if not imports:
+                    i = pr.Import(self.module, first_pos, self.end_pos, None,
+                                  defunct=True)
+                    self._check_user_stmt(i)
+                self.freshscope = False
+            elif tok == 'from':
+                defunct = False
+                # take care for relative imports
+                relative_count = 0
+                while True:
+                    token_type, tok = self.next()
+                    if tok != '.':
+                        break
+                    relative_count += 1
+                # the from import
+                mod, token_type, tok = self._parse_dot_name(self._current)
+                if str(mod) == 'import' and relative_count:
+                    # `from . import x`: the keyword was parsed as the module
+                    # name, so hand the last token back and fix the state.
+                    self._gen.push_last_back()
+                    tok = 'import'
+                    mod = None
+                if not mod and not relative_count or tok != "import":
+                    debug.warning("from: syntax error@%s" % self.start_pos[0])
+                    defunct = True
+                    if tok != 'import':
+                        self._gen.push_last_back()
+                names = self._parse_import_list()
+                for count, (name, alias, defunct2) in enumerate(names):
+                    star = name is not None and name.names[0] == '*'
+                    if star:
+                        name = None
+                    e = (alias or name or self).end_pos
+                    end_pos = self.end_pos if count + 1 == len(names) else e
+                    i = pr.Import(self.module, first_pos, end_pos, name,
+                                  alias, mod, star, relative_count,
+                                  defunct=defunct or defunct2)
+                    self._check_user_stmt(i)
+                    self._scope.add_import(i)
+                self.freshscope = False
+            # loops
+            elif tok == 'for':
+                set_stmt, tok = self._parse_statement(added_breaks=['in'],
+                                                      names_are_set_vars=True)
+                if tok == 'in':
+                    statement, tok = self._parse_statement()
+                    if tok == ':':
+                        s = [] if statement is None else [statement]
+                        f = pr.ForFlow(self.module, s, first_pos, set_stmt)
+                        self._scope = self._scope.add_statement(f)
+                    else:
+                        debug.warning('syntax err, for flow started @%s',
+                                      self.start_pos[0])
+                        if statement is not None:
+                            statement.parent = use_as_parent_scope
+                        if set_stmt is not None:
+                            set_stmt.parent = use_as_parent_scope
+                else:
+                    debug.warning('syntax err, for flow incomplete @%s',
+                                  self.start_pos[0])
+                    if set_stmt is not None:
+                        set_stmt.parent = use_as_parent_scope
+
+            elif tok in ['if', 'while', 'try', 'with'] + extended_flow:
+                added_breaks = []
+                command = tok
+                if command in ['except', 'with']:
+                    added_breaks.append(',')
+                # multiple inputs because of with
+                inputs = []
+                first = True
+                while first or command == 'with' \
+                        and tok not in [':', '\n']:
+                    statement, tok = \
+                        self._parse_statement(added_breaks=added_breaks)
+                    if command == 'except' and tok == ',':
+                        # the except statement defines a var
+                        # this is only true for python 2
+                        n, token_type, tok = self._parse_dot_name()
+                        if n:
+                            n.parent = statement
+                            statement.as_names.append(n)
+                    if statement:
+                        inputs.append(statement)
+                    first = False
+
+                if tok == ':':
+                    f = pr.Flow(self.module, command, inputs, first_pos)
+                    if command in extended_flow:
+                        # the last statement has to be another part of
+                        # the flow statement, because a dedent releases the
+                        # main scope, so just take the last statement.
+                        try:
+                            s = self._scope.statements[-1].set_next(f)
+                        except (AttributeError, IndexError):
+                            # If set_next doesn't exist, just add it.
+                            s = self._scope.add_statement(f)
+                    else:
+                        s = self._scope.add_statement(f)
+                    self._scope = s
+                else:
+                    for i in inputs:
+                        i.parent = use_as_parent_scope
+                    debug.warning('syntax err, flow started @%s',
+                                  self.start_pos[0])
+            # returns
+            elif tok in ['return', 'yield']:
+                s = self.start_pos
+                self.freshscope = False
+                # add returns to the scope
+                func = self._scope.get_parent_until(pr.Function)
+                if tok == 'yield':
+                    func.is_generator = True
+
+                stmt, tok = self._parse_statement()
+                if stmt is not None:
+                    stmt.parent = use_as_parent_scope
+                try:
+                    func.returns.append(stmt)
+                    # start_pos is the one of the return statement
+                    stmt.start_pos = s
+                except AttributeError:
+                    debug.warning('return in non-function')
+            # globals
+            elif tok == 'global':
+                stmt, tok = self._parse_statement(self._current)
+                if stmt:
+                    self._scope.add_statement(stmt)
+                    for t in stmt.token_list:
+                        if isinstance(t, pr.Name):
+                            # add the global to the top, because there it is
+                            # important.
+                            self.module.add_global(t)
+            # decorator
+            elif tok == '@':
+                stmt, tok = self._parse_statement()
+                if stmt is not None:
+                    self._decorators.append(stmt)
+            elif tok == 'pass':
+                continue
+            elif tok == 'assert':
+                stmt, tok = self._parse_statement()
+                if stmt is not None:
+                    stmt.parent = use_as_parent_scope
+                    self._scope.asserts.append(stmt)
+            # default
+            elif token_type in [tokenize.NAME, tokenize.STRING,
+                                tokenize.NUMBER] \
+                    or tok in statement_toks:
+                # this is the main part - a name can be a function or a
+                # normal var, which can follow anything. but this is done
+                # by the statement parser.
+                stmt, tok = self._parse_statement(self._current)
+                if stmt:
+                    self._scope.add_statement(stmt)
+                self.freshscope = False
+            else:
+                if token_type not in [tokenize.COMMENT, tokenize.INDENT,
+                                      tokenize.NEWLINE, tokenize.NL]:
+                    debug.warning('token not classified', tok, token_type,
+                                  self.start_pos[0])
+                continue
+            # only reached by branches that did not `continue` above
+            self.no_docstr = False
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -0,0 +1,440 @@
+"""
+Basically a parser that is faster, because it tries to parse only parts and if
+anything changes, it only reparses the changed parts. But because it's not
+finished (and still not working as I want), I won't document it any further.
+"""
+import re
+
+from jedi._compatibility import use_metaclass
+from jedi import settings
+from jedi.parser import Parser
+from jedi.parser import representation as pr
+from jedi import cache
+from jedi import common
+
+
+# Names of the scope attributes that ``ParserNode`` copies in
+# ``save_contents``, restores in ``reset_contents`` and extends in
+# ``_set_items``.
+SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
+
+
+class Module(pr.Simple, pr.Module):
+    def __init__(self, parsers):
+        super(Module, self).__init__(self, (1, 0))
+        self.parsers = parsers
+        self.reset_caches()
+
+        self.start_pos = 1, 0
+        self.end_pos = None, None
+
+    def reset_caches(self):
+        """ This module does a whole lot of caching, because it uses different
+        parsers. """
+        self._used_names = None
+        for p in self.parsers:
+            p.user_scope = None
+            p.user_stmt = None
+
+    def __getattr__(self, name):
+        if name.startswith('__'):
+            raise AttributeError('Not available!')
+        else:
+            return getattr(self.parsers[0].module, name)
+
+    @property
+    def used_names(self):
+        if self._used_names is None:
+            dct = {}
+            for p in self.parsers:
+                for k, statement_set in p.module.used_names.items():
+                    if k in dct:
+                        dct[k] |= statement_set
+                    else:
+                        dct[k] = set(statement_set)
+
+            self._used_names = dct
+        return self._used_names
+
+    def __repr__(self):
+        return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
+                                   self.start_pos[0], self.end_pos[0])
+
+
+class CachedFastParser(type):
+    """ This is a metaclass for caching `FastParser`. """
+    def __call__(self, source, module_path=None, user_position=None):
+        if not settings.fast_parser:
+            return Parser(source, module_path, user_position)
+
+        pi = cache.parser_cache.get(module_path, None)
+        if pi is None or isinstance(pi.parser, Parser):
+            p = super(CachedFastParser, self).__call__(source, module_path,
+                                                       user_position)
+        else:
+            p = pi.parser  # pi is a `cache.ParserCacheItem`
+            p.update(source, user_position)
+        return p
+
+
+class ParserNode(object):
+    def __init__(self, parser, code, parent=None):
+        self.parent = parent
+        self.code = code
+        self.hash = hash(code)
+
+        self.children = []
+        # must be created before new things are added to it.
+        self.save_contents(parser)
+
+    def save_contents(self, parser):
+        self.parser = parser
+
+        try:
+            # with fast_parser we have either 1 subscope or only statements.
+            self.content_scope = parser.module.subscopes[0]
+        except IndexError:
+            self.content_scope = parser.module
+
+        scope = self.content_scope
+        self._contents = {}
+        for c in SCOPE_CONTENTS:
+            self._contents[c] = list(getattr(scope, c))
+        self._is_generator = scope.is_generator
+
+        self.old_children = self.children
+        self.children = []
+
+    def reset_contents(self):
+        scope = self.content_scope
+        for key, c in self._contents.items():
+            setattr(scope, key, list(c))
+        scope.is_generator = self._is_generator
+        self.parser.user_scope = self.parser.module
+
+        if self.parent is None:
+            # Global vars of the first one can be deleted, in the global scope
+            # they make no sense.
+            self.parser.module.global_vars = []
+
+        for c in self.children:
+            c.reset_contents()
+
+    def parent_until_indent(self, indent=None):
+        if indent is None or self.indent >= indent and self.parent:
+            self.old_children = []
+            if self.parent is not None:
+                return self.parent.parent_until_indent(indent)
+        return self
+
+    @property
+    def indent(self):
+        if not self.parent:
+            return 0
+        module = self.parser.module
+        try:
+            el = module.subscopes[0]
+        except IndexError:
+            try:
+                el = module.statements[0]
+            except IndexError:
+                try:
+                    el = module.imports[0]
+                except IndexError:
+                    try:
+                        el = [r for r in module.returns if r is not None][0]
+                    except IndexError:
+                        return self.parent.indent + 1
+        return el.start_pos[1]
+
+    def _set_items(self, parser, set_parent=False):
+        # insert parser objects into current structure
+        scope = self.content_scope
+        for c in SCOPE_CONTENTS:
+            content = getattr(scope, c)
+            items = getattr(parser.module, c)
+            if set_parent:
+                for i in items:
+                    if i is None:
+                        continue  # happens with empty returns
+                    i.parent = scope.use_as_parent
+                    if isinstance(i, (pr.Function, pr.Class)):
+                        for d in i.decorators:
+                            d.parent = scope.use_as_parent
+            content += items
+
+        # global_vars
+        cur = self
+        while cur.parent is not None:
+            cur = cur.parent
+        cur.parser.module.global_vars += parser.module.global_vars
+
+        scope.is_generator |= parser.module.is_generator
+
+    def add_node(self, node, set_parent=False):
+        """Adding a node means adding a node that was already added earlier"""
+        self.children.append(node)
+        self._set_items(node.parser, set_parent=set_parent)
+        node.old_children = node.children
+        node.children = []
+        return node
+
+    def add_parser(self, parser, code):
+        return self.add_node(ParserNode(parser, code, self), True)
+
+
+class FastParser(use_metaclass(CachedFastParser)):
+    """
+    Parser that splits the source into parts (see ``_split_parts``) and uses
+    one ``Parser`` per part. Parts whose code is already known from an
+    earlier run are not parsed again, their parser is reused (see
+    ``_get_parser``).
+
+    :param code: The source code to parse.
+    :type code: str
+    :param module_path: Passed on to every ``Parser`` that is created.
+    :param user_position: Passed on to every ``Parser`` that is created; also
+        used to set the user scope/statement of reused parsers.
+    """
+    def __init__(self, code, module_path=None, user_position=None):
+        # set values like `pr.Module`.
+        self.module_path = module_path
+        self.user_position = user_position
+        self._user_scope = None
+
+        self.current_node = None
+        # The same list object is handed to `Module`, that's why it is only
+        # ever changed in place (``self.parsers[:] = []``).
+        self.parsers = []
+        self.module = Module(self.parsers)
+        self.reset_caches()
+
+        try:
+            self._parse(code)
+        except:
+            # FastParser is cached, be careful with exceptions
+            self.parsers[:] = []
+            raise
+
+    @property
+    def user_scope(self):
+        """
+        The ``user_scope`` of the last parser that has one which is not a
+        ``pr.SubModule``; ``self.module`` if there is no such parser. The
+        result is cached until ``reset_caches`` is called.
+        """
+        if self._user_scope is None:
+            for p in self.parsers:
+                if p.user_scope:
+                    if isinstance(p.user_scope, pr.SubModule):
+                        continue
+                    self._user_scope = p.user_scope
+
+        if isinstance(self._user_scope, pr.SubModule) \
+                or self._user_scope is None:
+            self._user_scope = self.module
+        return self._user_scope
+
+    @property
+    def user_stmt(self):
+        """
+        The ``user_stmt`` of the first parser that has one, None if no parser
+        has one.
+        """
+        if self._user_stmt is None:
+            for p in self.parsers:
+                if p.user_stmt:
+                    self._user_stmt = p.user_stmt
+                    break
+        return self._user_stmt
+
+    def update(self, code, user_position=None):
+        """
+        Parse ``code`` again on this instance, after resetting the caches.
+        Called by ``CachedFastParser`` for parsers that come from the cache.
+        """
+        self.user_position = user_position
+        self.reset_caches()
+
+
+        try:
+            self._parse(code)
+        except:
+            # FastParser is cached, be careful with exceptions
+            self.parsers[:] = []
+            raise
+
+    def _scan_user_scope(self, sub_module):
+        """
+        Scan with self.user_position: Return the ``pr.Scope`` below
+        ``sub_module`` that contains the position (descending into the first
+        matching scope on each level), None if there is no such scope.
+        """
+        for scope in sub_module.statements + sub_module.subscopes:
+            if isinstance(scope, pr.Scope):
+                if scope.start_pos <= self.user_position <= scope.end_pos:
+                    return self._scan_user_scope(scope) or scope
+        return None
+
+    def _split_parts(self, code):
+        """
+        Split the code into different parts. This makes it possible to parse
+        each part separately and therefore cache parts of the file and not
+        everything.
+
+        The lines of ``code`` are also stored in ``self._lines``.
+
+        :return: The parts of the code, as a list of strings.
+        """
+        def add_part():
+            """
+            Move the collected lines to ``parts`` - as a new part or, if
+            ``add_to_last`` is set, attached to the last part. Nothing
+            happens if the joined lines are an empty string.
+            """
+            txt = '\n'.join(current_lines)
+            if txt:
+                if add_to_last and parts:
+                    parts[-1] += '\n' + txt
+                else:
+                    parts.append(txt)
+                current_lines[:] = []
+
+        # lines that start with def, class, a decorator or a flow keyword.
+        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
+
+        self._lines = code.splitlines()
+        current_lines = []
+        parts = []
+        is_decorator = False
+        current_indent = 0
+        old_indent = 0
+        new_indent = False
+        in_flow = False
+        add_to_last = False
+        # All things within flows are simply being ignored.
+        for i, l in enumerate(self._lines):
+            # check for dedents
+            m = re.match('^([\t ]*)(.?)', l)
+            indent = len(m.group(1))
+            if m.group(2) in ['', '#']:
+                current_lines.append(l)  # just ignore comments and blank lines
+                continue
+
+            if indent < current_indent:  # -> dedent
+                current_indent = indent
+                new_indent = False
+                if not in_flow or indent < old_indent:
+                    add_part()
+                    add_to_last = False
+                in_flow = False
+            elif new_indent:
+                # first line after a def/class/flow line: take over the real
+                # indentation instead of the guessed ``+ 1`` below.
+                current_indent = indent
+                new_indent = False
+
+            # Check lines for functions/classes and split the code there.
+            if not in_flow:
+                m = re.match(r_keyword, l)
+                if m:
+                    in_flow = m.group(1) in common.FLOWS
+                    # a new part starts here, except if the line opens a flow
+                    # or directly follows a decorator line.
+                    if not is_decorator and not in_flow:
+                        add_part()
+                        add_to_last = False
+                    is_decorator = '@' == m.group(1)
+                    if not is_decorator:
+                        old_indent = current_indent
+                        current_indent += 1  # it must be higher
+                        new_indent = True
+                elif is_decorator:
+                    # the decorator was not followed by a keyword line.
+                    is_decorator = False
+                    add_to_last = True
+
+            current_lines.append(l)
+        add_part()
+
+        return parts
+
+    def _parse(self, code):
+        """
+        Split ``code`` into parts and fill ``self.parsers`` with one parser
+        per used part, reusing the nodes of ``self.current_node`` where
+        ``_get_parser`` finds them. Sets ``self.module.end_pos`` at the end.
+
+        :type code: str
+        """
+        def empty_parser():
+            """ Return a parser for an empty string. """
+            new, temp = self._get_parser('', '', 0, [], False)
+            return new
+
+        parts = self._split_parts(code)
+        self.parsers[:] = []
+
+        # line_offset: number of lines before the current part
+        # start: index of the first character of the current part in `code`
+        line_offset = 0
+        start = 0
+        p = None
+        is_first = True
+
+        for code_part in parts:
+            lines = code_part.count('\n') + 1
+            # Parts that start before the end of the last parser are skipped.
+            # NOTE(review): presumably because that parser (which gets the
+            # whole rest of the code) already consumed them - confirm.
+            if is_first or line_offset >= p.end_pos[0]:
+                indent = len(re.match(r'[ \t]*', code_part).group(0))
+                # nodes: the candidates for reuse in `_get_parser`.
+                if is_first and self.current_node is not None:
+                    nodes = [self.current_node]
+                else:
+                    nodes = []
+                if self.current_node is not None:
+
+                    self.current_node = \
+                        self.current_node.parent_until_indent(indent)
+                    nodes += self.current_node.old_children
+
+                # check if code_part has already been parsed
+                # print '#'*45,line_offset, p and p.end_pos, '\n', code_part
+                p, node = self._get_parser(code_part, code[start:],
+                                           line_offset, nodes, not is_first)
+
+                # The actual used code_part is different from the given code
+                # part, because of docstrings for example there's a chance that
+                # splits are wrong.
+                used_lines = self._lines[line_offset:p.end_pos[0]]
+                code_part_actually_used = '\n'.join(used_lines)
+
+                if is_first and p.module.subscopes:
+                    # special case, we cannot use a function subscope as a
+                    # base scope, subscopes would save all the other contents
+                    new = empty_parser()
+                    if self.current_node is None:
+                        self.current_node = ParserNode(new, '')
+                    else:
+                        self.current_node.save_contents(new)
+                    self.parsers.append(new)
+                    is_first = False
+
+                if is_first:
+                    if self.current_node is None:
+                        self.current_node = ParserNode(p, code_part_actually_used)
+                    else:
+                        self.current_node.save_contents(p)
+                else:
+                    # node is None if `p` is a freshly created parser.
+                    if node is None:
+                        self.current_node = \
+                            self.current_node.add_parser(p, code_part_actually_used)
+                    else:
+                        self.current_node = self.current_node.add_node(node)
+
+                # If the parser has no real user scope, but the user position
+                # is within the parser, use the content scope of the parent
+                # node.
+                if self.current_node.parent and (isinstance(p.user_scope,
+                                    pr.SubModule) or p.user_scope is None) \
+                        and self.user_position \
+                        and p.start_pos <= self.user_position < p.end_pos:
+                    p.user_scope = self.current_node.parent.content_scope
+
+                self.parsers.append(p)
+
+                is_first = False
+            #else:
+                #print '#'*45, line_offset, p.end_pos, 'theheck\n', repr(code_part)
+
+            line_offset += lines
+            start += len(code_part) + 1  # +1 for newline
+
+        if self.parsers:
+            # go back to the root node.
+            self.current_node = self.current_node.parent_until_indent()
+        else:
+            self.parsers.append(empty_parser())
+
+        self.module.end_pos = self.parsers[-1].end_pos
+
+        # print(self.parsers[0].module.get_code())
+        del code
+
+    def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
+        """
+        Return a ``(parser, node)`` tuple for ``code``.
+
+        If the first node of ``nodes`` with the same hash also has the same
+        code, that node is removed from ``nodes`` and returned together with
+        its parser, after the line offset and the user scope/statement of the
+        parser have been adjusted. Otherwise a new ``Parser`` is created from
+        ``parser_code`` and the returned node is None.
+
+        :param code: The code of the part, used for the lookup in ``nodes``.
+        :param parser_code: The code that is given to a new ``Parser``.
+        :param line_offset: The number of lines before the part.
+        :param nodes: The ``ParserNode`` objects that may be reused.
+        :param no_docstr: Passed on to a new ``Parser``.
+        """
+        h = hash(code)
+        hashes = [n.hash for n in nodes]
+        node = None
+        try:
+            index = hashes.index(h)
+            # equal hashes don't guarantee equal code.
+            if nodes[index].code != code:
+                raise ValueError()
+        except ValueError:
+            p = Parser(parser_code, self.module_path,
+                               self.user_position, offset=(line_offset, 0),
+                               is_fast_parser=True, top_module=self.module,
+                               no_docstr=no_docstr)
+            p.module.parent = self.module
+        else:
+            if nodes[index] != self.current_node:
+                # `nodes` may start with `self.current_node`, in that case
+                # the indexes of `old_children` are shifted by one.
+                offset = int(nodes[0] == self.current_node)
+                self.current_node.old_children.pop(index - offset)
+            node = nodes.pop(index)
+            p = node.parser
+            m = p.module
+            # move the reused module to its new line.
+            m.line_offset += line_offset + 1 - m.start_pos[0]
+            if self.user_position is not None and \
+                    m.start_pos[0] <= self.user_position[0] <= m.end_pos[0]:
+                # It's important to take care of the whole user
+                # positioning stuff, if no reparsing is being done.
+                p.user_stmt = m.get_statement_for_position(
+                    self.user_position, include_imports=True)
+                if p.user_stmt:
+                    p.user_scope = p.user_stmt.parent
+                else:
+                    p.user_scope = self._scan_user_scope(m) or m
+
+        return p, node
+
+    def reset_caches(self):
+        """
+        Forget the cached user scope/statement, reset the caches of the
+        module and restore the saved contents of the node tree.
+        """
+        self._user_scope = None
+        self._user_stmt = None
+        self.module.reset_caches()
+        if self.current_node is not None:
+            self.current_node.reset_contents()
--- a/jedi/parser/representation.py
+++ b/jedi/parser/representation.py