Merge pull request #208 from davidhalter/fast

New improved Fast Parser
2026-05-24 17:28:36 +08:00 · 2013-05-02 14:11:05 -07:00
parent b3d9b6ce69 6204cb740b
commit 88f39a01cb
9 changed files with 445 additions and 217 deletions
@@ -177,5 +177,7 @@ except ImportError:
 try:
    encoding = sys.stdout.encoding
    if encoding is None:
        encoding = 'utf-8'
 except AttributeError:
    encoding = 'ascii'
@@ -7,6 +7,8 @@ import tokenizer as tokenize
 from jedi._compatibility import next, reraise
 from jedi import settings
 FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
 class MultiLevelStopIteration(Exception):
    """
@@ -81,17 +83,25 @@ class PushBackIterator(object):
 class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), stop_on_scope=False):
+    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
        self.readline = readline
-        self.gen = PushBackIterator(tokenize.generate_tokens(readline))
+        self.gen = tokenize.generate_tokens(readline)
        self.offset = offset
        self.stop_on_scope = stop_on_scope
        self.first_scope = False
        self.closed = False
-        self.first = True
+        self.is_first = True
        self.push_backs = []
        # fast parser options
        self.is_fast_parser = is_fast_parser
        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
        self.in_flow = False
        self.new_indent = False
        self.parser_indent = self.old_parser_indent = 0
        self.is_decorator = False
        self.first_stmt = True
    def push_last_back(self):
-        self.gen.push_back(self.current)
+        self.push_backs.append(self.current)
    def next(self):
        """ Python 2 Compatibility """
@@ -100,25 +110,71 @@ class NoErrorTokenizer(object):
    def __next__(self):
        if self.closed:
            raise MultiLevelStopIteration()
        if self.push_backs:
            return self.push_backs.pop(0)
        self.last_previous = self.previous
        self.previous = self.current
        self.current = next(self.gen)
        c = list(self.current)
-        # stop if a new class or definition is started at position zero.
+        if c[0] == tokenize.ENDMARKER:
-        breaks = ['def', 'class', '@']
+            self.current = self.previous
-        if self.stop_on_scope and c[1] in breaks and c[2][1] == 0:
+            self.previous = self.last_previous
-            if self.first_scope:
+            raise MultiLevelStopIteration()
                self.closed = True
                raise MultiLevelStopIteration()
            elif c[1] != '@':
                self.first_scope = True
-        if self.first:
+        # this is exactly the same check as in fast_parser, but this time with
        # tokenize and therefore precise.
        breaks = ['def', 'class', '@']
        if self.is_first:
            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
-            self.first = False
+            self.is_first = False
        else:
            c[2] = self.offset[0] + c[2][0], c[2][1]
            c[3] = self.offset[0] + c[3][0], c[3][1]
        self.current = c
        def close():
            if not self.first_stmt:
                self.closed = True
                raise MultiLevelStopIteration()
        # ignore indents/comments
        if self.is_fast_parser \
                and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
                                         tokenize.NEWLINE, tokenize.DEDENT) \
                and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
                             tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
            #print c, tokenize.tok_name[c[0]]
            tok = c[1]
            indent = c[2][1]
            if indent < self.parser_indent:  # -> dedent
                self.parser_indent = indent
                self.new_indent = False
                if not self.in_flow or indent < self.old_parser_indent:
                    close()
                self.in_flow = False
            elif self.new_indent:
                self.parser_indent = indent
                self.new_indent = False
            if not self.in_flow:
                if tok in FLOWS or tok in breaks:
                    self.in_flow = tok in FLOWS
                    if not self.is_decorator and not self.in_flow:
                        close()
                    self.is_decorator = '@' == tok
                    if not self.is_decorator:
                        self.old_parser_indent = self.parser_indent
                        self.parser_indent += 1  # new scope: must be higher
                        self.new_indent = True
            if tok != '@':
                if self.first_stmt and not self.new_indent:
                    self.parser_indent = indent
                self.first_stmt = False
        return c
@@ -131,22 +131,22 @@ def get_names_of_scope(scope, position=None, star_search=True,
    ... def func():
    ...     y = None
    ... ''')
-    >>> scope = parser.scope.subscopes[0]
+    >>> scope = parser.module.subscopes[0]
    >>> scope
-    <Function: func@3-6>
+    <Function: func@3-5>
    `get_names_of_scope` is a generator.  First it yields names from the
    innermost scope.
    >>> pairs = list(get_names_of_scope(scope))
    >>> pairs[0]
-    (<Function: func@3-6>, [<Name: y@4,4>])
+    (<Function: func@3-5>, [<Name: y@4,4>])
    Then it yields the names from the scope one level further out.  For
    this example, that is the outermost scope.
    >>> pairs[1]
-    (<SubModule: None@1-6>, [<Name: x@2,0>, <Name: func@3,4>])
+    (<SubModule: None@1-5>, [<Name: x@2,0>, <Name: func@3,4>])
    Finally, it yields names from builtin, if `include_builtin` is
    true (default).
@@ -160,6 +160,10 @@ def get_names_of_scope(scope, position=None, star_search=True,
    in_func_scope = scope
    non_flow = scope.get_parent_until(pr.Flow, reverse=True)
    while scope:
        if isinstance(scope, pr.SubModule) and scope.parent:
            # we don't want submodules to report if we have modules.
            scope = scope.parent
            continue
        # `pr.Class` is used, because the parent is never `Class`.
        # Ignore the Flows, because the classes and functions care for that.
        # InstanceElement of Class is ignored, if it is not the start scope.
@@ -4,84 +4,44 @@ anything changes, it only reparses the changed parts. But because it's not
 finished (and still not working as I want), I won't document it any further.
 """
 import re
 import operator
-from jedi._compatibility import use_metaclass, reduce
+from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi import parsing
 from jedi import parsing_representation as pr
 from jedi import cache
 from jedi import common
 SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
 class Module(pr.Simple, pr.Module):
    def __init__(self, parsers):
        self._end_pos = None, None
        super(Module, self).__init__(self, (1, 0))
        self.parsers = parsers
        self.reset_caches()
-        self.line_offset = 0
+
        self.start_pos = 1, 0
        self.end_pos = None, None
    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses different
        parsers. """
-        self.cache = {}
+        self._used_names = None
        for p in self.parsers:
            p.user_scope = None
            p.user_stmt = None
    def _get(self, name, operation, execute=False, *args, **kwargs):
        key = (name, args, frozenset(kwargs.items()))
        if key not in self.cache:
            if execute:
                objs = (getattr(p.module, name)(*args, **kwargs)
                                                    for p in self.parsers)
            else:
                objs = (getattr(p.module, name) for p in self.parsers)
            self.cache[key] = reduce(operation, objs)
        return self.cache[key]
    def __getattr__(self, name):
-        operators = {
+        if name.startswith('__'):
-            'get_imports': operator.add,
+            raise AttributeError('Not available!')
            'get_code': operator.add,
            'get_set_vars': operator.add,
            'get_defined_names': operator.add,
            'is_empty': operator.and_
        }
        properties = {
            'subscopes': operator.add,
            'imports': operator.add,
            'statements': operator.add,
            'imports': operator.add,
            'asserts': operator.add,
            'global_vars': operator.add
        }
        if name in operators:
            return lambda *args, **kwargs: self._get(name, operators[name],
                                                        True, *args, **kwargs)
        elif name in properties:
            return self._get(name, properties[name])
        else:
-            raise AttributeError("__getattr__ doesn't offer %s" % name)
+            return getattr(self.parsers[0].module, name)
    def get_statement_for_position(self, pos):
        key = 'get_statement_for_position', pos
        if key not in self.cache:
            for p in self.parsers:
                s = p.module.get_statement_for_position(pos)
                if s:
                    self.cache[key] = s
                    break
            else:
                self.cache[key] = None
        return self.cache[key]
    @property
    def used_names(self):
-        if not self.parsers:
+        if self._used_names is None:
            raise NotImplementedError("Parser doesn't exist.")
        key = 'used_names'
        if key not in self.cache:
            dct = {}
            for p in self.parsers:
                for k, statement_set in p.module.used_names.items():
@@ -90,52 +50,8 @@ class Module(pr.Simple, pr.Module):
                    else:
                        dct[k] = set(statement_set)
-            self.cache[key] = dct
+            self._used_names = dct
-        return self.cache[key]
+        return self._used_names
    @property
    def docstr(self):
        if not self.parsers:
            raise NotImplementedError("Parser doesn't exist.")
        return self.parsers[0].module.docstr
    @property
    def name(self):
        if not self.parsers:
            raise NotImplementedError("Parser doesn't exist.")
        return self.parsers[0].module.name
    @property
    def path(self):
        if not self.parsers:
            raise NotImplementedError("Parser doesn't exist.")
        return self.parsers[0].module.path
    @property
    def is_builtin(self):
        if not self.parsers:
            raise NotImplementedError("Parser doesn't exist.")
        return self.parsers[0].module.is_builtin
    @property
    def start_pos(self):
        """ overwrite start_pos of Simple """
        return 1, 0
    @start_pos.setter
    def start_pos(self):
        """ ignore """
        pass
    @property
    def end_pos(self):
        return self._end_pos
    @end_pos.setter
    def end_pos(self, value):
        if None in self._end_pos \
                or None not in value and self._end_pos < value:
            self._end_pos = value
    def __repr__(self):
        return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
@@ -158,12 +74,120 @@ class CachedFastParser(type):
        return p
 class ParserNode(object):
    """
    One node in the parent/children tree built over the fast parser's
    sub-parsers.

    A node keeps the parser it wraps, the source text (and its hash) that
    parser was created from, and a snapshot of the wrapped scope's content
    lists so that they can be restored by `reset_contents`.
    """
    def __init__(self, parser, code, parent=None):
        self.code = code
        self.hash = hash(code)
        self.parent = parent
        # `save_contents` moves `children` into `old_children`, so the list
        # has to exist before it is called.
        self.children = []
        self.save_contents(parser)
    def save_contents(self, parser):
        """
        Attach `parser` to this node and snapshot its scope.

        The first subscope of the parser's module is used as content scope;
        if there is none, the module itself is used.  The current children
        become `old_children` and `children` starts out empty again.
        """
        self.parser = parser
        module = parser.module
        try:
            # with fast_parser we have either 1 subscope or only statements.
            target = module.subscopes[0]
        except IndexError:
            target = module
        self.content_scope = target
        # copies, so later additions to the scope don't touch the snapshot
        # (no dict comprehension: the file still supports Python 2.6)
        self._contents = dict((name, list(getattr(target, name)))
                              for name in SCOPE_CONTENTS)
        self._is_generator = target.is_generator
        self.old_children, self.children = self.children, []
    def reset_contents(self):
        """
        Put the content scope back into the state recorded by
        `save_contents`, then do the same for every child node.
        """
        target = self.content_scope
        for name in self._contents:
            setattr(target, name, list(self._contents[name]))
        target.is_generator = self._is_generator
        parser = self.parser
        parser.user_scope = parser.module
        if self.parent is None:
            # Global vars of the first one can be deleted, in the global scope
            # they make no sense.
            parser.module.global_vars = []
        for child in self.children:
            child.reset_contents()
    def parent_until_indent(self, indent=None):
        """
        Walk up the parents and return the first node that should not be
        left.  With `indent=None` the walk goes all the way to the root.
        Every node that is left gets its `old_children` emptied.
        """
        climb = indent is None or (self.indent >= indent and self.parent)
        if not climb:
            return self
        self.old_children = []
        if self.parent is None:
            return self
        return self.parent.parent_until_indent(indent)
    @property
    def indent(self):
        """
        Column of the first element found in the parser's module, looking at
        subscopes, statements, imports and non-None returns in that order.
        A node without parent has indent 0; a node without any such element
        reports its parent's indent plus one.
        """
        if not self.parent:
            return 0
        module = self.parser.module
        for name in ('subscopes', 'statements', 'imports'):
            try:
                first = getattr(module, name)[0]
            except IndexError:
                continue
            break
        else:
            try:
                first = [r for r in module.returns if r is not None][0]
            except IndexError:
                return self.parent.indent + 1
        return first.start_pos[1]
    def _set_items(self, parser, set_parent=False):
        """
        Append the contents of `parser.module` to this node's content scope.

        With `set_parent` the appended items (and the decorators of functions
        and classes) are re-parented to the content scope.  Global vars are
        collected on the root node's module.
        """
        target = self.content_scope
        source = parser.module
        for name in SCOPE_CONTENTS:
            existing = getattr(target, name)
            incoming = getattr(source, name)
            if set_parent:
                for item in incoming:
                    # empty returns are stored as None
                    if item is not None:
                        item.parent = target.use_as_parent
                        if isinstance(item, (pr.Function, pr.Class)):
                            for decorator in item.decorators:
                                decorator.parent = target.use_as_parent
            existing += incoming
        # global_vars
        root = self
        while root.parent is not None:
            root = root.parent
        root.parser.module.global_vars += source.global_vars
        target.is_generator |= source.is_generator
    def add_node(self, node, set_parent=False):
        """Adding a node means adding a node that was already added earlier"""
        self.children.append(node)
        self._set_items(node.parser, set_parent=set_parent)
        node.old_children, node.children = node.children, []
        return node
    def add_parser(self, parser, code):
        """Wrap `parser` in a new child node of this node and add it."""
        child = ParserNode(parser, code, self)
        return self.add_node(child, True)
 class FastParser(use_metaclass(CachedFastParser)):
    def __init__(self, code, module_path=None, user_position=None):
        # set values like `pr.Module`.
        self.module_path = module_path
        self.user_position = user_position
        self._user_scope = None
        self.current_node = None
        self.parsers = []
        self.module = Module(self.parsers)
        self.reset_caches()
@@ -175,12 +199,12 @@ class FastParser(use_metaclass(CachedFastParser)):
        if self._user_scope is None:
            for p in self.parsers:
                if p.user_scope:
-                    if self._user_scope is not None and not \
+                    if isinstance(p.user_scope, pr.SubModule):
                            isinstance(self._user_scope, pr.SubModule):
                        continue
                    self._user_scope = p.user_scope
-        if isinstance(self._user_scope, pr.SubModule):
+        if isinstance(self._user_scope, pr.SubModule) \
                    or self._user_scope is None:
            self._user_scope = self.module
        return self._user_scope
@@ -199,79 +223,201 @@ class FastParser(use_metaclass(CachedFastParser)):
        self._parse(code)
-    def scan_user_scope(self, sub_module):
+    def _scan_user_scope(self, sub_module):
-        """ Scan with self.user_position.
+        """ Scan with self.user_position. """
        :type sub_module: pr.SubModule
        """
        for scope in sub_module.statements + sub_module.subscopes:
            if isinstance(scope, pr.Scope):
                if scope.start_pos <= self.user_position <= scope.end_pos:
-                    return self.scan_user_scope(scope) or scope
+                    return self._scan_user_scope(scope) or scope
        return None
    def _split_parts(self, code):
        """
        Split the code into different parts. This makes it possible to parse
        each part separately and therefore cache parts of the file and not
        everything.

        A new part is started in front of a line that begins with ``def``,
        ``class`` or ``@`` and when the code dedents out of such a block.
        Lines belonging to a flow (one of ``common.FLOWS``) are not split.

        :param code: The source code to split.
        :return: A list of strings, the parts in source order.
        """
        def add_part():
            # Flush the collected lines as one part.  If the previous part
            # was only a decorator (`add_to_last`), the text is glued onto it
            # instead.  The buffer is only cleared if there was any text.
            txt = '\n'.join(current_lines)
            if txt:
                if add_to_last and parts:
                    parts[-1] += '\n' + txt
                else:
                    parts.append(txt)
                current_lines[:] = []
        # NOTE(review): there is no word boundary after the keyword, so a
        # line starting with e.g. `define` or `classes` matches as well --
        # confirm whether that is intended.
        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
        lines = code.splitlines()
        current_lines = []    # lines collected for the part being built
        parts = []
        is_decorator = False  # previous keyword line was an `@` line
        current_indent = 0
        old_indent = 0        # indent in effect before the last keyword line
        new_indent = False    # the next code line defines `current_indent`
        in_flow = False
        add_to_last = False
        # All things within flows are simply being ignored.
        for i, l in enumerate(lines):
            # check for dedents
            # group(1): leading whitespace, group(2): first character after
            # it ('' for a blank line)
            m = re.match('^([\t ]*)(.?)', l)
            indent = len(m.group(1))
            if m.group(2) in ['', '#']:
                current_lines.append(l)  # just ignore comments and blank lines
                continue
            if indent < current_indent:  # -> dedent
                current_indent = indent
                new_indent = False
                # a dedent that only leaves a flow doesn't end the part
                if not in_flow or indent < old_indent:
                    add_part()
                    add_to_last = False
                in_flow = False
            elif new_indent:
                # first code line after a keyword line: take over its indent
                current_indent = indent
                new_indent = False
            # Check lines for functions/classes and split the code there.
            if not in_flow:
                m = re.match(r_keyword, l)
                if m:
                    in_flow = m.group(1) in common.FLOWS
                    # a def/class directly after a decorator stays in the
                    # part that the decorator started
                    if not is_decorator and not in_flow:
                        add_part()
                        add_to_last = False
                    is_decorator = '@' == m.group(1)
                    if not is_decorator:
                        old_indent = current_indent
                        current_indent += 1  # it must be higher
                        new_indent = True
                elif is_decorator:
                    # the decorator was not followed by a keyword line
                    is_decorator = False
                    add_to_last = True
            current_lines.append(l)
        # flush whatever is left after the last line
        add_part()
        return parts
    def _parse(self, code):
        """ :type code: str """
-        r = r'(?:\n(?:def|class|@.*?\n(?:def|class))|^).*?' \
+        def empty_parser():
-            r'(?=\n(?:def|class|@)|$)'
+            new, temp = self._get_parser('', '', 0, [])
-        parts = re.findall(r, code, re.DOTALL)
+            return new
-        if len(parts) > 1 and not re.match('def|class|@', parts[0]):
+        parts = self._split_parts(code)
-            # Merge the first two because `common.NoErrorTokenizer` is not able
+        self.parsers[:] = []
            # to know if there's a class/func or not.
            # Therefore every part has it's own class/func. Exactly one.
            parts[0] += parts[1]
            parts.pop(1)
        if settings.fast_parser_always_reparse:
            self.parsers[:] = []
        # dict comprehensions are not available in 2.6 :-(
        hashes = dict((p.hash, p) for p in self.parsers)
        line_offset = 0
        start = 0
        p = None
-        parser_order = 0
+        is_first = True
        for code_part in parts:
-            lines = code_part.count('\n')
+            lines = code_part.count('\n') + 1
-            # the parser is using additional newlines, therefore substract
+            if is_first or line_offset >= p.end_pos[0]:
-            if p is None or line_offset >= p.end_pos[0] - 2:
+                indent = len(re.match(r'[ \t]*', code_part).group(0))
-                # check if code_part has already been parsed
+                if is_first and self.current_node is not None:
-                h = hash(code_part)
+                    nodes = [self.current_node]
                if h in hashes and hashes[h].code == code_part:
                    p = hashes[h]
                    del hashes[h]
                    m = p.module
                    m.line_offset += line_offset + 1 - m.start_pos[0]
                    if self.user_position is not None and \
                            m.start_pos <= self.user_position <= m.end_pos:
                        # It's important to take care of the whole user
                        # positioning stuff, if no reparsing is being done.
                        p.user_stmt = m.get_statement_for_position(
                                    self.user_position, include_imports=True)
                        if p.user_stmt:
                            p.user_scope = p.user_stmt.parent
                        else:
                            p.user_scope = self.scan_user_scope(m) \
                                            or self.module
                else:
-                    p = parsing.Parser(code[start:],
+                    nodes = []
-                                self.module_path, self.user_position,
+                if self.current_node is not None:
                                offset=(line_offset, 0), stop_on_scope=True,
                                top_module=self.module)
-                    p.hash = h
+                    self.current_node = \
-                    p.code = code_part
+                                self.current_node.parent_until_indent(indent)
-                    p.module.parent = self.module
+                    nodes += self.current_node.old_children
-                self.parsers.insert(parser_order, p)
+
                # check if code_part has already been parsed
                #print '#'*45,line_offset, p and p.end_pos, '\n', code_part
                p, node = self._get_parser(code_part, code[start:],
                                           line_offset, nodes)
                if is_first and p.module.subscopes:
                    # special case, we cannot use a function subscope as a
                    # base scope, subscopes would save all the other contents
                    new = empty_parser()
                    if self.current_node is None:
                        self.current_node = ParserNode(new, '')
                    else:
                        self.current_node.save_contents(new)
                    self.parsers.append(new)
                    is_first = False
                if is_first:
                    if self.current_node is None:
                        self.current_node = ParserNode(p, code_part)
                    else:
                        self.current_node.save_contents(p)
                else:
                    if node is None:
                        self.current_node = \
                                    self.current_node.add_parser(p, code_part)
                    else:
                        self.current_node = self.current_node.add_node(node)
                if self.current_node.parent and (isinstance(p.user_scope,
                                pr.SubModule) or p.user_scope is None) \
                        and self.user_position \
                        and p.start_pos <= self.user_position < p.end_pos:
                    p.user_scope = self.current_node.parent.content_scope
                self.parsers.append(p)
                is_first = False
            else:
                #print '#'*45, line_offset, p.end_pos, 'theheck\n', code_part 
                pass
                parser_order += 1
            line_offset += lines
-            start += len(code_part)
+            start += len(code_part) + 1  # +1 for newline
-        self.parsers[parser_order + 1:] = []
+
        if self.parsers:
            self.current_node = self.current_node.parent_until_indent()
        else:
            self.parsers.append(empty_parser())
        self.module.end_pos = self.parsers[-1].end_pos
        #print(self.parsers[0].module.get_code())
        del code
    def _get_parser(self, code, parser_code, line_offset, nodes):
        """
        Return a parser for `code`, reusing one from `nodes` if possible.

        :param code: The code part that is looked up by hash and by text.
        :param parser_code: The source handed to a newly created parser.
        :param line_offset: Line offset of the part within the whole source.
        :param nodes: Candidate nodes for reuse; a reused node is removed
            from this list.
        :return: A tuple ``(parser, node)``.  `node` is the reused node, or
            ``None`` if a new parser had to be created.
        """
        h = hash(code)
        hashes = [n.hash for n in nodes]
        node = None
        try:
            index = hashes.index(h)
            # same hash but different text: treat it like "not found"
            if nodes[index].code != code:
                raise ValueError()
        except ValueError:
            # nothing to reuse -> parse from scratch
            p = parsing.Parser(parser_code, self.module_path,
                               self.user_position, offset=(line_offset, 0),
                               is_fast_parser=True, top_module=self.module)
            p.module.parent = self.module
        else:
            if nodes[index] != self.current_node:
                # presumably `nodes` is `old_children`, optionally preceded
                # by `current_node` itself, hence the index shift -- verify
                # against the caller.
                offset = int(nodes[0] == self.current_node)
                self.current_node.old_children.pop(index - offset)
            node = nodes.pop(index)
            p = node.parser
            m = p.module
            # shift the reused module to where the part starts now
            m.line_offset += line_offset + 1 - m.start_pos[0]
            if self.user_position is not None and \
                    m.start_pos[0] <= self.user_position[0] <= m.end_pos[0]:
                # It's important to take care of the whole user
                # positioning stuff, if no reparsing is being done.
                p.user_stmt = m.get_statement_for_position(
                            self.user_position, include_imports=True)
                if p.user_stmt:
                    p.user_scope = p.user_stmt.parent
                else:
                    p.user_scope = self._scan_user_scope(m) or m
        return p, node
    def reset_caches(self):
        self._user_scope = None
        self._user_stmt = None
        self.module.reset_caches()
        if self.current_node is not None:
            self.current_node.reset_contents()
@@ -42,29 +42,27 @@ class Parser(object):
    :param user_position: The line/column, the user is currently on.
    :type user_position: tuple(int, int)
    :param no_docstr: If True, a string at the beginning is not a docstr.
-    :param stop_on_scope: Stop if a scope appears -> for fast_parser
+    :param is_fast_parser: -> for fast_parser
    :param top_module: Use this module as a parent instead of `self.module`.
    """
    def __init__(self, source, module_path=None, user_position=None,
-                        no_docstr=False, offset=(0, 0), stop_on_scope=None,
+                        no_docstr=False, offset=(0, 0), is_fast_parser=None,
                        top_module=None):
        self.user_position = user_position
        self.user_scope = None
        self.user_stmt = None
        self.no_docstr = no_docstr
        self.start_pos = self.end_pos = 1 + offset[0], offset[1]
        # initialize global Scope
-        self.module = pr.SubModule(module_path, (offset[0] + 1, offset[1]),
+        self.module = pr.SubModule(module_path, self.start_pos, top_module)
                                                            top_module)
        self.scope = self.module
        self.current = (None, None)
        self.start_pos = 1, 0
        self.end_pos = 1, 0
        source = source + '\n'  # end with \n, because the parser needs it
        buf = StringIO(source)
        self._gen = common.NoErrorTokenizer(buf.readline, offset,
-                                            stop_on_scope)
+                                            is_fast_parser)
        self.top_module = top_module or self.module
        try:
            self._parse()
@@ -80,6 +78,12 @@ class Parser(object):
            # because of `self.module.used_names`.
            d.parent = self.module
        if self.current[0] in (tokenize.NL, tokenize.NEWLINE):
            # we added a newline before, so we need to "remove" it again.
            self.end_pos = self._gen.previous[2]
        if self.current[0] == tokenize.INDENT:
            self.end_pos = self._gen.last_previous[2]
        self.start_pos = self.module.start_pos
        self.module.end_pos = self.end_pos
        del self._gen
@@ -171,8 +175,6 @@ class Parser(object):
        while True:
            defunct = False
            token_type, tok = self.next()
            if token_type == tokenize.ENDMARKER:
                break
            if brackets and tok == '\n':
                self.next()
            if tok == '(':  # python allows only one `(` in the statement.
@@ -421,12 +423,18 @@ class Parser(object):
    def __next__(self):
        """ Generate the next tokenize pattern. """
        try:
-            typ, tok, self.start_pos, self.end_pos, \
+            typ, tok, start_pos, end_pos, self.parserline = next(self._gen)
-                                self.parserline = next(self._gen)
+            # dedents shouldn't change positions
            if typ != tokenize.DEDENT:
                self.start_pos, self.end_pos = start_pos, end_pos
        except (StopIteration, common.MultiLevelStopIteration):
            # on finish, set end_pos correctly
            s = self.scope
            while s is not None:
                if isinstance(s, pr.Module) \
                                     and not isinstance(s, pr.SubModule):
                    self.module.end_pos = self.end_pos
                    break
                s.end_pos = self.end_pos
                s = s.parent
            raise
@@ -662,7 +670,6 @@ class Parser(object):
                self.freshscope = False
            else:
                if token_type not in [tokenize.COMMENT, tokenize.INDENT,
-                                      tokenize.NEWLINE, tokenize.NL,
+                                      tokenize.NEWLINE, tokenize.NL]:
                                      tokenize.ENDMARKER]:
                    debug.warning('token not classified', tok, token_type,
                                                        self.start_pos[0])
@@ -22,7 +22,7 @@ The easiest way to play with this module is to use :class:`parsing.Parser`.
 >>> parser = Parser('import os', 'example.py')
 >>> submodule = parser.scope
 >>> submodule
-<SubModule: example.py@1-2>
+<SubModule: example.py@1-1>
 Any subclasses of :class:`Scope`, including :class:`SubModule` has
 attribute :attr:`imports <Scope.imports>`.  This attribute has import
@@ -32,7 +32,6 @@ statements in this scope.  Check this out:
 [<Import: import os @1,0>]
 See also :attr:`Scope.subscopes` and :attr:`Scope.statements`.
 """
 from __future__ import with_statement
@@ -152,6 +151,10 @@ class Scope(Simple, IsScope):
        self.statements = []
        self.docstr = ''
        self.asserts = []
        # Needed here for fast_parser, because the fast_parser splits and
        # returns will be in "normal" modules.
        self.returns = []
        self.is_generator = False
    def add_scope(self, sub, decorators):
        sub.parent = self.use_as_parent
@@ -196,15 +199,15 @@ class Scope(Simple, IsScope):
        string = ""
        if len(self.docstr) > 0:
            string += '"""' + self.docstr + '"""\n'
        for i in self.imports:
            string += i.get_code()
        for sub in self.subscopes:
            string += sub.get_code(first_indent=True, indention=indention)
-        returns = self.returns if hasattr(self, 'returns') else []
+        objs = self.subscopes + self.imports + self.statements + self.returns
-        ret_str = '' if isinstance(self, Lambda) else 'return '
+        for obj in sorted(objs, key=lambda x: x.start_pos):
-        for stmt in self.statements + returns:
+            if isinstance(obj, Scope):
-            string += (ret_str if stmt in returns else '') + stmt.get_code()
+                string += obj.get_code(first_indent=True, indention=indention)
            else:
                if obj in self.returns and not isinstance(self, Lambda):
                    string += 'yield ' if self.is_generator else 'return '
                string += obj.get_code()
        if first_indent:
            string = common.indent_block(string, indention=indention)
@@ -399,7 +402,7 @@ class Class(Scope):
        string = "\n".join('@' + stmt.get_code() for stmt in self.decorators)
        string += 'class %s' % (self.name)
        if len(self.supers) > 0:
-            sup = ','.join(stmt.get_code() for stmt in self.supers)
+            sup = ', '.join(stmt.get_code(False) for stmt in self.supers)
            string += '(%s)' % sup
        string += ':\n'
        string += super(Class, self).get_code(True, indention)
@@ -441,8 +444,6 @@ class Function(Scope):
            p.parent = self.use_as_parent
            p.parent_function = self.use_as_parent
        self.decorators = []
        self.returns = []
        self.is_generator = False
        self.listeners = set()  # not used here, but in evaluation.
        if annotation is not None:
@@ -451,13 +452,16 @@ class Function(Scope):
    def get_code(self, first_indent=False, indention='    '):
        string = "\n".join('@' + stmt.get_code() for stmt in self.decorators)
-        params = ','.join([stmt.get_code() for stmt in self.params])
+        params = ', '.join([stmt.get_code(False) for stmt in self.params])
        string += "def %s(%s):\n" % (self.name, params)
        string += super(Function, self).get_code(True, indention)
        if self.is_empty():
-            string += "pass\n"
+            string += 'pass\n'
        return string
    def is_empty(self):
        """
        A function counts as empty only if the generic scope check says so
        and it has no return statements either.
        """
        scope_is_empty = super(Function, self).is_empty()
        if not scope_is_empty:
            # Hand back the parent's (falsy) answer untouched.
            return scope_is_empty
        return not self.returns
    def get_set_vars(self):
        n = super(Function, self).get_set_vars()
        for p in self.params:
@@ -33,7 +33,6 @@ Parser
 ~~~~~~
 .. autodata:: fast_parser
 .. autodata:: fast_parser_always_reparse
 .. autodata:: use_function_definition_cache
@@ -150,12 +149,6 @@ something has been changed e.g. to a function. If this happens, only the
 function is being reparsed.
 """
 fast_parser_always_reparse = False
 """
 This is just a debugging option. Always reparsing means that the fast parser
 is basically useless. So don't use it.
 """
 use_function_definition_cache = True
 """
 Use the cache (full cache) to generate function_definition's. This may fail
@@ -126,6 +126,10 @@ class IntegrationTestCase(object):
        self.path = path
        self.skip = None
    @property
    def module_name(self):
        return re.sub('.*/|\.py', '', self.path)
    def __repr__(self):
        name = os.path.basename(self.path) if self.path else None
        return '<%s: %s:%s:%s>' % (self.__class__.__name__,
@@ -203,7 +207,7 @@ class IntegrationTestCase(object):
                # this means that there is a module specified
                wanted.append(pos_tup)
            else:
-                wanted.append(('renaming', self.line_nr + pos_tup[0],
+                wanted.append((self.module_name, self.line_nr + pos_tup[0],
                                pos_tup[1]))
        return compare_cb(self, compare, sorted(wanted))
@@ -327,7 +331,8 @@ if __name__ == '__main__':
        cases += collect_dir_tests(completion_test_dir, test_files, True)
    def file_change(current, tests, fails):
-        current = os.path.basename(current)
+        if current is not None:
            current = os.path.basename(current)
        print('%s \t\t %s tests and %s fails.' % (current, tests, fails))
    def report(case, actual, desired):
@@ -335,13 +340,20 @@ if __name__ == '__main__':
            return 0
        else:
            print("\ttest fail @%d, actual = %s, desired = %s"
-                    % (case.line_nr, actual, desired))
+                    % (case.line_nr - 1, actual, desired))
            return 1
    import traceback
    current = cases[0].path if cases else None
    count = fails = 0
    for c in cases:
-        if c.run(report):
+        try:
            if c.run(report):
                tests_fail += 1
                fails += 1
        except Exception:
            traceback.print_exc()
            print("\ttest fail @%d" % (c.line_nr - 1))
            tests_fail += 1
            fails += 1
        count += 1
@@ -18,6 +18,8 @@ from jedi._compatibility import utf8, unicode
 from jedi import api
 api_classes = api.api_classes
 import pytest
 #jedi.set_debug_function(jedi.debug.print_to_stdout)
@@ -296,10 +298,12 @@ class TestRegression(TestBase):
    def test_unicode_attribute(self):
        """ github jedi-vim issue #94 """
-        s1 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n    name = "e"\n\nPerson().name.')
+        s1 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n'
                  '    name = "e"\n\nPerson().name.')
        completions1 = self.complete(s1)
        assert 'strip' in [c.word for c in completions1]
-        s2 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n    name = "é"\n\nPerson().name.')
+        s2 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n'
                  '    name = "é"\n\nPerson().name.')
        completions2 = self.complete(s2)
        assert 'strip' in [c.word for c in completions2]