Some ideas for a diff parser.

Dave Halter
2016-08-14 00:23:40 +02:00
parent 721195157a
commit b9040870c0
4 changed files with 172 additions and 582 deletions


@@ -107,13 +107,13 @@ class Parser(object):
         # For the fast parser.
         self.position_modifier = pt.PositionModifier()
-        self.source = source
         self._added_newline = False
         # The Python grammar needs a newline at the end of each statement.
         if not source.endswith('\n') and start_symbol == 'file_input':
             source += '\n'
             self._added_newline = True
+        self.source = source
         self._start_symbol = start_symbol
         self._grammar = grammar
@@ -129,15 +129,12 @@ class Parser(object):
return self._parsed return self._parsed
start_number = self._grammar.symbol2number[self._start_symbol] start_number = self._grammar.symbol2number[self._start_symbol]
pgen_parser = PgenParser( self.pgen_parser = PgenParser(
self._grammar, self.convert_node, self.convert_leaf, self._grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number self.error_recovery, start_number
) )
try: self._parsed = self.pgen_parser.parse(tokenizer)
self._parsed = pgen_parser.parse(tokenizer)
finally:
self.stack = pgen_parser.stack
if self._start_symbol == 'file_input' != self._parsed.type: if self._start_symbol == 'file_input' != self._parsed.type:
# If there's only one statement, we get back a non-module. That's # If there's only one statement, we get back a non-module. That's
@@ -148,9 +145,15 @@ class Parser(object):
         if self._added_newline:
             self.remove_last_newline()
+        # The stack is empty now, we don't need it anymore.
+        del self.pgen_parser
+        return self._parsed
     def get_parsed_node(self):
-        # TODO rename to get_root_node
+        # TODO remove in favor of get_root_node
+        return self._parsed
+    def get_root_node(self):
         return self._parsed
     def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,


@@ -5,617 +5,204 @@ finished (and still not working as I want), I won't document it any further.
 """
 import re
 from itertools import chain
+import difflib
 from jedi._compatibility import use_metaclass
 from jedi import settings
+from jedi.common import splitlines
 from jedi.parser import ParserWithRecovery
 from jedi.parser import tree
 from jedi.parser.utils import underscore_memoization, parser_cache
+from jedi.parser import tokenize
 from jedi import debug
-from jedi.parser.tokenize import (source_tokens, NEWLINE,
+from jedi.parser.tokenize import (generate_tokens, NEWLINE,
                                   ENDMARKER, INDENT, DEDENT)
-FLOWS = 'if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally', 'for'
-class FastModule(tree.Module):
-    type = 'file_input'
-    def __init__(self, module_path):
-        super(FastModule, self).__init__([])
-        self.modules = []
-        self.reset_caches()
-        self.names_dict = {}
-        self.path = module_path
-    def reset_caches(self):
-        self.modules = []
-        try:
-            del self._used_names  # Remove the used names cache.
-        except AttributeError:
-            pass  # It was never used.
-    @property
-    @underscore_memoization
-    def used_names(self):
-        return MergedNamesDict([m.used_names for m in self.modules])
-    @property
-    def global_names(self):
-        return [name for m in self.modules for name in m.global_names]
-    @property
-    def error_statements(self):
-        return [e for m in self.modules for e in m.error_statements]
-    def __repr__(self):
-        return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
-                                        self.start_pos[0], self.end_pos[0])
-    # To avoid issues with with the `parser.ParserWithRecovery`, we need
-    # setters that do nothing, because if pickle comes along and sets those
-    # values.
-    @global_names.setter
-    def global_names(self, value):
-        pass
-    @error_statements.setter
-    def error_statements(self, value):
-        pass
-    @used_names.setter
-    def used_names(self, value):
-        pass
-class MergedNamesDict(object):
-    def __init__(self, dicts):
-        self.dicts = dicts
-    def __iter__(self):
-        return iter(set(key for dct in self.dicts for key in dct))
-    def __getitem__(self, value):
-        return list(chain.from_iterable(dct.get(value, []) for dct in self.dicts))
-    def items(self):
-        dct = {}
-        for d in self.dicts:
-            for key, values in d.items():
-                try:
-                    dct_values = dct[key]
-                    dct_values += values
-                except KeyError:
-                    dct[key] = list(values)
-        return dct.items()
-    def values(self):
-        lst = []
-        for dct in self.dicts:
-            lst += dct.values()
-        return lst
 class CachedFastParser(type):
     """ This is a metaclass for caching `FastParser`. """
     def __call__(self, grammar, source, module_path=None):
-        if not settings.fast_parser:
+        pi = parser_cache.get(module_path, None)
+        if pi is None or not settings.fast_parser:
             return ParserWithRecovery(grammar, source, module_path)
-        pi = parser_cache.get(module_path, None)
-        if pi is None or isinstance(pi.parser, ParserWithRecovery):
-            p = super(CachedFastParser, self).__call__(grammar, source, module_path)
-        else:
-            p = pi.parser  # pi is a `cache.ParserCacheItem`
-            p.update(source)
-        return p
+        parser = pi.parser
+        d = DiffParser(parser)
+        d.update(splitlines(source, keepends=True))
+        return parser
-class ParserNode(object):
-    def __init__(self, fast_module, parser, source):
-        self._fast_module = fast_module
-        self.parent = None
-        self._node_children = []
-        self.source = source
-        self.hash = hash(source)
-        self.parser = parser
-        if source:
-            self._end_pos = parser.module.end_pos
-        else:
-            self._end_pos = 1, 0
-        try:
-            # With fast_parser we have either 1 subscope or only statements.
-            self._content_scope = parser.module.subscopes[0]
-            # A parsed node's content will be in the first indent, because
-            # everything that's parsed is within this subscope.
-            self._is_class_or_def = True
-        except IndexError:
-            self._content_scope = parser.module
-            self._is_class_or_def = False
-        else:
-            self._rewrite_last_newline()
-        # We need to be able to reset the original children of a parser.
-        self._old_children = list(self._content_scope.children)
-    def is_root_node(self):
-        return self.parent is None
-    def _rewrite_last_newline(self):
-        """
-        The ENDMARKER can contain a newline in the prefix. However this prefix
-        really belongs to the function - respectively to the next function or
-        parser node. If we don't rewrite that newline, we end up with a newline
-        in the wrong position, i.d. at the end of the file instead of in the
-        middle.
-        """
-        c = self._content_scope.children
-        if tree.is_node(c[-1], 'suite'):  # In a simple_stmt there's no DEDENT.
-            end_marker = self.parser.module.children[-1]
-            # Set the DEDENT prefix instead of the ENDMARKER.
-            c[-1].children[-1].prefix = end_marker.prefix
-            end_marker.prefix = ''
-    def __repr__(self):
-        module = self.parser.module
-        try:
-            return '<%s: %s-%s>' % (type(self).__name__, module.start_pos, module.end_pos)
-        except IndexError:
-            # There's no module yet.
-            return '<%s: empty>' % type(self).__name__
-    @property
-    def end_pos(self):
-        return self._end_pos[0] + self.parser.position_modifier.line, self._end_pos[1]
-    def reset_node(self):
-        """
-        Removes changes that were applied in this class.
-        """
-        self._node_children = []
-        scope = self._content_scope
-        scope.children = list(self._old_children)
-        try:
-            # This works if it's a MergedNamesDict.
-            # We are correcting it, because the MergedNamesDicts are artificial
-            # and can change after closing a node.
-            scope.names_dict = scope.names_dict.dicts[0]
-        except AttributeError:
-            pass
-    def close(self):
-        """
-        Closes the current parser node. This means that after this no further
-        nodes should be added anymore.
-        """
-        # We only need to replace the dict if multiple dictionaries are used:
-        if self._node_children:
-            dcts = [n.parser.module.names_dict for n in self._node_children]
-            # Need to insert the own node as well.
-            dcts.insert(0, self._content_scope.names_dict)
-            self._content_scope.names_dict = MergedNamesDict(dcts)
-            endmarker = self.parser.get_parsed_node().children[-1]
-            assert endmarker.type == 'endmarker'
-            last_parser = self._node_children[-1].parser
-            endmarker.start_pos = last_parser.get_parsed_node().end_pos
-    @property
-    def _indent(self):
-        if self.is_root_node():
-            return 0
-        return self.parser.module.children[0].start_pos[1]
-    def add_node(self, node, start_line, indent):
-        """
-        Adding a node means adding a node that was either just parsed or one
-        that can be reused.
-        """
-        # Content that is not a subscope can never be part of the current node,
-        # because it's basically a sister node, that sits next to it and not
-        # within it.
-        if (self._indent >= indent or not self._is_class_or_def) and \
-                not self.is_root_node():
-            self.close()
-            return self.parent.add_node(node, start_line, indent)
-        # Changing the line offsets is very important, because if they don't
-        # fit, all the start_pos values will be wrong.
-        m = node.parser.module
-        node.parser.position_modifier.line = start_line - 1
-        self._fast_module.modules.append(m)
-        node.parent = self
-        self._node_children.append(node)
-        # Insert parser objects into current structure. We only need to set the
-        # parents and children in a good way.
-        scope = self._content_scope
-        for child in m.children:
-            child.parent = scope
-            scope.children.append(child)
-        return node
-    def all_sub_nodes(self):
-        """
-        Returns all nodes including nested ones.
-        """
-        for n in self._node_children:
-            yield n
-            for y in n.all_sub_nodes():
-                yield y
-    @underscore_memoization  # Should only happen once!
-    def remove_last_newline(self):
-        self.parser.remove_last_newline()
 class FastParser(use_metaclass(CachedFastParser)):
-    _FLOWS_NEED_SPACE = 'if', 'elif', 'while', 'with', 'except', 'for'
-    _FLOWS_NEED_COLON = 'else', 'try', 'except', 'finally'
-    _keyword_re = re.compile('^[ \t]*(def |class |@|(?:%s)|(?:%s)\s*:)'
-                             % ('|'.join(_FLOWS_NEED_SPACE),
-                                '|'.join(_FLOWS_NEED_COLON)))
-    def __init__(self, grammar, source, module_path=None):
-        # set values like `tree.Module`.
-        self._grammar = grammar
-        self.module_path = module_path
-        self._reset_caches()
-        self.update(source)
-    def _reset_caches(self):
-        self.module = FastModule(self.module_path)
-        self.root_node = self.current_node = ParserNode(self.module, self, '')
-    def get_parsed_node(self):
-        return self.module
-    def update(self, source):
-        # Variables for testing purposes: It is important that the number of
-        # parsers used can be minimized. With these variables we can test
-        # against that.
-        self.number_parsers_used = 0
-        self.number_of_splits = 0
-        self.number_of_misses = 0
-        self.module.reset_caches()
-        self.source = source
-        try:
-            self._parse(source)
-        except:
-            # FastParser is cached, be careful with exceptions.
-            self._reset_caches()
-            raise
+    pass
+class DiffParser():
+    def __init__(self, parser):
+        self._parser = parser
+        self._module = parser.get_root_node()
+    def _reset(self):
+        self._delete_count = 0
+        self._insert_count = 0
+        self._parsed_until_line = 0
-    def _split_parts(self, source):
-        """
-        Split the source code into different parts. This makes it possible to
-        parse each part seperately and therefore cache parts of the file and
-        not everything.
-        """
-        def gen_part():
-            text = ''.join(current_lines)
-            del current_lines[:]
-            self.number_of_splits += 1
-            return text
-        def just_newlines(current_lines):
-            for line in current_lines:
-                line = line.lstrip('\t \n\r')
-                if line and line[0] != '#':
-                    return False
-            return True
-        # Split only new lines. Distinction between \r\n is the tokenizer's
-        # job.
-        # It seems like there's no problem with form feed characters here,
-        # because we're not counting lines.
-        self._lines = source.splitlines(True)
-        current_lines = []
-        is_decorator = False
-        # Use -1, because that indent is always smaller than any other.
-        indent_list = [-1, 0]
-        new_indent = False
-        parentheses_level = 0
-        flow_indent = None
-        previous_line = None
-        # All things within flows are simply being ignored.
-        for i, l in enumerate(self._lines):
-            # Handle backslash newline escaping.
-            if l.endswith('\\\n') or l.endswith('\\\r\n'):
-                if previous_line is not None:
-                    previous_line += l
-                else:
-                    previous_line = l
-                continue
-            if previous_line is not None:
-                l = previous_line + l
-                previous_line = None
-            # check for dedents
-            s = l.lstrip('\t \n\r')
-            indent = len(l) - len(s)
-            if not s or s[0] == '#':
-                current_lines.append(l)  # Just ignore comments and blank lines
-                continue
-            if new_indent and not parentheses_level:
-                if indent > indent_list[-2]:
-                    # Set the actual indent, not just the random old indent + 1.
-                    indent_list[-1] = indent
-                new_indent = False
-            while indent < indent_list[-1]:  # -> dedent
-                indent_list.pop()
-                # This automatically resets the flow_indent if there was a
-                # dedent or a flow just on one line (with one simple_stmt).
-                new_indent = False
-                if flow_indent is None and current_lines and not parentheses_level:
-                    yield gen_part()
-                flow_indent = None
-            # Check lines for functions/classes and split the code there.
-            if flow_indent is None:
-                m = self._keyword_re.match(l)
-                if m:
-                    # Strip whitespace and colon from flows as a check.
-                    if m.group(1).strip(' \t\r\n:') in FLOWS:
-                        if not parentheses_level:
-                            flow_indent = indent
-                    else:
-                        if not is_decorator and not just_newlines(current_lines):
-                            yield gen_part()
-                        is_decorator = '@' == m.group(1)
-                        if not is_decorator:
-                            parentheses_level = 0
-                            # The new indent needs to be higher
-                            indent_list.append(indent + 1)
-                            new_indent = True
-                elif is_decorator:
-                    is_decorator = False
-            parentheses_level = \
-                max(0, (l.count('(') + l.count('[') + l.count('{') -
-                        l.count(')') - l.count(']') - l.count('}')))
-            current_lines.append(l)
-        if previous_line is not None:
-            current_lines.append(previous_line)
-        if current_lines:
-            yield gen_part()
+    def update(self, lines_new):
+        '''
+        The algorithm works as follows:
+        Equal:
+            - Assure that the start is a newline, otherwise parse until we get
+              one.
+            - Copy from parsed_until_line + 1 to max(i2 + 1)
+            - Make sure that the indentation is correct (e.g. add DEDENT)
+            - Add old and change positions
+        Insert:
+            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
+              much more.
+        Always:
+            - Set parsed_until_line
+        '''
+        self._lines_new = lines_new
+        self._reset()
+        self._old_children = self._module.children
+        self._new_children = []
+        self._prefix = ''
+        lines_old = splitlines(self._parser.source, keepends=True)
+        sm = difflib.SequenceMatcher(None, lines_old, lines_new)
+        for operation, i1, i2, j1, j2 in sm.get_opcodes():
+            print(operation)
+            if operation == 'equal':
+                line_offset = j1 - i1
+                self._copy_from_old_parser(line_offset, i2 + 1, j2 + 1)
+            elif operation == 'replace':
+                self._delete_count += 1
+                self._insert(j2 + 1)
+            elif operation == 'insert':
+                self._insert(j2 + 1)
+            else:
+                assert operation == 'delete'
+                self._delete_count += 1  # For statistics
+    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
+        while until_line_new < self._parsed_until_line:
+            parsed_until_line_old = self._parsed_until_line + line_offset
+            if matches:
+                # TODO check missing indent/dedent
+                _copy_p()
+                self._update_positions(line_offset)
+                # We have copied as much as possible (but definitely not too
+                # much). Therefore we escape, even if we're not at the end. The
+                # rest will be parsed.
+                # Might not reach until the end, because there's a statement
+                # that is not finished.
-    def _parse(self, source):
-        """ :type source: str """
-        added_newline = False
-        if not source or source[-1] != '\n':
-            # To be compatible with Pythons grammar, we need a newline at the
-            # end. The parser would handle it, but since the fast parser abuses
-            # the normal parser in various ways, we need to care for this
-            # ourselves.
-            source += '\n'
-            added_newline = True
-        next_code_part_end_line = code_part_end_line = 1
-        start = 0
-        nodes = list(self.root_node.all_sub_nodes())
-        # Now we can reset the node, because we have all the old nodes.
-        self.root_node.reset_node()
-        self.current_node = self.root_node
-        last_end_line = 1
-        for code_part in self._split_parts(source):
-            next_code_part_end_line += code_part.count('\n')
-            # If the last code part parsed isn't equal to the current end_pos,
-            # we know that the parser went further (`def` start in a
-            # docstring). So just parse the next part.
-            if code_part_end_line == last_end_line:
-                self._parse_part(code_part, source[start:], code_part_end_line, nodes)
-            else:
-                self.number_of_misses += 1
-                # Means that some lines where not fully parsed. Parse it now.
-                # This is a very rare case. Should only happens with very
-                # strange code bits.
-                while last_end_line < next_code_part_end_line:
-                    code_part_end_line = last_end_line
-                    # We could calculate the src in a more complicated way to
-                    # make caching here possible as well. However, this is
-                    # complicated and error-prone. Since this is not very often
-                    # called - just ignore it.
-                    src = ''.join(self._lines[code_part_end_line - 1:])
-                    self._parse_part(code_part, src, code_part_end_line, nodes)
-                    last_end_line = self.current_node.end_pos[0]
-                    debug.dbg("While parsing %s, starting with line %s wasn't included in split.",
-                              self.module_path, code_part_end_line)
-                #assert code_part_end_line > last_end_line
-                # This means that the parser parsed faster than the last given
-                # `code_part`.
-                debug.dbg('While parsing %s, line %s slowed down the fast parser.',
-                          self.module_path, code_part_end_line)
-            code_part_end_line = next_code_part_end_line
-            start += len(code_part)
-            last_end_line = self.current_node.end_pos[0]
-        if added_newline:
-            self.current_node.remove_last_newline()
-        # Now that the for loop is finished, we still want to close all nodes.
-        node = self.current_node
-        while node is not None:
-            node.close()
-            node = node.parent
-        debug.dbg('Parsed %s, with %s parsers in %s splits.'
-                  % (self.module_path, self.number_parsers_used,
-                     self.number_of_splits))
-    def _parse_part(self, source, parser_code, code_part_end_line, nodes):
-        """
-        Side effect: Alters the list of nodes.
-        """
-        h = hash(source)
-        for index, node in enumerate(nodes):
-            if node.hash == h and node.source == source:
-                node.reset_node()
-                nodes.remove(node)
-                parser_code = source
-                break
-        else:
-            tokenizer = FastTokenizer(parser_code)
-            self.number_parsers_used += 1
-            p = ParserWithRecovery(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
-            end = code_part_end_line - 1 + p.module.end_pos[0]
-            used_lines = self._lines[code_part_end_line - 1:end - 1]
-            code_part_actually_used = ''.join(used_lines)
-            node = ParserNode(self.module, p, code_part_actually_used)
-        indent = len(parser_code) - len(parser_code.lstrip('\t '))
-        self.current_node.add_node(node, code_part_end_line, indent)
-        self.current_node = node
-class FastTokenizer(object):
+                break
+            else:
+                # Parse 1 line at least. We don't need more, because we just
+                # want to get into a state where the old parser has starting
+                # statements again (not e.g. lines within parentheses).
+                self._parse(self._parsed_until_line + 1)
+    def _update_positions(self, line_offset, line_start, line_end):
+        if line_offset == 0:
+            return
+        # Find start node:
+        node = self._parser.get_pared_node()
+        while True:
+            return node
+    def _insert(self, until_line_new):
+        self._insert_count += 1
+        self._parse(until_line_new)
+    def _get_before_insertion_node(self):
+        if not self._new_children:
+            return None
+        leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False)
+        while leaf.type != 'newline':
+            try:
+                leaf = leaf.get_previous_leaf()
+            except IndexError:
+                # TODO
+                raise NotImplementedError
+        node = leaf
+        while True:
+            parent = node.parent
+            print(parent)
+            if parent.type in ('suite', 'file_input'):
+                print(node)
+                print(i, line, node.end_pos)
+                assert node.end_pos[0] <= line
+                assert node.end_pos[1] == 0
+                return node
+            node = parent
-    """
-    Breaks when certain conditions are met, i.e. a new function or class opens.
-    """
-    def __init__(self, source):
-        self.source = source
-        self._gen = source_tokens(source, use_exact_op_types=True)
-        self._closed = False
-        # fast parser options
-        self.current = self.previous = NEWLINE, '', (0, 0)
-        self._in_flow = False
-        self._is_decorator = False
-        self._first_stmt = True
-        self._parentheses_level = 0
-        self._indent_counter = 0
-        self._flow_indent_counter = 0
-        self._returned_endmarker = False
-        self._expect_indent = False
-    def __iter__(self):
-        return self
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-    def __next__(self):
-        if self._closed:
-            return self._finish_dedents()
-        typ, value, start_pos, prefix = current = next(self._gen)
-        if typ == ENDMARKER:
-            self._closed = True
-            self._returned_endmarker = True
-            return current
-        self.previous = self.current
-        self.current = current
-        if typ == INDENT:
-            self._indent_counter += 1
-            if not self._expect_indent and not self._first_stmt and not self._in_flow:
-                # This does not mean that there is an actual flow, it means
-                # that the INDENT is syntactically wrong.
-                self._flow_indent_counter = self._indent_counter - 1
-                self._in_flow = True
-            self._expect_indent = False
-        elif typ == DEDENT:
-            self._indent_counter -= 1
-            if self._in_flow:
-                if self._indent_counter == self._flow_indent_counter:
-                    self._in_flow = False
-            else:
-                self._closed = True
-            return current
-        previous_type = self.previous[0]
-        if value in ('def', 'class') and self._parentheses_level:
-            # Account for the fact that an open parentheses before a function
-            # will reset the parentheses counter, but new lines before will
-            # still be ignored. So check the prefix.
+    def _parse(self, until_line):
+        """
+        Parses at least until the given line, but might just parse more until a
+        valid state is reached.
+        """
+        while until_line > self._parsed_until_line:
+            node = self._parse_scope_part(before_node, until_line)
+            first_leaf = node.first_leaf()
+            before_node = self._get_before_insertion_node()
+            if before_node is None:
+                # The start of the file.
+                self.new_children += node.children
+            else:
+                before_node.parent.children += node.children
+    def _parse_scope_node(self, before_node, until_line, line_offset=0):
+        # TODO speed up, shouldn't copy the whole thing all the time.
+        # memoryview?
+        lines_after = self._lines_new[self._parsed_until_line + 1:]
+        tokenizer = self._diff_tokenize(lines_after, until_line, line_offset)
+        self._parser = ParserWithRecovery(
+            self._parser._grammar,
+            source=None,
+            tokenizer=tokenizer,
+            start_parsing=False
+        )
+        return self._parser.parse()
-            # TODO what about flow parentheses counter resets in the tokenizer?
-            self._parentheses_level = 0
-            # We need to simulate a newline before the indent, because the
-            # open parentheses ignored them.
-            if re.search('\n\s*', prefix):
-                previous_type = NEWLINE
-        # Parentheses ignore the indentation rules. The other three stand for
-        # new lines.
-        if previous_type in (NEWLINE, INDENT, DEDENT) \
-                and not self._parentheses_level and typ not in (INDENT, DEDENT):
-            if not self._in_flow:
-                if value in FLOWS:
-                    self._flow_indent_counter = self._indent_counter
-                    self._first_stmt = False
-                elif value in ('def', 'class', '@'):
-                    # The values here are exactly the same check as in
-                    # _split_parts, but this time with tokenize and therefore
-                    # precise.
-                    if not self._first_stmt and not self._is_decorator:
-                        return self._close()
-                    self._is_decorator = '@' == value
-                    if not self._is_decorator:
-                        self._first_stmt = False
-                        self._expect_indent = True
-                elif self._expect_indent:
-                    return self._close()
-            else:
-                self._first_stmt = False
-        if value in '([{' and value:
-            self._parentheses_level += 1
-        elif value in ')]}' and value:
-            # Ignore closing parentheses, because they are all
-            # irrelevant for the indentation.
-            self._parentheses_level = max(self._parentheses_level - 1, 0)
-        return current
+    def _diff_tokenize(lines, until_line, line_offset=0):
+        is_first_token = True
+        omited_first_indent = False
+        indent_count = 0
+        tokens = generate_tokens(lambda: next(l, ''))
+        for token_info in tokens:
+            typ = token_info.type
+            if typ == 'indent':
+                indent_count += 1
+                if is_first_token:
+                    omited_first_indent = True
+                    # We want to get rid of indents that are only here because
+                    # we only parse part of the file. These indents would only
+                    # get parsed as error leafs, which doesn't make any sense.
+                    continue
+            elif typ == 'dedent':
+                indent_count -= 1
+                if omited_first_indent and indent_count == 0:
+                    # We are done here, only thing that can come now is an
+                    # endmarker or another dedented code block.
+                    break
+            elif typ == 'newline' and token_info.start_pos[0] >= until_line:
+                yield token_info
+                x = self.
+                import pdb; pdb.set_trace()
+                break
+            is_first_token = False
+            if line_offset != 0:
+                raise NotImplementedError
+                yield tokenize.TokenInfo(*token_info.string[1:])
+            else:
+                yield token_info
+        yield tokenize.TokenInfo(tokenize.ENDMARKER, *token_info.string[1:])
-    def _close(self):
-        if self._first_stmt:
-            # Continue like nothing has happened, because we want to enter
-            # the first class/function.
-            if self.current[1] != '@':
-                self._first_stmt = False
-            return self.current
-        else:
-            self._closed = True
-            return self._finish_dedents()
-    def _finish_dedents(self):
-        if self._indent_counter:
-            self._indent_counter -= 1
-            return DEDENT, '', self.current[2], ''
-        elif not self._returned_endmarker:
-            self._returned_endmarker = True
-            return ENDMARKER, '', self.current[2], self._get_prefix()
-        else:
-            raise StopIteration
-    def _get_prefix(self):
-        """
-        We're using the current prefix for the endmarker to not loose any
-        information. However we care about "lost" lines. The prefix of the
-        current line (indent) will always be included in the current line.
-        """
-        cur = self.current
-        while cur[0] == DEDENT:
-            cur = next(self._gen)
-        prefix = cur[3]
-        # \Z for the end of the string. $ is bugged, because it has the
-        # same behavior with or without re.MULTILINE.
-        return re.sub(r'[^\n]+\Z', '', prefix)
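
For context (not part of the commit itself): the new DiffParser.update() above drives everything off difflib.SequenceMatcher opcodes computed over the old and new source lines, split with their line endings kept. A minimal, standalone sketch of that mechanism, using made-up example sources:

    import difflib

    # Two versions of a tiny module, split into lines with line endings kept,
    # mirroring splitlines(source, keepends=True) in the diff above.
    old_lines = "def f():\n    return 1\nclass C:\n    pass\n".splitlines(keepends=True)
    new_lines = "def f():\n    return 2\nclass C:\n    pass\nx = 3\n".splitlines(keepends=True)

    sm = difflib.SequenceMatcher(None, old_lines, new_lines)
    for operation, i1, i2, j1, j2 in sm.get_opcodes():
        # 'equal' spans (old[i1:i2] == new[j1:j2]) could be reused from an old
        # parse tree with a line offset of j1 - i1; 'replace' and 'insert'
        # spans have to be re-parsed; 'delete' spans are simply dropped.
        print(operation, 'old[%d:%d]' % (i1, i2), 'new[%d:%d]' % (j1, j2))

For these inputs the opcodes come out as equal, replace, equal, insert, which is exactly the operation switch that update() dispatches on.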


@@ -86,7 +86,6 @@ def save_parser(path, parser, pickling=True):
 class ParserPickling(object):
-
     version = 26
     """
     Version number (integer) for file system cache.


@@ -24,6 +24,7 @@ def test_add_to_end():
     class Two(Abc):
         def h(self):
             self
     """)  # ^ here is the first completion
     b = "    def g(self):\n" \