a basic approach to the new fast parser

2013-04-10 22:33:49 +04:30
parent a99d9541bd
commit 5dd05eff1a
4 changed files with 158 additions and 114 deletions
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -126,7 +126,6 @@ class NoErrorTokenizer(object):
        else:
            c[2] = self.offset[0] + c[2][0], c[2][1]
            c[3] = self.offset[0] + c[3][0], c[3][1]
-        print 'h', c, tokenize.tok_name[c[0]], self.current[2:4]
        self.current = c

        def close():
@@ -139,7 +138,7 @@ class NoErrorTokenizer(object):
                                         tokenize.NEWLINE, tokenize.DEDENT) \
                and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
                             tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
-            print c, tokenize.tok_name[c[0]]
+            #print c, tokenize.tok_name[c[0]]

            tok = c[1]
            indent = c[2][1]
--- a/jedi/fast_parser.py
+++ b/jedi/fast_parser.py
@@ -14,13 +14,15 @@ import cache
 import common


+SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
+
+
 class Module(pr.Simple, pr.Module):
    def __init__(self, parsers):
        self._end_pos = None, None
        super(Module, self).__init__(self, (1, 0))
        self.parsers = parsers
        self.reset_caches()
-        self.line_offset = 0

    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses different
@@ -63,18 +65,6 @@ class Module(pr.Simple, pr.Module):
        else:
            raise AttributeError("__getattr__ doesn't offer %s" % name)

-    def get_statement_for_position(self, pos):
-        key = 'get_statement_for_position', pos
-        if key not in self.cache:
-            for p in self.parsers:
-                s = p.module.get_statement_for_position(pos)
-                if s:
-                    self.cache[key] = s
-                    break
-            else:
-                self.cache[key] = None
-        return self.cache[key]
-
    @property
    def used_names(self):
        if not self.parsers:
@@ -92,30 +82,6 @@ class Module(pr.Simple, pr.Module):
            self.cache[key] = dct
        return self.cache[key]

-    @property
-    def docstr(self):
-        if not self.parsers:
-            raise NotImplementedError("Parser doesn't exist.")
-        return self.parsers[0].module.docstr
-
-    @property
-    def name(self):
-        if not self.parsers:
-            raise NotImplementedError("Parser doesn't exist.")
-        return self.parsers[0].module.name
-
-    @property
-    def path(self):
-        if not self.parsers:
-            raise NotImplementedError("Parser doesn't exist.")
-        return self.parsers[0].module.path
-
-    @property
-    def is_builtin(self):
-        if not self.parsers:
-            raise NotImplementedError("Parser doesn't exist.")
-        return self.parsers[0].module.is_builtin
-
    @property
    def start_pos(self):
        """ overwrite start_pos of Simple """
@@ -157,12 +123,126 @@ class CachedFastParser(type):
        return p


+class ParserNode(object):
+    """
+    A node in the tree of parsers that `FastParser` keeps between parses.
+
+    A node stores a parser, the code passed in for it (plus the hash of
+    that code) and a snapshot of the contents of its scope, so that the
+    scope can be restored before parsers are merged into it again.
+    """
+    def __init__(self, parser, code, parent=None):
+        self.parent = parent
+        self.parser = parser
+        self.code = code
+        # Quick pre-check for `FastParser._get_parser`, which compares
+        # `code` as well to rule out hash collisions.
+        self.hash = hash(code)
+
+        self.children = []
+        # Set to False by `reset_contents`.
+        self._checked = True
+        self.save_contents()
+
+    def save_contents(self):
+        """ Snapshot the scope's content lists and its generator flag. """
+        scope = self._get_content_scope()
+        self._contents = {}
+        for c in SCOPE_CONTENTS:
+            # copy, because `add_node` extends the scope's lists in place.
+            self._contents[c] = list(getattr(scope, c))
+        self._is_generator = scope.is_generator
+
+    def _get_content_scope(self):
+        """ Return the scope whose contents this node manages. """
+        try:
+            # with fast_parser we have either 1 subscope or only statements.
+            return self.parser.module.subscopes[0]
+        except IndexError:
+            return self.parser.module
+
+    def reset_contents(self):
+        """
+        Restore the scope to the state saved by `save_contents` and mark
+        this node and all of its children as not checked.
+        """
+        self._checked = False
+
+        scope = self._get_content_scope()
+        for key, c in self._contents.items():
+            # Hand out a copy: `add_node` extends the scope's lists in place
+            # and that must not change the saved snapshot.
+            setattr(scope, key, list(c))
+        scope.is_generator = self._is_generator
+
+        for c in self.children:
+            c.reset_contents()
+
+    def parent_until_indent(self, indent):
+        """
+        Return the closest node, starting with this one, whose indent is
+        smaller than `indent`. Every node that is left on the way loses
+        its children from the first unchecked one onwards.
+        """
+        if self.indent >= indent:
+            # drop the first unchecked child and all that follow
+            for i, c in enumerate(self.children):
+                if not c._checked:
+                    del self.children[i:]
+                    break
+
+            return self.parent.parent_until_indent(indent)
+        return self
+
+    @property
+    def indent(self):
+        """
+        The column of the first subscope, statement or import of the parsed
+        module (tried in that order); -1 for a node without a parent.
+        """
+        if not self.parent:
+            return -1
+        module = self.parser.module
+        try:
+            el = module.subscopes[0]
+        except IndexError:
+            try:
+                el = module.statements[0]
+            except IndexError:
+                el = module.imports[0]
+        return el.start_pos[1]
+
+    def add_node(self, parser, code):
+        """
+        Create a child node for `parser`, merge the contents of its module
+        into the scope of this node and return the new node.
+        """
+        # The new node goes in front of the first unchecked child.
+        # NOTE(review): if every child is checked, `insert` ends up as the
+        # index of the last child, not behind it -- confirm this is wanted.
+        insert = 0
+        for insert, c in enumerate(self.children):
+            if not c._checked:
+                break
+        node = ParserNode(parser, code, self)
+        self.children.insert(insert, node)
+
+        # insert parser objects into current structure
+        scope = self._get_content_scope()
+        for c in SCOPE_CONTENTS:
+            content = getattr(scope, c)
+            content += getattr(parser.module, c)
+        scope.is_generator |= parser.module.is_generator
+        return node
+
+
class FastParser(use_metaclass(CachedFastParser)):
    def __init__(self, code, module_path=None, user_position=None):
        # set values like `pr.Module`.
        self.module_path = module_path
        self.user_position = user_position

+        self.current_node = None
        self.parsers = []
        self.module = Module(self.parsers)
        self.reset_caches()
@@ -274,50 +321,74 @@ class FastParser(use_metaclass(CachedFastParser)):

    def _parse(self, code):
        """ :type code: str """
-        def set_parent(module):
-            def get_indent(module):
-                try:
-                    el = module.subscopes[0]
-                except IndexError:
-                    try:
-                        el = module.statements[0]
-                    except IndexError:
-                        el = module.imports[0]
-                return el.start_pos[1]
-
-            if self.parsers and False:
-                new_indent = get_indent(module)
-                old_indent = get_indent(self.parsers[-1].module)
-                if old_indent < new_indent:
-                    #module.parent = self.parsers[-1].module.subscopes[0]
-                    # TODO set parents + add to subscopes
-                    return
-            p.module.parent = self.module
-
        parts = self._split_parts(code)
-
-        if settings.fast_parser_always_reparse:
        self.parsers[:] = []

-        # dict comprehensions are not available in py2.5/2.6 :-(
-        hashes = dict((p.hash, p) for p in self.parsers)
-
-        line_offset = 0
-        start = 0
+        # State shared with `_get_parser` while the parts are processed.
+        self._code = code
+        self._line_offset = 0
+        self._start = 0
        p = None
-        parser_order = 0
+        is_first = True
        for code_part in parts:
            lines = code_part.count('\n') + 1
-            # the parser is using additional newlines, therefore substract
-            if p is None or line_offset >= p.end_pos[0] - 2:
-                # check if code_part has already been parsed
-                h = hash(code_part)
+            # A new parser is only needed once the line offset has reached
+            if is_first or self._line_offset >= p.end_pos[0] - 1:
+                # the end of the previous one. `group(0)` is the matched
+                # whitespace; its length is the indent of this part.
+                indent = len(re.match(r'[ \t]*', code_part).group(0))
+                if is_first and self.current_node is not None:
+                    nodes = [self.current_node]
+                else:
+                    nodes = []
+                if self.current_node is not None:

-                if h in hashes and hashes[h].code == code_part:
-                    p = hashes[h]
-                    del hashes[h]
+                    self.current_node = \
+                                self.current_node.parent_until_indent(indent)
+                    nodes += self.current_node.children
+
+                # check if code_part has already been parsed
+                p = self._get_parser(code_part, nodes)
+
+                if is_first:
+                    if self.current_node is None:
+                        self.current_node = ParserNode(p, code_part)
+                    else:
+                        self.current_node.parser = p
+                        self.current_node.save_contents()
+                else:
+                    self.current_node = \
+                                self.current_node.add_node(p, code_part)
+                self.parsers.append(p)
+
+                is_first = False
+
+            self._line_offset += lines
+            self._start += len(code_part) + 1  # +1 for newline
+        del self._code
+
+    def _get_parser(self, code, nodes):
+        """
+        Return the parser of the node in `nodes` whose code equals `code`
+        (the node is removed from `nodes`), or a new parser for the rest of
+        the source if there is no such node.
+        """
+        h = hash(code)
+        hashes = [n.hash for n in nodes]
+        try:
+            index = hashes.index(h)
+            if nodes[index].code != code:
+                raise ValueError()
+        except ValueError:
+            p = parsing.Parser(self._code[self._start:],
+                               self.module_path, self.user_position,
+                               offset=(self._line_offset, 0),
+                               is_fast_parser=True, top_module=self.module)
+        else:
+            node = nodes.pop(index)
+            p = node.parser
            m = p.module
-                    m.line_offset += line_offset + 1 - m.start_pos[0]
+            m.line_offset += self._line_offset + 1 - m.start_pos[0]
            if self.user_position is not None and \
                    m.start_pos <= self.user_position <= m.end_pos:
                # It's important to take care of the whole user
@@ -327,30 +393,12 @@ class FastParser(use_metaclass(CachedFastParser)):
                if p.user_stmt:
                    p.user_scope = p.user_stmt.parent
                else:
-                            p.user_scope = self.scan_user_scope(m) \
-                                            or self.module
-                else:
-                    p = parsing.Parser(code[start:],
-                                self.module_path, self.user_position,
-                                offset=(line_offset, 0), is_fast_parser=True,
-                                top_module=self.module)
-
-                    p.hash = h
-                    p.code = code_part
-                    set_parent(p.module)
-                self.parsers.insert(parser_order, p)
-
-                parser_order += 1
-            line_offset += lines
-            print line_offset
-            start += len(code_part) + 1  # +1 for newline
-        self.parsers[parser_order + 1:] = []
-        for p in self.parsers:
-            print(p.module.get_code())
-            print(p.module.start_pos, p.module.end_pos)
-        exit()
+                    p.user_scope = self.scan_user_scope(m) or self.module
+        return p

    def reset_caches(self):
        self._user_scope = None
        self._user_stmt = None
        self.module.reset_caches()
+        if self.current_node is not None:
+            self.current_node.reset_contents()
--- a/jedi/parsing_representation.py
+++ b/jedi/parsing_representation.py
@@ -149,6 +149,10 @@ class Scope(Simple, IsScope):
        self.statements = []
        self.docstr = ''
        self.asserts = []
+        # Needed on every scope for fast_parser: it splits the code, so
+        # returns can end up in "normal" modules.
+        self.returns = []
+        self.is_generator = False

    def add_scope(self, sub, decorators):
        sub.parent = self.use_as_parent
    def add_scope(self, sub, decorators):
        sub.parent = self.use_as_parent
@@ -194,13 +198,12 @@ class Scope(Simple, IsScope):
        if len(self.docstr) > 0:
            string += '"""' + self.docstr + '"""\n'

-        returns = self.returns if hasattr(self, 'returns') else []
-        objs = self.subscopes + self.imports + self.statements + returns
+        objs = self.subscopes + self.imports + self.statements + self.returns
        for obj in sorted(objs, key=lambda x: x.start_pos):
            if isinstance(obj, Scope):
                string += obj.get_code(first_indent=True, indention=indention)
            else:
-                if obj in returns and not isinstance(self, Lambda):
+                if obj in self.returns and not isinstance(self, Lambda):
                    string += 'yield ' if self.is_generator else 'return '
                string += obj.get_code()

@@ -439,8 +442,6 @@ class Function(Scope):
            p.parent = self.use_as_parent
            p.parent_function = self.use_as_parent
        self.decorators = []
-        self.returns = []
-        self.is_generator = False
        self.listeners = set()  # not used here, but in evaluation.

        if annotation is not None:
@@ -456,6 +457,9 @@ class Function(Scope):
            string += "pass\n"
        return string

+    def is_empty(self):
+        return super(Function, self).is_empty() and not self.returns
+
    def get_set_vars(self):
        n = super(Function, self).get_set_vars()
        for p in self.params:
--- a/jedi/settings.py
+++ b/jedi/settings.py
@@ -33,7 +33,6 @@ Parser
 ~~~~~~

 .. autodata:: fast_parser
-.. autodata:: fast_parser_always_reparse
 .. autodata:: use_function_definition_cache


@@ -150,12 +149,6 @@ something has been changed e.g. to a function. If this happens, only the
 function is being reparsed.
 """

-fast_parser_always_reparse = False
-"""
-This is just a debugging option. Always reparsing means that the fast parser
-is basically useless. So don't use it.
-"""
-
 use_function_definition_cache = True
 """
 Use the cache (full cache) to generate function_definition's. This may fail