
Trying to restructure the fast parser.

Dave Halter
2015-01-19 14:49:44 +01:00
parent d6b3b76d26
commit ce793b1066
2 changed files with 92 additions and 83 deletions


@@ -20,9 +20,9 @@ from jedi.parser.tokenize import (source_tokens, FLOWS, NEWLINE, COMMENT,
 class FastModule(pr.Module, pr.Simple):
     type = 'file_input'
 
-    def __init__(self, parsers):
+    def __init__(self):
         super(FastModule, self).__init__([])
-        self.parsers = parsers
+        self.modules = []
         self.reset_caches()
 
     def reset_caches(self):
@@ -35,21 +35,21 @@ class FastModule(pr.Module, pr.Simple):
         if name.startswith('__'):
             raise AttributeError('Not available!')
         else:
-            return getattr(self.parsers[0].module, name)
+            return getattr(self.modules[0], name)
 
     @property
     @cache.underscore_memoization
     def used_names(self):
         """
         used_names = {}
-        for p in self.parsers:
-            for k, statement_set in p.module.used_names.items():
+        for m in self.modules:
+            for k, statement_set in m.used_names.items():
                 if k in used_names:
                     used_names[k] |= statement_set
                 else:
                     used_names[k] = set(statement_set)
         """
-        return MergedNamesDict([p.module.used_names for p in self.parsers])
+        return MergedNamesDict([m.used_names for m in self.modules])
 
     def __repr__(self):
         return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
@@ -87,16 +87,23 @@ class CachedFastParser(type):
 
 class ParserNode(object):
-    def __init__(self, fast_module, parser, code, parent=None):
+    def __init__(self, fast_module, parent=None):
         self._fast_module = fast_module
         self.parent = parent
 
-        self.parser_children = []
-        # must be created before new things are added to it.
-        self.save_contents(parser, code)
+        self.node_children = []
+        self.code = None
+        self.hash = None
+        self.parser = None
 
-    def save_contents(self, parser, code):
-        print('SAVE')
+    def __repr__(self):
+        if self.parser is None:
+            return '<%s: empty>' % type(self).__name__
+        module = self.parser.module
+        return '<%s: %s-%s>' % (type(self).__name__, module.start_pos, module.end_pos)
+
+    def set_parser(self, parser, code):
         self.code = code
         self.hash = hash(code)
         self.parser = parser
@@ -116,8 +123,7 @@ class ParserNode(object):
             self._is_generator = scope.is_generator
         """
 
-        self.old_children = self.parser_children
-        self.parser_children = []
+        self.node_children = []
 
     def reset_contents(self):
         """
@@ -133,34 +139,31 @@ class ParserNode(object):
         # they make no sense.
         self.parser.module.global_vars = []
         """
-        for c in self.parser_children:
-            c.reset_contents()
+        # TODO REMOVE
 
     def close(self):
         """
         Closes the current parser node. This means that after this no further
         nodes should be added anymore.
         """
-        print('CLOSE NODE', self.parent, self.parser_children)
+        print('CLOSE NODE', self.parent, self.node_children)
         print(self.parser.module.names_dict, [p.parser.module.names_dict for p in
-                                              self.parser_children])
+                                              self.node_children])
         # We only need to replace the dict if multiple dictionaries are used:
-        if self.parser_children:
-            dcts = [n.parser.module.names_dict for n in self.parser_children]
+        if self.node_children:
+            dcts = [n.parser.module.names_dict for n in self.node_children]
             dct = MergedNamesDict([self._names_dict_scope.names_dict] + dcts)
             self._content_scope.names_dict = dct
 
     def parent_until_indent(self, indent=None):
-        if indent is None or self.indent >= indent and self.parent:
-            self.old_children = []
+        if indent is None or self._indent >= indent and self.parent:
             if self.parent is not None:
                 self.close()
                 return self.parent.parent_until_indent(indent)
         return self
 
     @property
-    def indent(self):
+    def _indent(self):
         if not self.parent:
             return 0
         module = self.parser.module
@@ -202,13 +205,17 @@ class ParserNode(object):
         scope.is_generator |= parser.module.is_generator
         """
 
-    def add_node(self, node, set_parent=False):
+    def add_node(self, node, line_offset):
         """Adding a node means adding a node that was already added earlier"""
         print('ADD')
-        self.parser_children.append(node)
-        self._set_items(node.parser, set_parent=set_parent)
-        node.old_children = node.parser_children  # TODO potential memory leak?
-        node.parser_children = []
+        # Changing the line offsets is very important, because if they don't
+        # fit, all the start_pos values will be wrong.
+        m = node.parser.module
+        m.line_offset += line_offset + 1 - m.start_pos[0]
+        self.node_children.append(node)
+        self._set_items(node.parser, set_parent=node.parent == self)
+        node.node_children = []
 
         """
         scope = self.content_scope
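The new add_node recenters a reused module with m.line_offset += line_offset + 1 - m.start_pos[0]. A worked example of that arithmetic (numbers invented for illustration):

    # A cached module was first parsed starting at line 12 (start_pos[0] == 12)
    # and is re-attached after 40 preceding lines (line_offset == 40), so it
    # must now start at line 41; its offset therefore grows by 41 - 12 = 29.
    start_line = 12
    line_offset = 40
    delta = line_offset + 1 - start_line
    assert delta == 29
    assert start_line + delta == 41  # all start_pos values shift along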
@@ -222,9 +229,20 @@ class ParserNode(object):
         return node
 
     def add_parser(self, parser, code):
+        # TODO REMOVE
+        raise NotImplementedError
         print('add parser')
         return self.add_node(ParserNode(self._fast_module, parser, code, self), True)
 
+    def all_nodes(self):
+        """
+        Returns all nodes including nested ones.
+        """
+        yield self
+        for n in self.node_children:
+            for y in n.all_nodes():
+                yield y
+
 
 class FastParser(use_metaclass(CachedFastParser)):
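The new all_nodes generator yields a node and then recurses into node_children: a depth-first, pre-order walk, written with nested yields so it also runs on the Python 2 versions jedi still supported. With a hypothetical stand-in node class, the traversal order looks like this:

    class Node(object):
        def __init__(self, name, children=()):
            self.name = name
            self.node_children = list(children)

        def all_nodes(self):
            yield self
            for n in self.node_children:
                for y in n.all_nodes():
                    yield y

    root = Node('root', [Node('a', [Node('a1')]), Node('b')])
    assert [n.name for n in root.all_nodes()] == ['root', 'a', 'a1', 'b']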
@@ -234,20 +252,20 @@ class FastParser(use_metaclass(CachedFastParser)):
         # set values like `pr.Module`.
         self._grammar = grammar
         self.module_path = module_path
-        self.current_node = None
-        self.parsers = []
-        self.module = FastModule(self.parsers)
-        self.reset_caches()
+        print(module_path)
+        self._reset_caches()
 
         try:
             self._parse(code)
         except:
             # FastParser is cached, be careful with exceptions
-            del self.parsers[:]
+            self._reset_caches()
             raise
 
+    def _reset_caches(self):
+        self.module = FastModule()
+        self.current_node = ParserNode(self.module)
+
     def update(self, code):
         self.reset_caches()
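Both __init__ and update now funnel failures through _reset_caches before re-raising: a FastParser instance lives in a module-level cache, so a failed parse must not leave half-built state behind. The shape of that pattern in isolation (a sketch; the class and method names are stand-ins):

    class CachingParserSketch(object):
        def __init__(self):
            self._reset_caches()

        def _reset_caches(self):
            self.module = None
            self.current_node = None

        def update(self, code):
            try:
                self._parse(code)
            except Exception:
                # The instance stays cached, so never leave half-built
                # state behind before re-raising.
                self._reset_caches()
                raise

        def _parse(self, code):
            raise NotImplementedError  # real parsing omitted in this sketch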
@@ -255,7 +273,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             self._parse(code)
         except:
             # FastParser is cached, be careful with exceptions
-            del self.parsers[:]
+            self._reset_caches()
             raise
 
     def _split_parts(self, code):
@@ -320,57 +338,45 @@ class FastParser(use_metaclass(CachedFastParser)):
     def _parse(self, code):
         """ :type code: str """
-        def empty_parser():
-            new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
-            return new
-
-        del self.parsers[:]
+        def empty_parser_node():
+            return self._get_node(unicode(''), unicode(''), 0, [], False)
 
         line_offset = 0
         start = 0
         p = None
         is_first = True
+        nodes = self.current_node.all_nodes()
 
         for code_part in self._split_parts(code):
             if is_first or line_offset + 1 == p.module.end_pos[0]:
                 print(repr(code_part))
                 indent = len(code_part) - len(code_part.lstrip('\t '))
-                if is_first and self.current_node is not None:
-                    nodes = [self.current_node]
-                else:
-                    nodes = []
-                if self.current_node is not None:
-                    self.current_node = self.current_node.parent_until_indent(indent)
-                    nodes += self.current_node.old_children
+                self.current_node = self.current_node.parent_until_indent(indent)
 
                 # check if code_part has already been parsed
                 # print '#'*45,line_offset, p and p.module.end_pos, '\n', code_part
-                p, node = self._get_parser(code_part, code[start:],
-                                           line_offset, nodes, not is_first)
-                print('HmmmmA', p.module.names_dict)
-
-                # The actual used code_part is different from the given code
-                # part, because of docstrings for example there's a chance that
-                # splits are wrong.
-                used_lines = self._lines[line_offset:p.module.end_pos[0]]
-                code_part_actually_used = '\n'.join(used_lines)
-
-                if is_first and p.module.subscopes:
+                self.current_node = self._get_node(code_part, code[start:],
+                                                   line_offset, nodes, not is_first)
+                print('HmmmmA', self.current_node.parser.module.names_dict)
+
+                if is_first and self.current_node.parser.module.subscopes:
                     print('NOXXXX')
-                    # special case, we cannot use a function subscope as a
+                    raise NotImplementedError
+                    # Special case, we cannot use a function subscope as a
                     # base scope, subscopes would save all the other contents
-                    new = empty_parser()
-                    if self.current_node is None:
-                        self.current_node = ParserNode(self.module, new, '')
-                    else:
-                        self.current_node.save_contents(new, '')
+                    new = empty_parser_node()  # TODO should be node =
+                    self.current_node.set_parser(new, '')
                     self.parsers.append(new)
                     is_first = False
 
+                """
                 if is_first:
                     if self.current_node is None:
                         self.current_node = ParserNode(self.module, p, code_part_actually_used)
                     else:
-                        self.current_node.save_contents(p, code_part_actually_used)
+                        pass
                 else:
                     if node is None:
                         self.current_node = \
@@ -379,6 +385,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                         self.current_node = self.current_node.add_node(node)
                     self.parsers.append(p)
+                """
 
                 is_first = False
             #else:
@@ -387,11 +394,13 @@ class FastParser(use_metaclass(CachedFastParser)):
             line_offset += code_part.count('\n') + 1
             start += len(code_part) + 1  # +1 for newline
 
+        # Now that the for loop is finished, we still want to close all nodes.
         if self.parsers:
             self.current_node = self.current_node.parent_until_indent()
             self.current_node.close()
         else:
-            self.parsers.append(empty_parser())
+            raise NotImplementedError
+            self.parsers.append(empty_parser_node())
 
         """ TODO used?
         self.module.end_pos = self.parsers[-1].module.end_pos
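parent_until_indent (see the ParserNode hunk above) climbs the node tree, closing nodes, until it reaches a node less indented than the new code part; that survivor is where the next node attaches. A simplified, iterative sketch of the climbing rule (close() and the diff's exact operator precedence omitted):

    class N(object):
        def __init__(self, indent, parent=None):
            self.indent = indent
            self.parent = parent

    def parent_until_indent(node, indent=None):
        # Climb while the current node is at least as indented as the new
        # code part (or unconditionally when indent is None).
        while node.parent is not None and (indent is None or node.indent >= indent):
            node = node.parent
        return node

    root = N(0)
    func = N(4, root)
    inner = N(8, func)
    assert parent_until_indent(inner, 4) is root  # dedent to top level
    assert parent_until_indent(inner, 8) is func  # stays inside the function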
@@ -399,30 +408,33 @@ class FastParser(use_metaclass(CachedFastParser)):
         # print(self.parsers[0].module.get_code())
 
-    def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
+    def _get_node(self, code, parser_code, line_offset, nodes, no_docstr):
+        """
+        Side effect: Alters the list of nodes.
+        """
         h = hash(code)
-        for index, node in enumerate(nodes):
+        for index, node in enumerate(list(nodes)):
+            print('EQ', node, repr(node.code), repr(code))
             if node.hash == h and node.code == code:
-                if node != self.current_node:
-                    offset = int(nodes[0] == self.current_node)
-                    self.current_node.old_children.pop(index - offset)
-                p = node.parser
-                m = p.module
-                m.line_offset += line_offset + 1 - m.start_pos[0]
+                nodes.remove(node)
                 break
         else:
+            print('ACTUALLY PARSING')
             tokenizer = FastTokenizer(parser_code, line_offset)
             p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
             #p.module.parent = self.module  # With the new parser this is not
             # necessary anymore?
-            node = None
-        return p, node
-
-    def reset_caches(self):
-        self.module.reset_caches()
-        if self.current_node is not None:
-            self.current_node.reset_contents()
+            node = ParserNode(self.module, self.current_node)
+
+            # The actual used code_part is different from the given code
+            # part, because of docstrings for example there's a chance that
+            # splits are wrong.
+            used_lines = self._lines[line_offset:p.module.end_pos[0] - 1]
+            code_part_actually_used = '\n'.join(used_lines)
+            node.set_parser(p, code_part_actually_used)
+
+        self.current_node.add_node(node, line_offset)
+        return node
 
 
 class FastTokenizer(object):
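_get_node's cache check is a two-step comparison: node.hash == h rejects most candidates on a cheap integer test, and node.code == code then rules out hash collisions; a hit is also removed from the candidate list, the documented side effect. The pattern, reduced to a sketch with a stub parse callback (the real method additionally fixes line offsets and rewires the node tree):

    class CachedNode(object):
        def __init__(self, code):
            self.code = code
            self.hash = hash(code)

    def get_node(code, nodes, parse):
        h = hash(code)
        for node in list(nodes):
            # Cheap hash test first, full string equality only on a match.
            if node.hash == h and node.code == code:
                nodes.remove(node)  # side effect, as in _get_node
                return node
        return parse(code)

    cached = [CachedNode('def f():\n    pass\n')]
    node = get_node('def f():\n    pass\n', cached, parse=CachedNode)
    assert node.code.startswith('def f') and not cached  # reused and consumed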


@@ -609,9 +609,6 @@ class SubModule(Scope, Module):
         # this may be changed depending on fast_parser
         self.line_offset = 0
 
-        if 0:
-            self.use_as_parent = top_module or self
-
     def set_global_names(self, names):
         """
         Global means in these context a function (subscope) which has a global