a basic approach to the new fast parser

2013-04-10 22:33:49 +04:30
parent a99d9541bd
commit 5dd05eff1a
4 changed files with 158 additions and 114 deletions
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -126,7 +126,6 @@ class NoErrorTokenizer(object):
        else:
            c[2] = self.offset[0] + c[2][0], c[2][1]
            c[3] = self.offset[0] + c[3][0], c[3][1]
        print 'h', c, tokenize.tok_name[c[0]], self.current[2:4]
        self.current = c
        def close():
@@ -139,7 +138,7 @@ class NoErrorTokenizer(object):
                                         tokenize.NEWLINE, tokenize.DEDENT) \
                and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
                             tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
-            print c, tokenize.tok_name[c[0]]
+            #print c, tokenize.tok_name[c[0]]
            tok = c[1]
            indent = c[2][1]
--- a/jedi/fast_parser.py
+++ b/jedi/fast_parser.py
@@ -14,13 +14,15 @@ import cache
 import common
 SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
 class Module(pr.Simple, pr.Module):
    def __init__(self, parsers):
        """ Set up a module that is backed by a list of parsers.

        :param parsers: list of parser objects. The list is stored as is (no
            copy is made), so later changes to it are visible here.
        """
        # The end position is not known yet, both parts start out as None.
        self._end_pos = (None, None)
        # The module passes itself plus the start position (1, 0) on to the
        # next initializer in the MRO.
        super(Module, self).__init__(self, (1, 0))

        self.parsers = parsers
        self.reset_caches()
        self.line_offset = 0
    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses different
@@ -63,18 +65,6 @@ class Module(pr.Simple, pr.Module):
        else:
            raise AttributeError("__getattr__ doesn't offer %s" % name)
    def get_statement_for_position(self, pos):
        """ Ask every parser's module for the statement at `pos`.

        The first truthy answer wins; if no parser has one, the result is
        ``None``. Either way the result is remembered in ``self.cache``, so
        the parsers are only asked once per position.

        :param pos: position that is handed on to the parsers' modules.
        :return: the statement that was found or ``None``.
        """
        cache_key = ('get_statement_for_position', pos)
        if cache_key in self.cache:
            return self.cache[cache_key]

        found = None
        for parser in self.parsers:
            candidate = parser.module.get_statement_for_position(pos)
            if candidate:
                found = candidate
                break
        self.cache[cache_key] = found
        return self.cache[cache_key]
    @property
    def used_names(self):
        if not self.parsers:
@@ -92,30 +82,6 @@ class Module(pr.Simple, pr.Module):
            self.cache[key] = dct
        return self.cache[key]
    @property
    def docstr(self):
        """ The ``docstr`` of the first parser's module.

        :raises NotImplementedError: if there are no parsers.
        """
        if self.parsers:
            first_module = self.parsers[0].module
            return first_module.docstr
        raise NotImplementedError("Parser doesn't exist.")
    @property
    def name(self):
        """ The ``name`` of the first parser's module.

        :raises NotImplementedError: if there are no parsers.
        """
        if self.parsers:
            first_module = self.parsers[0].module
            return first_module.name
        raise NotImplementedError("Parser doesn't exist.")
    @property
    def path(self):
        """ The ``path`` of the first parser's module.

        :raises NotImplementedError: if there are no parsers.
        """
        if self.parsers:
            first_module = self.parsers[0].module
            return first_module.path
        raise NotImplementedError("Parser doesn't exist.")
    @property
    def is_builtin(self):
        """ The ``is_builtin`` value of the first parser's module.

        :raises NotImplementedError: if there are no parsers.
        """
        if self.parsers:
            first_module = self.parsers[0].module
            return first_module.is_builtin
        raise NotImplementedError("Parser doesn't exist.")
    @property
    def start_pos(self):
        """ overwrite start_pos of Simple """
@@ -157,12 +123,93 @@ class CachedFastParser(type):
        return p
 class ParserNode(object):
    """ A node in the tree of code parts that the fast parser keeps.

    Every node ties a parser to the source text it belongs to and holds its
    child nodes in ``children``. On creation a node takes a snapshot of the
    lists named in ``SCOPE_CONTENTS`` (plus ``is_generator``) of its content
    scope, so that ``reset_contents`` can undo what ``add_node`` merged into
    that scope afterwards.
    """
    def __init__(self, parser, code, parent=None):
        """
        :param parser: parser whose ``module`` contains the parsed objects.
        :param code: source text of this node; its hash is stored as well.
        :param parent: the enclosing ``ParserNode``, ``None`` for a root.
        """
        self.parent = parent
        self.parser = parser
        self.code = code
        self.hash = hash(code)
        self.children = []
        # Fresh nodes count as checked; `reset_contents` clears the flag.
        self._checked = True
        self.save_contents()

    def save_contents(self):
        """ Snapshot the content scope's lists and its generator flag. """
        scope = self._get_content_scope()
        self._contents = {}
        for c in SCOPE_CONTENTS:
            # Copy, the scope's own lists are extended in place later on.
            self._contents[c] = list(getattr(scope, c))
        self._is_generator = scope.is_generator

    def _get_content_scope(self):
        """ Return the module's first subscope, or the module without one. """
        try:
            # with fast_parser we have either 1 subscope or only statements.
            return self.parser.module.subscopes[0]
        except IndexError:
            return self.parser.module

    def reset_contents(self):
        """ Put the snapshot back into the content scope, recursively.

        Marks this node and all its descendants as not checked.
        """
        self._checked = False

        scope = self._get_content_scope()
        for key, saved in self._contents.items():
            # Hand out a copy: `add_node` extends these lists with `+=`,
            # which must not leak into the snapshot.
            setattr(scope, key, list(saved))
        scope.is_generator = self._is_generator

        for c in self.children:
            c.reset_contents()

    def parent_until_indent(self, indent):
        """ Walk up to the closest node whose indent is below `indent`.

        Every node that is passed on the way up drops its children from the
        first unchecked one onwards.

        :param indent: indentation that the returned node must be below.
        :return: this node or one of its ancestors.
        """
        if self.indent >= indent:
            # Drop the first child that has not been checked and everything
            # that follows it.
            for i, c in enumerate(self.children):
                if not c._checked:
                    del self.children[i:]
                    break
            # A node without parent has indent -1, so for non-negative
            # `indent` values the walk ends there at the latest.
            return self.parent.parent_until_indent(indent)
        return self

    @property
    def indent(self):
        """ Second component of the ``start_pos`` of the module's first
        subscope, statement or import (tried in that order); -1 for a node
        without parent.

        :raises IndexError: if the module contains none of the three.
        """
        if not self.parent:
            return -1
        module = self.parser.module
        try:
            el = module.subscopes[0]
        except IndexError:
            try:
                el = module.statements[0]
            except IndexError:
                el = module.imports[0]
        return el.start_pos[1]

    def add_node(self, parser, code):
        """ Create a child node and merge the parser's module into the
        content scope of this node.

        :param parser: parser of the new child.
        :param code: source text of the new child.
        :return: the newly created ``ParserNode``.
        """
        # The new node goes in front of the first unchecked child. Without
        # such a child it is appended, which keeps the children in order.
        insert = len(self.children)
        for i, c in enumerate(self.children):
            if not c._checked:
                insert = i
                break
        node = ParserNode(parser, code, self)
        self.children.insert(insert, node)

        # insert parser objects into current structure
        scope = self._get_content_scope()
        for c in SCOPE_CONTENTS:
            content = getattr(scope, c)
            # in-place, so the scope's own list grows
            content += getattr(parser.module, c)
        scope.is_generator |= parser.module.is_generator
        return node
 class FastParser(use_metaclass(CachedFastParser)):
    def __init__(self, code, module_path=None, user_position=None):
        # set values like `pr.Module`.
        self.module_path = module_path
        self.user_position = user_position
        self.current_node = None
        self.parsers = []
        self.module = Module(self.parsers)
        self.reset_caches()
@@ -274,83 +321,84 @@ class FastParser(use_metaclass(CachedFastParser)):
    def _parse(self, code):
        """ :type code: str """
        def set_parent(module):
            def get_indent(module):
                try:
                    el = module.subscopes[0]
                except IndexError:
                    try:
                        el = module.statements[0]
                    except IndexError:
                        el = module.imports[0]
                return el.start_pos[1]
            if self.parsers and False:
                new_indent = get_indent(module)
                old_indent = get_indent(self.parsers[-1].module)
                if old_indent < new_indent:
                    #module.parent = self.parsers[-1].module.subscopes[0]
                    # TODO set parents + add to subscopes
                    return
            p.module.parent = self.module
        parts = self._split_parts(code)
        self.parsers[:] = []
-        if settings.fast_parser_always_reparse:
+        self._code = code
-            self.parsers[:] = []
+        self._line_offset = 0
-
+        self._start = 0
        # dict comprehensions are not available in py2.5/2.6 :-(
        hashes = dict((p.hash, p) for p in self.parsers)
        line_offset = 0
        start = 0
        p = None
-        parser_order = 0
+        is_first = True
        for code_part in parts:
            lines = code_part.count('\n') + 1
-            # the parser is using additional newlines, therefore substract
+            if is_first or self._line_offset >= p.end_pos[0] - 1:
-            if p is None or line_offset >= p.end_pos[0] - 2:
+                indent = len(re.match(r'[ \t]*', code).groups(0))
-                # check if code_part has already been parsed
+                if is_first and self.current_node is not None:
-                h = hash(code_part)
+                    nodes = [self]
                if h in hashes and hashes[h].code == code_part:
                    p = hashes[h]
                    del hashes[h]
                    m = p.module
                    m.line_offset += line_offset + 1 - m.start_pos[0]
                    if self.user_position is not None and \
                            m.start_pos <= self.user_position <= m.end_pos:
                        # It's important to take care of the whole user
                        # positioning stuff, if no reparsing is being done.
                        p.user_stmt = m.get_statement_for_position(
                                    self.user_position, include_imports=True)
                        if p.user_stmt:
                            p.user_scope = p.user_stmt.parent
                        else:
                            p.user_scope = self.scan_user_scope(m) \
                                            or self.module
                else:
-                    p = parsing.Parser(code[start:],
+                    nodes = []
-                                self.module_path, self.user_position,
+                if self.current_node is not None:
                                offset=(line_offset, 0), is_fast_parser=True,
                                top_module=self.module)
-                    p.hash = h
+                    self.current_node = \
-                    p.code = code_part
+                                self.current_node.parent_until_indent(indent)
-                    set_parent(p.module)
+                    nodes += self.current_node.children
                self.parsers.insert(parser_order, p)
-                parser_order += 1
+                # check if code_part has already been parsed
-            line_offset += lines
+                p = self._get_parser(code, nodes)
-            print line_offset
+
-            start += len(code_part) + 1  # +1 for newline
+                if is_first:
-        self.parsers[parser_order + 1:] = []
+                    if self.current_node is None:
                        self.current_node = ParserNode(p, code)
                    else:
                        self.current_node.parser = p
                        self.current_node.save_contents()
                else:
                    self.current_node = self.current_node.add_node(p, code)
                self.parsers.append(p)
                is_first = False
            self._line_offset += lines
            self._start += len(code_part) + 1  # +1 for newline
        print 'hmm'
        for p in self.parsers:
            print(p.module.get_code())
            print(p.module.start_pos, p.module.end_pos)
        exit()
        del self._code
    def _get_parser(self, code, nodes):
        h = hash(code)
        hashes = [n.hash for n in nodes]
        try:
            index = hashes.index(h)
            if nodes[index].code != code:
                raise ValueError()
        except ValueError:
            p = parsing.Parser(self._code[self._start:],
                               self.module_path, self.user_position,
                               offset=(self._line_offset, 0),
                               is_fast_parser=True, top_module=self.module)
        else:
            node = nodes.pop(index)
            p = node.parser
            m = p.module
            m.line_offset += self._line_offset + 1 - m.start_pos[0]
            if self.user_position is not None and \
                    m.start_pos <= self.user_position <= m.end_pos:
                # It's important to take care of the whole user
                # positioning stuff, if no reparsing is being done.
                p.user_stmt = m.get_statement_for_position(
                            self.user_position, include_imports=True)
                if p.user_stmt:
                    p.user_scope = p.user_stmt.parent
                else:
                    p.user_scope = self.scan_user_scope(m) or self.module
        return p
    def reset_caches(self):
        """ Forget the user scope/statement and reset the caches of the
        module and, if there is one, the contents of the current node.
        """
        self._user_scope = self._user_stmt = None
        self.module.reset_caches()

        node = self.current_node
        if node is None:
            return
        node.reset_contents()
--- a/jedi/parsing_representation.py
+++ b/jedi/parsing_representation.py
@@ -149,6 +149,10 @@ class Scope(Simple, IsScope):
        self.statements = []
        self.docstr = ''
        self.asserts = []
        # Needed here for fast_parser, because the fast_parser splits and
        # returns will be in "normal" modules.
        self.returns = []
        self.is_generator = False
    def add_scope(self, sub, decorators):
        sub.parent = self.use_as_parent
@@ -194,13 +198,12 @@ class Scope(Simple, IsScope):
        if len(self.docstr) > 0:
            string += '"""' + self.docstr + '"""\n'
-        returns = self.returns if hasattr(self, 'returns') else []
+        objs = self.subscopes + self.imports + self.statements + self.returns
        objs = self.subscopes + self.imports + self.statements + returns
        for obj in sorted(objs, key=lambda x: x.start_pos):
            if isinstance(obj, Scope):
                string += obj.get_code(first_indent=True, indention=indention)
            else:
-                if obj in returns and not isinstance(self, Lambda):
+                if obj in self.returns and not isinstance(self, Lambda):
                    string += 'yield ' if self.is_generator else 'return '
                string += obj.get_code()
@@ -439,8 +442,6 @@ class Function(Scope):
            p.parent = self.use_as_parent
            p.parent_function = self.use_as_parent
        self.decorators = []
        self.returns = []
        self.is_generator = False
        self.listeners = set()  # not used here, but in evaluation.
        if annotation is not None:
@@ -456,6 +457,9 @@ class Function(Scope):
            string += "pass\n"
        return string
    def is_empty(self):
        """ A function only counts as empty if the inherited check says so
        and it has no returns either.
        """
        base_empty = super(Function, self).is_empty()
        if not base_empty:
            return base_empty
        return not self.returns
    def get_set_vars(self):
        n = super(Function, self).get_set_vars()
        for p in self.params:
--- a/jedi/settings.py
+++ b/jedi/settings.py
@@ -33,7 +33,6 @@ Parser
 ~~~~~~
 .. autodata:: fast_parser
 .. autodata:: fast_parser_always_reparse
 .. autodata:: use_function_definition_cache
@@ -150,12 +149,6 @@ something has been changed e.g. to a function. If this happens, only the
 function is being reparsed.
 """
 fast_parser_always_reparse = False
 """
 This is just a debugging option. Always reparsing means that the fast parser
 is basically useless. So don't use it.
 """
 use_function_definition_cache = True
 """
 Use the cache (full cache) to generate function_definition's. This may fail