use generator to reduce memory usage

2026-05-18 06:19:39 +08:00 · 2014-07-06 11:53:22 +09:00
parent 1650f65507
commit da89b66594
1 changed files with 13 additions and 26 deletions
@@ -218,28 +218,22 @@ class FastParser(use_metaclass(CachedFastParser)):
        each part seperately and therefore cache parts of the file and not
        everything.
        """
-        def add_part():
+        def gen_part():
-            txt = '\n'.join(current_lines)
+            text = '\n'.join(current_lines)
-            if txt:
+            del current_lines[:]
-                if add_to_last and parts:
+            return text
                    parts[-1] += '\n' + txt
                else:
                    parts.append(txt)
                del current_lines[:]
        # Split only new lines. Distinction between \r\n is the tokenizer's
        # job.
        self._lines = code.split('\n')
        current_lines = []
        parts = []
        is_decorator = False
        current_indent = 0
        old_indent = 0
        new_indent = False
        in_flow = False
        add_to_last = False
        # All things within flows are simply being ignored.
-        for i, l in enumerate(self._lines):
+        for l in self._lines:
            # check for dedents
            s = l.lstrip('\t ')
            indent = len(l) - len(s)
@@ -251,8 +245,8 @@ class FastParser(use_metaclass(CachedFastParser)):
                current_indent = indent
                new_indent = False
                if not in_flow or indent < old_indent:
-                    add_part()
+                    if current_lines:
-                    add_to_last = False
+                        yield gen_part()
                in_flow = False
            elif new_indent:
                current_indent = indent
@@ -264,8 +258,8 @@ class FastParser(use_metaclass(CachedFastParser)):
                if m:
                    in_flow = m.group(1) in tokenize.FLOWS
                    if not is_decorator and not in_flow:
-                        add_part()
+                        if current_lines:
-                        add_to_last = False
+                            yield gen_part()
                    is_decorator = '@' == m.group(1)
                    if not is_decorator:
                        old_indent = current_indent
@@ -273,12 +267,10 @@ class FastParser(use_metaclass(CachedFastParser)):
                        new_indent = True
                elif is_decorator:
                    is_decorator = False
                    add_to_last = True
            current_lines.append(l)
-        add_part()
+        if current_lines:
-
+            yield gen_part()
        return parts
    def _parse(self, code):
        """ :type code: str """
@@ -286,16 +278,13 @@ class FastParser(use_metaclass(CachedFastParser)):
            new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
            return new
        parts = self._split_parts(code)
        del self.parsers[:]
        line_offset = 0
        start = 0
        p = None
        is_first = True
-
+        for code_part in self._split_parts(code):
        for code_part in parts:
            lines = code_part.count('\n') + 1
            if is_first or line_offset >= p.module.end_pos[0]:
                indent = len(code_part) - len(code_part.lstrip('\t '))
                if is_first and self.current_node is not None:
@@ -303,7 +292,6 @@ class FastParser(use_metaclass(CachedFastParser)):
                else:
                    nodes = []
                if self.current_node is not None:
                    self.current_node = \
                        self.current_node.parent_until_indent(indent)
                    nodes += self.current_node.old_children
@@ -348,7 +336,7 @@ class FastParser(use_metaclass(CachedFastParser)):
            #else:
                #print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part)
-            line_offset += lines
+            line_offset += code_part.count('\n') + 1
            start += len(code_part) + 1  # +1 for newline
        if self.parsers:
@@ -359,7 +347,6 @@ class FastParser(use_metaclass(CachedFastParser)):
        self.module.end_pos = self.parsers[-1].module.end_pos
        # print(self.parsers[0].module.get_code())
        del code
    def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
        h = hash(code)