use generator to reduce memory usage

This commit is contained in:
Akinori Hattori
2014-07-06 11:53:22 +09:00
parent 1650f65507
commit da89b66594
+13 -26
View File
@@ -218,28 +218,22 @@ class FastParser(use_metaclass(CachedFastParser)):
each part separately and therefore cache parts of the file and not each part separately and therefore cache parts of the file and not
everything. everything.
""" """
def add_part(): def gen_part():
txt = '\n'.join(current_lines) text = '\n'.join(current_lines)
if txt: del current_lines[:]
if add_to_last and parts: return text
parts[-1] += '\n' + txt
else:
parts.append(txt)
del current_lines[:]
# Split only new lines. Distinction between \r\n is the tokenizer's # Split only new lines. Distinction between \r\n is the tokenizer's
# job. # job.
self._lines = code.split('\n') self._lines = code.split('\n')
current_lines = [] current_lines = []
parts = []
is_decorator = False is_decorator = False
current_indent = 0 current_indent = 0
old_indent = 0 old_indent = 0
new_indent = False new_indent = False
in_flow = False in_flow = False
add_to_last = False
# All things within flows are simply being ignored. # All things within flows are simply being ignored.
for i, l in enumerate(self._lines): for l in self._lines:
# check for dedents # check for dedents
s = l.lstrip('\t ') s = l.lstrip('\t ')
indent = len(l) - len(s) indent = len(l) - len(s)
@@ -251,8 +245,8 @@ class FastParser(use_metaclass(CachedFastParser)):
current_indent = indent current_indent = indent
new_indent = False new_indent = False
if not in_flow or indent < old_indent: if not in_flow or indent < old_indent:
add_part() if current_lines:
add_to_last = False yield gen_part()
in_flow = False in_flow = False
elif new_indent: elif new_indent:
current_indent = indent current_indent = indent
@@ -264,8 +258,8 @@ class FastParser(use_metaclass(CachedFastParser)):
if m: if m:
in_flow = m.group(1) in tokenize.FLOWS in_flow = m.group(1) in tokenize.FLOWS
if not is_decorator and not in_flow: if not is_decorator and not in_flow:
add_part() if current_lines:
add_to_last = False yield gen_part()
is_decorator = '@' == m.group(1) is_decorator = '@' == m.group(1)
if not is_decorator: if not is_decorator:
old_indent = current_indent old_indent = current_indent
@@ -273,12 +267,10 @@ class FastParser(use_metaclass(CachedFastParser)):
new_indent = True new_indent = True
elif is_decorator: elif is_decorator:
is_decorator = False is_decorator = False
add_to_last = True
current_lines.append(l) current_lines.append(l)
add_part() if current_lines:
yield gen_part()
return parts
def _parse(self, code): def _parse(self, code):
""" :type code: str """ """ :type code: str """
@@ -286,16 +278,13 @@ class FastParser(use_metaclass(CachedFastParser)):
new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False) new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
return new return new
parts = self._split_parts(code)
del self.parsers[:] del self.parsers[:]
line_offset = 0 line_offset = 0
start = 0 start = 0
p = None p = None
is_first = True is_first = True
for code_part in self._split_parts(code):
for code_part in parts:
lines = code_part.count('\n') + 1
if is_first or line_offset >= p.module.end_pos[0]: if is_first or line_offset >= p.module.end_pos[0]:
indent = len(code_part) - len(code_part.lstrip('\t ')) indent = len(code_part) - len(code_part.lstrip('\t '))
if is_first and self.current_node is not None: if is_first and self.current_node is not None:
@@ -303,7 +292,6 @@ class FastParser(use_metaclass(CachedFastParser)):
else: else:
nodes = [] nodes = []
if self.current_node is not None: if self.current_node is not None:
self.current_node = \ self.current_node = \
self.current_node.parent_until_indent(indent) self.current_node.parent_until_indent(indent)
nodes += self.current_node.old_children nodes += self.current_node.old_children
@@ -348,7 +336,7 @@ class FastParser(use_metaclass(CachedFastParser)):
#else: #else:
#print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part) #print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part)
line_offset += lines line_offset += code_part.count('\n') + 1
start += len(code_part) + 1 # +1 for newline start += len(code_part) + 1 # +1 for newline
if self.parsers: if self.parsers:
@@ -359,7 +347,6 @@ class FastParser(use_metaclass(CachedFastParser)):
self.module.end_pos = self.parsers[-1].module.end_pos self.module.end_pos = self.parsers[-1].module.end_pos
# print(self.parsers[0].module.get_code()) # print(self.parsers[0].module.get_code())
del code
def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr): def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
h = hash(code) h = hash(code)