From 0610ef16aed25c0845721e821c00f33e18040cad Mon Sep 17 00:00:00 2001 From: Akinori Hattori Date: Sun, 6 Jul 2014 11:07:19 +0900 Subject: [PATCH 1/6] use del instead of assigning empty iterator --- jedi/parser/fast.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index 93700532..df277cb6 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -196,7 +196,7 @@ class FastParser(use_metaclass(CachedFastParser)): self._parse(code) except: # FastParser is cached, be careful with exceptions - self.parsers[:] = [] + del self.parsers[:] raise def update(self, code): @@ -206,7 +206,7 @@ class FastParser(use_metaclass(CachedFastParser)): self._parse(code) except: # FastParser is cached, be careful with exceptions - self.parsers[:] = [] + del self.parsers[:] raise def _split_parts(self, code): @@ -222,7 +222,7 @@ class FastParser(use_metaclass(CachedFastParser)): parts[-1] += '\n' + txt else: parts.append(txt) - current_lines[:] = [] + del current_lines[:] r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS) @@ -286,7 +286,7 @@ class FastParser(use_metaclass(CachedFastParser)): return new parts = self._split_parts(code) - self.parsers[:] = [] + del self.parsers[:] line_offset = 0 start = 0 From aab4891c4e3b103087645502cff9ed53d6e51638 Mon Sep 17 00:00:00 2001 From: Akinori Hattori Date: Sun, 6 Jul 2014 11:11:23 +0900 Subject: [PATCH 2/6] reduce regex searches and compile pattern --- jedi/parser/fast.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index df277cb6..ef8e4058 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -183,6 +183,9 @@ class ParserNode(object): class FastParser(use_metaclass(CachedFastParser)): + + _keyword_re = re.compile('^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)) + def __init__(self, code, module_path=None): # set values like `pr.Module`. self.module_path = module_path @@ -224,8 +227,6 @@ class FastParser(use_metaclass(CachedFastParser)): parts.append(txt) del current_lines[:] - r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS) - # Split only new lines. Distinction between \r\n is the tokenizer's # job. self._lines = code.split('\n') @@ -240,9 +241,9 @@ class FastParser(use_metaclass(CachedFastParser)): # All things within flows are simply being ignored. for i, l in enumerate(self._lines): # check for dedents - m = re.match('^([\t ]*)(.?)', l) - indent = len(m.group(1)) - if m.group(2) in ['', '#']: + s = l.lstrip('\t ') + indent = len(l) - len(s) + if not s or s[0] == '#': current_lines.append(l) # just ignore comments and blank lines continue @@ -259,7 +260,7 @@ class FastParser(use_metaclass(CachedFastParser)): # Check lines for functions/classes and split the code there. if not in_flow: - m = re.match(r_keyword, l) + m = self._keyword_re.match(l) if m: in_flow = m.group(1) in tokenize.FLOWS if not is_decorator and not in_flow: @@ -296,7 +297,7 @@ class FastParser(use_metaclass(CachedFastParser)): for code_part in parts: lines = code_part.count('\n') + 1 if is_first or line_offset >= p.module.end_pos[0]: - indent = len(re.match(r'[ \t]*', code_part).group(0)) + indent = len(code_part) - len(code_part.lstrip('\t ')) if is_first and self.current_node is not None: nodes = [self.current_node] else: From 1650f65507d4353c43f943d6b8ab860dcfaa1103 Mon Sep 17 00:00:00 2001 From: Akinori Hattori Date: Sun, 6 Jul 2014 11:29:24 +0900 Subject: [PATCH 3/6] reduce loops for finding sub parser --- jedi/parser/fast.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index ef8e4058..f207b88d 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -363,25 +363,23 @@ class FastParser(use_metaclass(CachedFastParser)): def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr): h = hash(code) - hashes = [n.hash for n in nodes] - node = None - try: - index = hashes.index(h) - if nodes[index].code != code: - raise ValueError() - except ValueError: + for index, node in enumerate(nodes): + if node.hash != h or node.code != code: + continue + + if node != self.current_node: + offset = int(nodes[0] == self.current_node) + self.current_node.old_children.pop(index - offset) + p = node.parser + m = p.module + m.line_offset += line_offset + 1 - m.start_pos[0] + break + else: tokenizer = FastTokenizer(parser_code, line_offset) p = Parser(parser_code, self.module_path, tokenizer=tokenizer, top_module=self.module, no_docstr=no_docstr) p.module.parent = self.module - else: - if nodes[index] != self.current_node: - offset = int(nodes[0] == self.current_node) - self.current_node.old_children.pop(index - offset) - node = nodes.pop(index) - p = node.parser - m = p.module - m.line_offset += line_offset + 1 - m.start_pos[0] + node = None return p, node From da89b66594a7a57fef81ae56488aa0a56d490954 Mon Sep 17 00:00:00 2001 From: Akinori Hattori Date: Sun, 6 Jul 2014 11:53:22 +0900 Subject: [PATCH 4/6] use generator to reduce memory usage --- jedi/parser/fast.py | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index f207b88d..a9b0ecec 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -218,28 +218,22 @@ class FastParser(use_metaclass(CachedFastParser)): each part seperately and therefore cache parts of the file and not everything. """ - def add_part(): - txt = '\n'.join(current_lines) - if txt: - if add_to_last and parts: - parts[-1] += '\n' + txt - else: - parts.append(txt) - del current_lines[:] + def gen_part(): + text = '\n'.join(current_lines) + del current_lines[:] + return text # Split only new lines. Distinction between \r\n is the tokenizer's # job. self._lines = code.split('\n') current_lines = [] - parts = [] is_decorator = False current_indent = 0 old_indent = 0 new_indent = False in_flow = False - add_to_last = False # All things within flows are simply being ignored. - for i, l in enumerate(self._lines): + for l in self._lines: # check for dedents s = l.lstrip('\t ') indent = len(l) - len(s) @@ -251,8 +245,8 @@ class FastParser(use_metaclass(CachedFastParser)): current_indent = indent new_indent = False if not in_flow or indent < old_indent: - add_part() - add_to_last = False + if current_lines: + yield gen_part() in_flow = False elif new_indent: current_indent = indent @@ -264,8 +258,8 @@ class FastParser(use_metaclass(CachedFastParser)): if m: in_flow = m.group(1) in tokenize.FLOWS if not is_decorator and not in_flow: - add_part() - add_to_last = False + if current_lines: + yield gen_part() is_decorator = '@' == m.group(1) if not is_decorator: old_indent = current_indent @@ -273,12 +267,10 @@ class FastParser(use_metaclass(CachedFastParser)): new_indent = True elif is_decorator: is_decorator = False - add_to_last = True current_lines.append(l) - add_part() - - return parts + if current_lines: + yield gen_part() def _parse(self, code): """ :type code: str """ @@ -286,16 +278,13 @@ class FastParser(use_metaclass(CachedFastParser)): new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False) return new - parts = self._split_parts(code) del self.parsers[:] line_offset = 0 start = 0 p = None is_first = True - - for code_part in parts: - lines = code_part.count('\n') + 1 + for code_part in self._split_parts(code): if is_first or line_offset >= p.module.end_pos[0]: indent = len(code_part) - len(code_part.lstrip('\t ')) if is_first and self.current_node is not None: @@ -303,7 +292,6 @@ class FastParser(use_metaclass(CachedFastParser)): else: nodes = [] if self.current_node is not None: - self.current_node = \ self.current_node.parent_until_indent(indent) nodes += self.current_node.old_children @@ -348,7 +336,7 @@ class FastParser(use_metaclass(CachedFastParser)): #else: #print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part) - line_offset += lines + line_offset += code_part.count('\n') + 1 start += len(code_part) + 1 # +1 for newline if self.parsers: @@ -359,7 +347,6 @@ class FastParser(use_metaclass(CachedFastParser)): self.module.end_pos = self.parsers[-1].module.end_pos # print(self.parsers[0].module.get_code()) - del code def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr): h = hash(code) From feae67484c085e28f9f81202a27b165cd51b2119 Mon Sep 17 00:00:00 2001 From: Akinori Hattori Date: Tue, 8 Jul 2014 20:21:45 +0900 Subject: [PATCH 5/6] CRLF should be also treated as blank line --- jedi/parser/fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index a9b0ecec..e2e618a3 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -237,7 +237,7 @@ class FastParser(use_metaclass(CachedFastParser)): # check for dedents s = l.lstrip('\t ') indent = len(l) - len(s) - if not s or s[0] == '#': + if not s or s[0] in ('#', '\r'): current_lines.append(l) # just ignore comments and blank lines continue From 3f75ea5cc7c9f75cec4dc8c6fa42ed0c0b5eac02 Mon Sep 17 00:00:00 2001 From: Akinori Hattori Date: Thu, 17 Jul 2014 18:29:00 +0900 Subject: [PATCH 6/6] skip newline at end of code --- jedi/parser/fast.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index e2e618a3..20cae651 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -270,7 +270,12 @@ class FastParser(use_metaclass(CachedFastParser)): current_lines.append(l) if current_lines: - yield gen_part() + # skip newline at end of code, + # since it is not counted by Parser + if not current_lines[-1]: + del current_lines[-1] + if current_lines: + yield gen_part() def _parse(self, code): """ :type code: str """