From 0610ef16aed25c0845721e821c00f33e18040cad Mon Sep 17 00:00:00 2001
From: Akinori Hattori <hattya@gmail.com>
Date: Sun, 6 Jul 2014 11:07:19 +0900
Subject: [PATCH 1/6] use del instead of assigning empty list

---
 jedi/parser/fast.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index 93700532..df277cb6 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -196,7 +196,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             self._parse(code)
         except:
             # FastParser is cached, be careful with exceptions
-            self.parsers[:] = []
+            del self.parsers[:]
             raise
 
     def update(self, code):
@@ -206,7 +206,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             self._parse(code)
         except:
             # FastParser is cached, be careful with exceptions
-            self.parsers[:] = []
+            del self.parsers[:]
             raise
 
     def _split_parts(self, code):
@@ -222,7 +222,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                     parts[-1] += '\n' + txt
                 else:
                     parts.append(txt)
-                current_lines[:] = []
+                del current_lines[:]
 
         r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
 
@@ -286,7 +286,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             return new
 
         parts = self._split_parts(code)
-        self.parsers[:] = []
+        del self.parsers[:]
 
         line_offset = 0
         start = 0

From aab4891c4e3b103087645502cff9ed53d6e51638 Mon Sep 17 00:00:00 2001
From: Akinori Hattori <hattya@gmail.com>
Date: Sun, 6 Jul 2014 11:11:23 +0900
Subject: [PATCH 2/6] reduce regex searches and compile pattern

---
 jedi/parser/fast.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index df277cb6..ef8e4058 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -183,6 +183,9 @@ class ParserNode(object):
 
 
 class FastParser(use_metaclass(CachedFastParser)):
+
+    _keyword_re = re.compile('^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS))
+
     def __init__(self, code, module_path=None):
         # set values like `pr.Module`.
         self.module_path = module_path
@@ -224,8 +227,6 @@ class FastParser(use_metaclass(CachedFastParser)):
                     parts.append(txt)
                 del current_lines[:]
 
-        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
-
         # Split only new lines. Distinction between \r\n is the tokenizer's
         # job.
         self._lines = code.split('\n')
@@ -240,9 +241,9 @@ class FastParser(use_metaclass(CachedFastParser)):
         # All things within flows are simply being ignored.
         for i, l in enumerate(self._lines):
             # check for dedents
-            m = re.match('^([\t ]*)(.?)', l)
-            indent = len(m.group(1))
-            if m.group(2) in ['', '#']:
+            s = l.lstrip('\t ')
+            indent = len(l) - len(s)
+            if not s or s[0] == '#':
                 current_lines.append(l)  # just ignore comments and blank lines
                 continue
 
@@ -259,7 +260,7 @@ class FastParser(use_metaclass(CachedFastParser)):
 
             # Check lines for functions/classes and split the code there.
             if not in_flow:
-                m = re.match(r_keyword, l)
+                m = self._keyword_re.match(l)
                 if m:
                     in_flow = m.group(1) in tokenize.FLOWS
                     if not is_decorator and not in_flow:
@@ -296,7 +297,7 @@ class FastParser(use_metaclass(CachedFastParser)):
         for code_part in parts:
             lines = code_part.count('\n') + 1
             if is_first or line_offset >= p.module.end_pos[0]:
-                indent = len(re.match(r'[ \t]*', code_part).group(0))
+                indent = len(code_part) - len(code_part.lstrip('\t '))
                 if is_first and self.current_node is not None:
                     nodes = [self.current_node]
                 else:

From 1650f65507d4353c43f943d6b8ab860dcfaa1103 Mon Sep 17 00:00:00 2001
From: Akinori Hattori <hattya@gmail.com>
Date: Sun, 6 Jul 2014 11:29:24 +0900
Subject: [PATCH 3/6] reduce loops for finding sub parser

---
 jedi/parser/fast.py | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index ef8e4058..f207b88d 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -363,25 +363,23 @@ class FastParser(use_metaclass(CachedFastParser)):
 
     def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
         h = hash(code)
-        hashes = [n.hash for n in nodes]
-        node = None
-        try:
-            index = hashes.index(h)
-            if nodes[index].code != code:
-                raise ValueError()
-        except ValueError:
+        for index, node in enumerate(nodes):
+            if node.hash != h or node.code != code:
+                continue
+
+            if node != self.current_node:
+                offset = int(nodes[0] == self.current_node)
+                self.current_node.old_children.pop(index - offset)
+            p = node.parser
+            m = p.module
+            m.line_offset += line_offset + 1 - m.start_pos[0]
+            break
+        else:
             tokenizer = FastTokenizer(parser_code, line_offset)
             p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
                        top_module=self.module, no_docstr=no_docstr)
             p.module.parent = self.module
-        else:
-            if nodes[index] != self.current_node:
-                offset = int(nodes[0] == self.current_node)
-                self.current_node.old_children.pop(index - offset)
-            node = nodes.pop(index)
-            p = node.parser
-            m = p.module
-            m.line_offset += line_offset + 1 - m.start_pos[0]
+            node = None
 
         return p, node
 

From da89b66594a7a57fef81ae56488aa0a56d490954 Mon Sep 17 00:00:00 2001
From: Akinori Hattori <hattya@gmail.com>
Date: Sun, 6 Jul 2014 11:53:22 +0900
Subject: [PATCH 4/6] use generator to reduce memory usage

---
 jedi/parser/fast.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index f207b88d..a9b0ecec 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -218,28 +218,22 @@ class FastParser(use_metaclass(CachedFastParser)):
         each part seperately and therefore cache parts of the file and not
         everything.
         """
-        def add_part():
-            txt = '\n'.join(current_lines)
-            if txt:
-                if add_to_last and parts:
-                    parts[-1] += '\n' + txt
-                else:
-                    parts.append(txt)
-                del current_lines[:]
+        def gen_part():
+            text = '\n'.join(current_lines)
+            del current_lines[:]
+            return text
 
         # Split only new lines. Distinction between \r\n is the tokenizer's
         # job.
         self._lines = code.split('\n')
         current_lines = []
-        parts = []
         is_decorator = False
         current_indent = 0
         old_indent = 0
         new_indent = False
         in_flow = False
-        add_to_last = False
         # All things within flows are simply being ignored.
-        for i, l in enumerate(self._lines):
+        for l in self._lines:
             # check for dedents
             s = l.lstrip('\t ')
             indent = len(l) - len(s)
@@ -251,8 +245,8 @@ class FastParser(use_metaclass(CachedFastParser)):
                 current_indent = indent
                 new_indent = False
                 if not in_flow or indent < old_indent:
-                    add_part()
-                    add_to_last = False
+                    if current_lines:
+                        yield gen_part()
                 in_flow = False
             elif new_indent:
                 current_indent = indent
@@ -264,8 +258,8 @@ class FastParser(use_metaclass(CachedFastParser)):
                 if m:
                     in_flow = m.group(1) in tokenize.FLOWS
                     if not is_decorator and not in_flow:
-                        add_part()
-                        add_to_last = False
+                        if current_lines:
+                            yield gen_part()
                     is_decorator = '@' == m.group(1)
                     if not is_decorator:
                         old_indent = current_indent
@@ -273,12 +267,10 @@ class FastParser(use_metaclass(CachedFastParser)):
                         new_indent = True
                 elif is_decorator:
                     is_decorator = False
-                    add_to_last = True
 
             current_lines.append(l)
-        add_part()
-
-        return parts
+        if current_lines:
+            yield gen_part()
 
     def _parse(self, code):
         """ :type code: str """
@@ -286,16 +278,13 @@ class FastParser(use_metaclass(CachedFastParser)):
             new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
             return new
 
-        parts = self._split_parts(code)
         del self.parsers[:]
 
         line_offset = 0
         start = 0
         p = None
         is_first = True
-
-        for code_part in parts:
-            lines = code_part.count('\n') + 1
+        for code_part in self._split_parts(code):
             if is_first or line_offset >= p.module.end_pos[0]:
                 indent = len(code_part) - len(code_part.lstrip('\t '))
                 if is_first and self.current_node is not None:
@@ -303,7 +292,6 @@ class FastParser(use_metaclass(CachedFastParser)):
                 else:
                     nodes = []
                 if self.current_node is not None:
-
                     self.current_node = \
                         self.current_node.parent_until_indent(indent)
                     nodes += self.current_node.old_children
@@ -348,7 +336,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             #else:
                 #print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part)
 
-            line_offset += lines
+            line_offset += code_part.count('\n') + 1
             start += len(code_part) + 1  # +1 for newline
 
         if self.parsers:
@@ -359,7 +347,6 @@ class FastParser(use_metaclass(CachedFastParser)):
         self.module.end_pos = self.parsers[-1].module.end_pos
 
         # print(self.parsers[0].module.get_code())
-        del code
 
     def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
         h = hash(code)

From feae67484c085e28f9f81202a27b165cd51b2119 Mon Sep 17 00:00:00 2001
From: Akinori Hattori <hattya@gmail.com>
Date: Tue, 8 Jul 2014 20:21:45 +0900
Subject: [PATCH 5/6] CRLF should also be treated as a blank line

---
 jedi/parser/fast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index a9b0ecec..e2e618a3 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -237,7 +237,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             # check for dedents
             s = l.lstrip('\t ')
             indent = len(l) - len(s)
-            if not s or s[0] == '#':
+            if not s or s[0] in ('#', '\r'):
                 current_lines.append(l)  # just ignore comments and blank lines
                 continue
 

From 3f75ea5cc7c9f75cec4dc8c6fa42ed0c0b5eac02 Mon Sep 17 00:00:00 2001
From: Akinori Hattori <hattya@gmail.com>
Date: Thu, 17 Jul 2014 18:29:00 +0900
Subject: [PATCH 6/6] skip newline at end of code

---
 jedi/parser/fast.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index e2e618a3..20cae651 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -270,7 +270,12 @@ class FastParser(use_metaclass(CachedFastParser)):
 
             current_lines.append(l)
         if current_lines:
-            yield gen_part()
+            # Drop the empty last line left by a trailing newline,
+            # since Parser does not count it as a line.
+            if not current_lines[-1]:
+                del current_lines[-1]
+            if current_lines:
+                yield gen_part()
 
     def _parse(self, code):
         """ :type code: str """