From e23e354fe8750a184079a1cbac78a66de5a332db Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Tue, 3 Feb 2015 22:22:57 +0100
Subject: [PATCH] Simplified the line splitting and with that a few other
 things in the fast parser.

---
 jedi/parser/__init__.py |  2 +-
 jedi/parser/fast.py     | 39 +++++++++++++--------------------------
 2 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index dd675dec..2d4f5cb2 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -307,7 +307,7 @@ class Parser(object):
 
     def _tokenize(self, tokenizer):
         for typ, value, start_pos, prefix in tokenizer:
-            print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
+            #print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
             if self._omit_dedent and typ == token.DEDENT:
                 self._omit_dedent -= 1
                 continue
diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index 64b3cd8a..f12b0930 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -337,24 +337,21 @@ class FastParser(use_metaclass(CachedFastParser)):
         not everything.
         """
         def gen_part():
-            text = '\n'.join(current_lines)
+            text = ''.join(current_lines)
             del current_lines[:]
             self.number_of_splits += 1
-            if i == len(self._lines) - 1:
-                return text
-            else:
-                return text + '\n'
+            return text
 
         def just_newlines(current_lines):
             for line in current_lines:
-                line = line.lstrip('\t ')
-                if line and line[0] not in ('#', '\r'):
+                line = line.lstrip('\t \n\r')
+                if line and line[0] != '#':
                     return False
             return True
 
         # Split only new lines. Distinction between \r\n is the tokenizer's
         # job.
-        self._lines = source.split('\n')
+        self._lines = source.splitlines(keepends=True)
         current_lines = []
         is_decorator = False
         current_indent = 0
@@ -364,9 +361,9 @@ class FastParser(use_metaclass(CachedFastParser)):
         # All things within flows are simply being ignored.
         for i, l in enumerate(self._lines):
             # check for dedents
-            s = l.lstrip('\t ')
+            s = l.lstrip('\t \n\r')
             indent = len(l) - len(s)
-            if not s or s[0] in ('#', '\r'):
+            if not s or s[0] == '#':
                 current_lines.append(l)  # just ignore comments and blank lines
                 continue
 
@@ -388,7 +385,6 @@ class FastParser(use_metaclass(CachedFastParser)):
                 in_flow = m.group(1) in FLOWS
                 if not is_decorator and not in_flow:
                     if not just_newlines(current_lines):
-                        print('GEN', current_lines)
                         yield gen_part()
                 is_decorator = '@' == m.group(1)
                 if not is_decorator:
@@ -424,7 +420,7 @@ class FastParser(use_metaclass(CachedFastParser)):
 
         for code_part in self._split_parts(source):
             if not is_first:
-                print('OFF', line_offset, self.current_node.parser.module.end_pos)
+                #print('OFF', line_offset, self.current_node.parser.module.end_pos)
                 #import pdb; pdb.set_trace()
                 pass  # TODO remove
             if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
@@ -470,7 +466,6 @@ class FastParser(use_metaclass(CachedFastParser)):
             start += len(code_part)
 
         if added_newline:
-            print('REMOVE NL', self.current_node)
             self.current_node.remove_last_newline()
 
         # Now that the for loop is finished, we still want to close all nodes.
@@ -498,10 +493,9 @@ class FastParser(use_metaclass(CachedFastParser)):
         """
         Side effect: Alters the list of nodes.
""" - print('r', repr(source)) h = hash(source) for index, node in enumerate(nodes): - print('EQ', node, repr(node.source), repr(source)) + #print('EQ', node, repr(node.source), repr(source)) if node.hash == h and node.source == source: node.reset_node() nodes.remove(node) @@ -509,19 +503,13 @@ class FastParser(use_metaclass(CachedFastParser)): else: tokenizer = FastTokenizer(parser_code, 0) self.number_parsers_used += 1 - print('CODE', repr(source)) + #print('CODE', repr(source)) p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer) node = ParserNode(self.module) end = line_offset + p.module.end_pos[0] - if not (len(self._lines) == end): - # We don't keep the last line, except if were done. A newline - # ends on the next line, which is part of the next parser. But - # the last parser includes the last new line. - end -= 1 - print(line_offset, end) - used_lines = self._lines[line_offset:end] - code_part_actually_used = '\n'.join(used_lines) + used_lines = self._lines[line_offset:end - 1] + code_part_actually_used = ''.join(used_lines) node.set_parser(p, code_part_actually_used) self.current_node.add_node(node, line_offset) @@ -589,7 +577,7 @@ class FastTokenizer(object): if self.previous[0] in (NEWLINE, INDENT, DEDENT) \ and not self._parentheses_level and typ != INDENT: # Check for NEWLINE, which symbolizes the indent. - print('X', repr(value), tokenize.tok_name[typ]) + # print('X', repr(value), tokenize.tok_name[typ]) if not self._in_flow: self._in_flow = value in FLOWS if self._in_flow: @@ -606,7 +594,6 @@ class FastTokenizer(object): self._first_stmt = False self._expect_indent = True elif self._expect_indent: - print('EXP', self._first_stmt) return self._close() else: self._first_stmt = False