From 989e4bac89cb3a7b8335e9e9d2b1702a0a5786f3 Mon Sep 17 00:00:00 2001
From: Dave Halter <davidhalter88@gmail.com>
Date: Thu, 9 Mar 2017 08:58:57 +0100
Subject: [PATCH] Speed up splitlines.

We use Python's built-in str.splitlines again, with the modifications we need.
I ran it with:

    python3 -m timeit  -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()'

The speed difference is quite remarkable; it's ~3 times faster:

    10000 loops, best of 3: 52.1 usec per loop

vs. the old:

    10000 loops, best of 3: 148 usec per loop

We might need to speed up splitlines with keepends=False as well. It's
probably also a factor of 2-3 slower than it should be.
---
 jedi/common.py      | 39 ++++++++++++++++++++++-----------------
 test/test_common.py |  4 ++++
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/jedi/common.py b/jedi/common.py
index ee60636a..abb271ae 100644
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
     also on form feeds.
     """
     if keepends:
-        # If capturing parentheses are used in pattern, then the text of all
-        # groups in the pattern are also returned as part of the resulting
-        # list.
-        lst = re.split('(\n|\r\n)', string)
+        lst = string.splitlines(keepends=True)
 
-        # Need to merge the new lines with the actual lines.
-        odd = False
-        lines = []
-        for string in lst:
-            if odd:
-                line += string
-                lines.append(line)
-            else:
-                line = string
-            odd = not odd
-        if odd:
-            lines.append(line)
-        return lines
+        # We have to merge lines that were broken by form feed characters.
+        merge = []
+        for i, line in enumerate(lst):
+            if line.endswith('\f'):
+                merge.append(i)
+
+        for index in reversed(merge):
+            try:
+                lst[index] = lst[index] + lst[index + 1]
+                del lst[index + 1]
+            except IndexError:
+                # index + 1 can be empty and therefore there's no need to
+                # merge.
+                pass
+
+        # The stdlib's implementation of the end is inconsistent when calling
+        # it with/without keepends. One time there's an empty string in the
+        # end, one time there's none.
+        if string.endswith('\n') or string == '':
+            lst.append('')
+        return lst
     else:
         return re.split('\n|\r\n', string)
 
diff --git a/test/test_common.py b/test/test_common.py
index 8e3aae6b..217cdf52 100644
--- a/test/test_common.py
+++ b/test/test_common.py
@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
     assert splitlines('asd\r\n') == ['asd', '']
     assert splitlines('asd\r\n\f') == ['asd', '\f']
     assert splitlines('\fasd\r\n') == ['\fasd', '']
+    assert splitlines('') == ['']
+    assert splitlines('\n') == ['', '']
 
 
 def test_splitlines_keepends():
     assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
     assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
     assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
+    assert splitlines('', keepends=True) == ['']
+    assert splitlines('\n', keepends=True) == ['\n', '']