Speed up splitlines.

We use Python's built-in `str.splitlines` again, with the modifications we need. I ran it with: python3 -m timeit -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()' The speed difference is quite remarkable; it's ~3 times faster: 10000 loops, best of 3: 52.1 usec per loop vs. the old: 10000 loops, best of 3: 148 usec per loop. We might need to speed up splitlines without keepends as well. It's probably also a factor of 2-3 slower than it should be.
2025-12-18 03:25:55 +08:00 · 2017-03-09 08:58:57 +01:00
parent b814a91f29
commit 989e4bac89
2 changed files with 26 additions and 17 deletions
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
    also on form feeds.
    """
    if keepends:
-        # If capturing parentheses are used in pattern, then the text of all
-        # groups in the pattern are also returned as part of the resulting
-        # list.
-        lst = re.split('(\n|\r\n)', string)
+        lst = string.splitlines(keepends=True)

-        # Need to merge the new lines with the actual lines.
-        odd = False
-        lines = []
-        for string in lst:
-            if odd:
-                line += string
-                lines.append(line)
-            else:
-                line = string
-            odd = not odd
-        if odd:
-            lines.append(line)
-        return lines
+        # We have to merge lines that were broken by form feed characters.
+        merge = []
+        for i, line in enumerate(lst):
+            if line.endswith('\f'):
+                merge.append(i)
+
+        for index in reversed(merge):
+            try:
+                lst[index] = lst[index] + lst[index + 1]
+                del lst[index + 1]
+            except IndexError:
+                # index + 1 can be empty and therefore there's no need to
+                # merge.
+                pass
+
+        # The stdlib's implementation of the end is inconsistent when calling
+        # it with/without keepends. One time there's an empty string in the
+        # end, one time there's none.
+        if string.endswith('\n') or string == '':
+            lst.append('')
+        return lst
    else:
        return re.split('\n|\r\n', string)

--- a/test/test_common.py
+++ b/test/test_common.py
@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
    assert splitlines('asd\r\n') == ['asd', '']
    assert splitlines('asd\r\n\f') == ['asd', '\f']
    assert splitlines('\fasd\r\n') == ['\fasd', '']
+    assert splitlines('') == ['']
+    assert splitlines('\n') == ['', '']


 def test_splitlines_keepends():
    assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
    assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
    assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
+    assert splitlines('', keepends=True) == ['']
+    assert splitlines('\n', keepends=True) == ['\n', '']