Speed up splitlines.

We use the built-in Python `str.splitlines` again, with the modifications we need on top of it. I measured it with: python3 -m timeit -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()' The speed difference is quite remarkable; the new version is ~3 times faster (10000 loops, best of 3: 52.1 usec per loop) than the old one (10000 loops, best of 3: 148 usec per loop). We might need to speed up splitlines for the `keepends=False` case as well; it is probably also a factor of 2-3 slower than it should be.
2025-12-19 03:55:57 +08:00 · 2017-03-09 08:58:57 +01:00
parent b814a91f29
commit 989e4bac89
2 changed files with 26 additions and 17 deletions
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
    also on form feeds.
    """
    if keepends:
-        # If capturing parentheses are used in pattern, then the text of all
+        lst = string.splitlines(keepends=True)
        # groups in the pattern are also returned as part of the resulting
        # list.
        lst = re.split('(\n|\r\n)', string)
-        # Need to merge the new lines with the actual lines.
+        # We have to merge lines that were broken by form feed characters.
-        odd = False
+        merge = []
-        lines = []
+        for i, line in enumerate(lst):
-        for string in lst:
+            if line.endswith('\f'):
-            if odd:
+                merge.append(i)
-                line += string
+
-                lines.append(line)
+        for index in reversed(merge):
-            else:
+            try:
-                line = string
+                lst[index] = lst[index] + lst[index + 1]
-            odd = not odd
+                del lst[index + 1]
-        if odd:
+            except IndexError:
-            lines.append(line)
+                # index + 1 can be empty and therefore there's no need to
-        return lines
+                # merge.
                pass
        # The stdlib's implementation of the end is inconsistent when calling
        # it with/without keepends. One time there's an empty string in the
        # end, one time there's none.
        if string.endswith('\n') or string == '':
            lst.append('')
        return lst
    else:
        return re.split('\n|\r\n', string)
--- a/test/test_common.py
+++ b/test/test_common.py
@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
    assert splitlines('asd\r\n') == ['asd', '']
    assert splitlines('asd\r\n\f') == ['asd', '\f']
    assert splitlines('\fasd\r\n') == ['\fasd', '']
    assert splitlines('') == ['']
    assert splitlines('\n') == ['', '']
 def test_splitlines_keepends():
    assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
    assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
    assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
    assert splitlines('', keepends=True) == ['']
    assert splitlines('\n', keepends=True) == ['\n', '']