Speed up splitlines.

We use Python's built-in `str.splitlines` again, with the modifications we need.
I ran it with:

    python3 -m timeit -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()' 'splitlines(x, keepends=True)'

The speed difference is quite remarkable; it's ~3 times faster:

    10000 loops, best of 3: 52.1 usec per loop

vs. the old:

    10000 loops, best of 3: 148 usec per loop

We might need to speed up splitlines with `keepends=False` as well. It's probably
also a factor of 2-3 slower than it should be.
This commit is contained in:
Dave Halter
2017-03-09 08:58:57 +01:00
parent b814a91f29
commit 989e4bac89
2 changed files with 26 additions and 17 deletions

View File

@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
also on form feeds. also on form feeds.
""" """
if keepends: if keepends:
# If capturing parentheses are used in pattern, then the text of all lst = string.splitlines(keepends=True)
# groups in the pattern are also returned as part of the resulting
# list.
lst = re.split('(\n|\r\n)', string)
# Need to merge the new lines with the actual lines. # We have to merge lines that were broken by form feed characters.
odd = False merge = []
lines = [] for i, line in enumerate(lst):
for string in lst: if line.endswith('\f'):
if odd: merge.append(i)
line += string
lines.append(line) for index in reversed(merge):
else: try:
line = string lst[index] = lst[index] + lst[index + 1]
odd = not odd del lst[index + 1]
if odd: except IndexError:
lines.append(line) # index + 1 can be empty and therefore there's no need to
return lines # merge.
pass
# The stdlib's implementation of the end is inconsistent when calling
# it with/without keepends. One time there's an empty string in the
# end, one time there's none.
if string.endswith('\n') or string == '':
lst.append('')
return lst
else: else:
return re.split('\n|\r\n', string) return re.split('\n|\r\n', string)

View File

@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
assert splitlines('asd\r\n') == ['asd', ''] assert splitlines('asd\r\n') == ['asd', '']
assert splitlines('asd\r\n\f') == ['asd', '\f'] assert splitlines('asd\r\n\f') == ['asd', '\f']
assert splitlines('\fasd\r\n') == ['\fasd', ''] assert splitlines('\fasd\r\n') == ['\fasd', '']
assert splitlines('') == ['']
assert splitlines('\n') == ['', '']
def test_splitlines_keepends(): def test_splitlines_keepends():
assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', ''] assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f'] assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', ''] assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
assert splitlines('', keepends=True) == ['']
assert splitlines('\n', keepends=True) == ['\n', '']