Speed up splitlines.

We use Python's built-in `str.splitlines` again, with the modifications we need.
I ran it with:

    python3 -m timeit  -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()' 'splitlines(x, keepends=True)'

The speed difference is quite remarkable; it's ~3 times faster:

    10000 loops, best of 3: 52.1 usec per loop

vs. the old:

    10000 loops, best of 3: 148 usec per loop

We might need to speed up splitlines with `keepends=False` as well. It's probably
also a factor of 2-3 slower than it should be.
This commit is contained in:
Dave Halter
2017-03-09 08:58:57 +01:00
parent b814a91f29
commit 989e4bac89
2 changed files with 26 additions and 17 deletions

View File

@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
also on form feeds.
"""
if keepends:
# If capturing parentheses are used in pattern, then the text of all
# groups in the pattern are also returned as part of the resulting
# list.
lst = re.split('(\n|\r\n)', string)
lst = string.splitlines(keepends=True)
# Need to merge the new lines with the actual lines.
odd = False
lines = []
for string in lst:
if odd:
line += string
lines.append(line)
else:
line = string
odd = not odd
if odd:
lines.append(line)
return lines
# We have to merge lines that were broken by form feed characters.
merge = []
for i, line in enumerate(lst):
if line.endswith('\f'):
merge.append(i)
for index in reversed(merge):
try:
lst[index] = lst[index] + lst[index + 1]
del lst[index + 1]
except IndexError:
# index + 1 can be empty and therefore there's no need to
# merge.
pass
# The stdlib's implementation of the end is inconsistent when calling
# it with/without keepends. One time there's an empty string in the
# end, one time there's none.
if string.endswith('\n') or string == '':
lst.append('')
return lst
else:
return re.split('\n|\r\n', string)

View File

@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
assert splitlines('asd\r\n') == ['asd', '']
assert splitlines('asd\r\n\f') == ['asd', '\f']
assert splitlines('\fasd\r\n') == ['\fasd', '']
assert splitlines('') == ['']
assert splitlines('\n') == ['', '']
def test_splitlines_keepends():
    """Check ``splitlines(..., keepends=True)`` against fixed expectations.

    Each case pairs an input string with the exact list of lines expected
    back. The cases show that line terminators stay attached to their
    line, that a form feed does not start a new line, and that a trailing
    newline (or an empty input) yields a final empty string.
    """
    expectations = [
        # (input, expected lines with terminators kept)
        ('asd\r\n', ['asd\r\n', '']),
        ('asd\r\n\f', ['asd\r\n', '\f']),
        ('\fasd\r\n', ['\fasd\r\n', '']),
        ('', ['']),
        ('\n', ['\n', '']),
    ]
    for text, expected in expectations:
        assert splitlines(text, keepends=True) == expected