mirror of
https://github.com/davidhalter/jedi.git
synced 2025-12-18 03:25:55 +08:00
Speed up splitlines.
We use the python function again with the modifications we need.
I ran it with:
python3 -m timeit -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()'
The speed difference is quite remarkable; it's ~3 times faster:
10000 loops, best of 3: 52.1 usec per loop
vs. the old:
10000 loops, best of 3: 148 usec per loop
We might need to speed up splitlines with keepends=False as well. It's probably
also a factor of 2-3 slower than it should be.
This commit is contained in:
@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
|
||||
also on form feeds.
|
||||
"""
|
||||
if keepends:
|
||||
# If capturing parentheses are used in pattern, then the text of all
|
||||
# groups in the pattern are also returned as part of the resulting
|
||||
# list.
|
||||
lst = re.split('(\n|\r\n)', string)
|
||||
lst = string.splitlines(keepends=True)
|
||||
|
||||
# Need to merge the new lines with the actual lines.
|
||||
odd = False
|
||||
lines = []
|
||||
for string in lst:
|
||||
if odd:
|
||||
line += string
|
||||
lines.append(line)
|
||||
else:
|
||||
line = string
|
||||
odd = not odd
|
||||
if odd:
|
||||
lines.append(line)
|
||||
return lines
|
||||
# We have to merge lines that were broken by form feed characters.
|
||||
merge = []
|
||||
for i, line in enumerate(lst):
|
||||
if line.endswith('\f'):
|
||||
merge.append(i)
|
||||
|
||||
for index in reversed(merge):
|
||||
try:
|
||||
lst[index] = lst[index] + lst[index + 1]
|
||||
del lst[index + 1]
|
||||
except IndexError:
|
||||
# index + 1 can be empty and therefore there's no need to
|
||||
# merge.
|
||||
pass
|
||||
|
||||
# The stdlib's implementation of the end is inconsistent when calling
|
||||
# it with/without keepends. One time there's an empty string in the
|
||||
# end, one time there's none.
|
||||
if string.endswith('\n') or string == '':
|
||||
lst.append('')
|
||||
return lst
|
||||
else:
|
||||
return re.split('\n|\r\n', string)
|
||||
|
||||
|
||||
@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
|
||||
assert splitlines('asd\r\n') == ['asd', '']
|
||||
assert splitlines('asd\r\n\f') == ['asd', '\f']
|
||||
assert splitlines('\fasd\r\n') == ['\fasd', '']
|
||||
assert splitlines('') == ['']
|
||||
assert splitlines('\n') == ['', '']
|
||||
|
||||
|
||||
def test_splitlines_keepends():
|
||||
assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
|
||||
assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
|
||||
assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
|
||||
assert splitlines('', keepends=True) == ['']
|
||||
assert splitlines('\n', keepends=True) == ['\n', '']
|
||||
|
||||
Reference in New Issue
Block a user