mirror of
https://github.com/davidhalter/jedi.git
synced 2025-12-19 03:55:57 +08:00
Speed up splitlines.
We use Python's built-in str.splitlines again, with the modifications we need.
I ran it with:
python3 -m timeit -n 10000 -s 'from jedi.common import splitlines; x = open("test_regression.py").read()'
The speed difference is quite remarkable; it's ~3 times faster:
10000 loops, best of 3: 52.1 usec per loop
vs. the old:
10000 loops, best of 3: 148 usec per loop
We might need to speed up splitlines without keepends as well. It's probably
also a factor of 2-3 slower than it should be.
This commit is contained in:
@@ -159,24 +159,29 @@ def splitlines(string, keepends=False):
|
|||||||
also on form feeds.
|
also on form feeds.
|
||||||
"""
|
"""
|
||||||
if keepends:
|
if keepends:
|
||||||
# If capturing parentheses are used in pattern, then the text of all
|
lst = string.splitlines(keepends=True)
|
||||||
# groups in the pattern are also returned as part of the resulting
|
|
||||||
# list.
|
|
||||||
lst = re.split('(\n|\r\n)', string)
|
|
||||||
|
|
||||||
# Need to merge the new lines with the actual lines.
|
# We have to merge lines that were broken by form feed characters.
|
||||||
odd = False
|
merge = []
|
||||||
lines = []
|
for i, line in enumerate(lst):
|
||||||
for string in lst:
|
if line.endswith('\f'):
|
||||||
if odd:
|
merge.append(i)
|
||||||
line += string
|
|
||||||
lines.append(line)
|
for index in reversed(merge):
|
||||||
else:
|
try:
|
||||||
line = string
|
lst[index] = lst[index] + lst[index + 1]
|
||||||
odd = not odd
|
del lst[index + 1]
|
||||||
if odd:
|
except IndexError:
|
||||||
lines.append(line)
|
# index + 1 can be empty and therefore there's no need to
|
||||||
return lines
|
# merge.
|
||||||
|
pass
|
||||||
|
|
||||||
|
# The stdlib's implementation of the end is inconsistent when calling
|
||||||
|
# it with/without keepends. One time there's an empty string in the
|
||||||
|
# end, one time there's none.
|
||||||
|
if string.endswith('\n') or string == '':
|
||||||
|
lst.append('')
|
||||||
|
return lst
|
||||||
else:
|
else:
|
||||||
return re.split('\n|\r\n', string)
|
return re.split('\n|\r\n', string)
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,13 @@ def test_splitlines_no_keepends():
|
|||||||
assert splitlines('asd\r\n') == ['asd', '']
|
assert splitlines('asd\r\n') == ['asd', '']
|
||||||
assert splitlines('asd\r\n\f') == ['asd', '\f']
|
assert splitlines('asd\r\n\f') == ['asd', '\f']
|
||||||
assert splitlines('\fasd\r\n') == ['\fasd', '']
|
assert splitlines('\fasd\r\n') == ['\fasd', '']
|
||||||
|
assert splitlines('') == ['']
|
||||||
|
assert splitlines('\n') == ['', '']
|
||||||
|
|
||||||
|
|
||||||
def test_splitlines_keepends():
|
def test_splitlines_keepends():
|
||||||
assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
|
assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
|
||||||
assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
|
assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
|
||||||
assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
|
assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
|
||||||
|
assert splitlines('', keepends=True) == ['']
|
||||||
|
assert splitlines('\n', keepends=True) == ['\n', '']
|
||||||
|
|||||||
Reference in New Issue
Block a user