Skip to content

Commit 48688fc

Browse files
authored
Use memoryviews to optimize array_to_qualitystring (PR #1363)
The data is contiguous in memory, so we can declare the memoryviews as [::1] to omit stride calculations. Using explicit typed memoryviews rather than the previous array indexing code leads to a roughly 100x speedup within this function.
1 parent d123986 commit 48688fc

3 files changed

Lines changed: 43 additions & 6 deletions

File tree

pysam/libcutils.pyx

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,19 @@ cpdef array_to_qualitystring(c_array.array qualities, int offset=33):
4646
"""convert an array of quality values to a string."""
4747
if qualities is None:
4848
return None
49-
cdef int x
5049

51-
cdef c_array.array result
52-
result = c_array.clone(qualities, len(qualities), zero=False)
50+
cdef const unsigned char[::1] qualities_view = qualities
51+
cdef size_t n = qualities_view.shape[0]
5352

54-
for x from 0 <= x < len(qualities):
55-
result[x] = qualities[x] + offset
56-
return force_str(result.tobytes())
53+
cdef bytearray result_ba = bytearray(n)
54+
cdef unsigned char[::1] result_view = result_ba
55+
56+
cdef size_t i
57+
58+
for i in range(n):
59+
result_view[i] = qualities_view[i] + offset
60+
61+
return force_str(bytes(result_ba))
5762

5863

5964
cpdef qualities_to_qualitystring(qualities, int offset=33):

tests/AlignedSegment_test.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,5 +1901,27 @@ def test_string_export_import_with_tags(self):
19011901
self.assertEqual(a, b)
19021902

19031903

1904+
class TestArrayUtilities(unittest.TestCase):
1905+
def test_array_to_qualstr(self):
1906+
data = [
1907+
"",
1908+
"Q",
1909+
"""!"#$%&'()*+,-./012...xyz{|}~""",
1910+
">>?AB",
1911+
"ABDDEFGHIJabcdefghij",
1912+
"ACAFFGGFFFJDFJHHJIJIHKGGHKHHIJHHHJ7123" * 50,
1913+
]
1914+
1915+
for qual in data:
1916+
qual_array = pysam.qualitystring_to_array(qual)
1917+
result = pysam.array_to_qualitystring(qual_array)
1918+
self.assertEqual(result, qual)
1919+
1920+
def test_longarray_to_qualstr(self):
1921+
qual_array = array.array('l', [64, 65, 66, 67, 68])
1922+
with self.assertRaises(ValueError):
1923+
pysam.array_to_qualitystring(qual_array)
1924+
1925+
19041926
if __name__ == "__main__":
19051927
unittest.main()

tests/libcutils_bench.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Benchmarking the libcutils module. Usage::
2+
3+
pytest tests/libcutils_bench.py
4+
"""
5+
import pysam
6+
7+
8+
def test_array_to_qualitystring_long_sequences(benchmark):
9+
result = benchmark(pysam.array_to_qualitystring, pysam.qualitystring_to_array("123") * 500)
10+
assert result == "123" * 500

0 commit comments

Comments
 (0)