I am looking to reduce the memory usage of np.packbits(A==A[:, None], axis=1), where A is a dense array of integers of length n. A==A[:, None] is memory-hungry for large n, since the resulting Boolean array is stored inefficiently, with each Boolean value costing 1 byte.
I wrote the script below to achieve the same result while packing bits one section at a time. It is, however, around 3x slower, so I am looking for ways to speed it up or, alternatively, for a better algorithm with a small memory overhead.
Note: this is a follow-up to a question I asked earlier: "Comparing numpy array with itself by element efficiently".
Reproducible code below for benchmarking.
import numpy as np
from numba import jit
@jit(nopython=True)
def bool2int(x):
    """Pack a short sequence of flags into one integer, first flag highest.

    The element at position ``k`` contributes ``int(x[k]) << (7 - k)`` when
    it is truthy; falsy elements contribute nothing. With eight 0/1 flags
    the result is the byte whose most significant bit is ``x[0]``.
    """
    total = 0
    for position, flag in enumerate(x):
        if not flag:
            # Unset flags add nothing to the packed value.
            continue
        total += int(flag) << (7 - position)
    return total
@jit(nopython=True)
def compare_elementwise(arr, result, section):
    """Write the bit-packed self-equality matrix of ``arr`` into ``result``.

    Bit ``7 - (col % 8)`` of ``result[row, col // 8]`` is set exactly when
    ``arr[row] == arr[col]``. The unused low bits of a trailing partial byte
    are left at zero, so the layout is the one the benchmark in this file
    compares against ``np.packbits(arr == arr[:, None], axis=1)``.

    Args:
        arr: 1-D sequence of values to compare with itself.
        result: 2-D output array, written in place; it needs at least
            ``ceil(len(arr) / 8)`` columns.
        section: Scratch buffer kept only for backward compatibility. It is
            no longer read, so stale contents cannot leak into the output;
            it is cleared when ``arr`` is non-empty so callers observe the
            same all-zero buffer the previous implementation left behind.

    Returns:
        ``result``, the same array object that was passed in.
    """
    n = len(arr)
    if n > 0:
        section[:] = 0

    for row in range(n):
        pivot = arr[row]
        # Accumulate the current output byte in a local integer instead of
        # staging eight flags in an array and re-reading them per byte.
        byte = 0
        for col in range(n):
            if arr[col] == pivot:
                byte |= 1 << (7 - (col % 8))
            # Flush on every eighth column and on the final (possibly
            # partial) group of the row.
            if (col % 8 == 7) or (col == n - 1):
                result[row, col // 8] = byte
                byte = 0
    return result
n = 10000
A = np.random.randint(0, 1000, n)
result_arr = np.zeros((n, n // 8 if n % 8 == 0 else n // 8 + 1)).astype(np.uint8)
selection_arr = np.zeros(8).astype(np.uint8)
# memory efficient version, but slow
packed = compare_elementwise(A, result_arr, selection_arr)
# memory inefficient version, but fast
packed2 = np.packbits(A == A[:, None], axis=1)
assert (packed == packed2).all()
%timeit compare_elementwise(A, result_arr, selection_arr) # 1.6 seconds
%timeit np.packbits(A == A[:, None], axis=1) # 0.460 second