Performance
Fundamentally this is a vectorisation problem. The "easy" part is to vectorise away the innermost dimension, which means removing min_max and your j loop and letting np.maximum broadcast.
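As a minimal sketch of that idea (the small matrices here are made up for illustration; the full demonstration follows below), one broadcast fills an entire row of the product with no j loop:

import numpy as np

mat1 = np.array([[1, 5],
                 [3, 2]])
mat2 = np.array([[4, 0],
                 [2, 6]])
i = 0
# An (n, 1) column view of row i broadcasts against all of mat2, so
# element (k, j) of the intermediate is max(mat1[i, k], mat2[k, j]);
# .min(axis=0) then reduces over k, i.e. min_max(mat1, mat2, i, j)
# for every j at once.
row_i = mat1[i, :, np.newaxis]
print(np.maximum(row_i, mat2).min(axis=0))   # [4 1]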
You're duplicating work with

for j in range(i, n):

because your horizontal and vertical indices overlap on the diagonal: at j == i, both assignments write to my_prod[i, i], so the first result is computed only to be overwritten.
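A tiny hedged example of that overlap, with made-up values (the helper mirrors your min_max):

import numpy as np

def min_max(mat1, mat2, i, j):
    return np.maximum(mat1[i, :], mat2[:, j]).min()

# At j == i both of the original assignments target my_prod[0, 0]:
# the first result (3) is computed, then immediately overwritten (4).
mat1 = np.array([[9, 2],
                 [4, 1]])
mat2 = np.array([[9, 1],
                 [3, 8]])
print(min_max(mat1, mat2, 0, 0))   # 3, written first...
print(min_max(mat2, mat1, 0, 0))   # 4, ...then overwrites it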
The performance benefit of these changes alone is large enough that the added complexity of also vectorising the second dimension may outweigh any further speedup.
Comparing the old and new approaches for a 250×250 matrix with one vectorised dimension:
OP duration: 747.69 ms
new duration: 15.73 ms
NumPy
Don't hard-code dtype=float. You should reuse the dtype of the inputs, and indeed they aren't floats for your test data; they're ints.
Rather than np.zeros() for my_prod, use np.empty(): every element is written before it's read, so the zero-fill is wasted work.
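A short sketch of both points; np.empty_like is one convenient spelling, and the explicit dtype= form below it is equivalent:

import numpy as np

mat1 = np.arange(9).reshape(3, 3)   # integer test data, like the OP's

# Inherits shape and dtype from the input and skips zero-filling.
prod = np.empty_like(mat1)
print(prod.dtype)                   # an integer dtype, not float64

# Equivalent explicit form:
prod = np.empty(mat1.shape, dtype=mat1.dtype)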
Demonstration (one vectorised loop)
import time
import numpy as np
def min_max(mat1, mat2, i, j):
    ''' this function takes 2 matrices and 2 indexes
    and return the minimum of the element wise maximum
    between row i of mat1 and col j of mat2 '''
    return min(np.maximum(mat1[i,:], mat2[:,j]))
def max_prod_op(mat1,mat2):
    ''' this function takes 2 matrices and return a new matrix
    where position (i,j) is min_max(mat1, mat2, i, j) '''
    n = len(mat1)
    my_prod = np.zeros((n,n), dtype=float)
    for i in range(n):
        for j in range(i,n):
            my_prod[i,j] = min_max(mat1,mat2,i,j)
            my_prod[j,i] = min_max(mat2,mat1,j,i)
    return my_prod
def max_prod_new(mat1: np.ndarray, mat2: np.ndarray) -> np.ndarray:
    """
    takes 2 matrices and returns a new matrix
    where position (i,j) is min_max(mat1, mat2, i, j)
    """
    n = len(mat1)
    prod = np.empty_like(mat1)
    for i in range(n):
        # prod[i, j] for j > i: broadcast row i of mat1 (as a column)
        # against the remaining columns of mat2, then reduce over k.
        row = mat1[i:i+1].T
        cols = mat2[:, i+1:]
        prod[i, i+1:] = np.maximum(row, cols).min(axis=0)
        # prod[j, i] for j >= i (covers the diagonal exactly once):
        # broadcast column i of mat1 against the remaining rows of mat2.
        col = mat1[:, i]
        rows = mat2[i:]
        prod[i:, i] = np.maximum(col, rows).min(axis=1)
    return prod
def test() -> None:
    rand = np.random.default_rng(seed=0)

    # OP test case
    A_small = np.array((
        (17, 13, 10,  6,  6),
        ( 1,  2,  1,  4, 16),
        (13, 18, 10, 12, 19),
        (14, 13, 11, 11, 18),
        ( 6, 16, 13,  1,  8),
    ))
    for method in (max_prod_op,
                   max_prod_new,):
        prod = method(A_small, A_small)
        assert np.array_equal(
            prod,
            np.array((
                ( 6, 13, 10,  6,  8),
                ( 2,  2,  2,  4,  6),
                (13, 13, 10, 12, 13),
                (13, 13, 11, 11, 14),
                ( 8, 13, 10,  6,  6),
            )),
        )

    n = 250
    A_large = rand.integers(low=0, high=100, size=(n, n))
    B_large = rand.integers(low=0, high=100, size=(n, n))
    ta = time.perf_counter()
    prod_op = max_prod_op(A_large, B_large)
    tb = time.perf_counter()
    prod_new = max_prod_new(A_large, B_large)
    tc = time.perf_counter()
    assert np.array_equal(prod_new, prod_op)
    print(f'OP duration: {(tb - ta)*1e3:.2f} ms')
    print(f'new duration: {(tc - tb)*1e3:.2f} ms')


if __name__ == '__main__':
    test()
Full vectorisation
Vectorising away both loops is possible. The risks are:
- It has more setup overhead, so for very small matrices it will be slightly slower than the half-vectorised approach.
- Because fancy indexing produces copies rather than views, the intermediate arrays cost O(n³) memory rather than O(n²): each triangular index array holds about n²/2 entries, and every entry pulls in a full row or column of length n (see the sketch below).
In theory it could have better runtime characteristics; in practice it does not, as the timing at the end shows.
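A brief hedged illustration of the two ingredients: what np.triu_indices produces, and the fact that fancy indexing copies (np.shares_memory just confirms the result is not a view):

import numpy as np

n = 4
iu, ju = np.triu_indices(n=n, k=1)
print(iu)   # [0 0 0 1 1 2]  row index of each strictly-upper cell
print(ju)   # [1 2 3 2 3 3]  matching column index

mat = np.arange(n * n).reshape(n, n)
cols = mat[:, ju]                     # shape (4, 6): one copied column per pair
print(np.shares_memory(mat, cols))    # False -> a copy, not a view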
It requires setting up triangular indices:
import time
import numpy as np
def min_max(mat1, mat2, i, j):
    ''' this function takes 2 matrices and 2 indexes
    and return the minimum of the element wise maximum
    between row i of mat1 and col j of mat2 '''
    return min(np.maximum(mat1[i,:], mat2[:,j]))
def max_prod_op(mat1,mat2):
    ''' this function takes 2 matrices and return a new matrix
    where position (i,j) is min_max(mat1, mat2, i, j) '''
    n = len(mat1)
    my_prod = np.zeros((n,n), dtype=float)
    for i in range(n):
        for j in range(i,n):
            my_prod[i,j] = min_max(mat1,mat2,i,j)
            my_prod[j,i] = min_max(mat2,mat1,j,i)
    return my_prod
def max_prod_new(mat1: np.ndarray, mat2: np.ndarray) -> np.ndarray:
    """
    takes 2 matrices and returns a new matrix
    where position (i,j) is min_max(mat1, mat2, i, j)
    """
    n = len(mat1)
    prod = np.empty_like(mat1)

    # strict upper triangle: prod[i, j] = min_max(mat1, mat2, i, j) for i < j
    iu, ju = np.triu_indices(n=n, k=1)
    fast_cols = mat2[:, ju]
    slow_rows = mat1[iu].T
    prod[iu, ju] = np.maximum(fast_cols, slow_rows).min(axis=0)

    # lower triangle plus diagonal: prod[j, i] = min_max(mat2, mat1, j, i) for i <= j
    iu, ju = np.triu_indices(n=n, k=0)
    fast_rows = mat2[ju, :]
    slow_cols = mat1[:, iu].T
    prod[ju, iu] = np.maximum(fast_rows, slow_cols).min(axis=1)

    return prod
def test() -> None:
    rand = np.random.default_rng(seed=0)

    # OP test case
    A_small = np.array((
        (17, 13, 10,  6,  6),
        ( 1,  2,  1,  4, 16),
        (13, 18, 10, 12, 19),
        (14, 13, 11, 11, 18),
        ( 6, 16, 13,  1,  8),
    ))
    for method in (max_prod_op,
                   max_prod_new,):
        prod = method(A_small, A_small)
        assert np.array_equal(
            prod,
            np.array((
                ( 6, 13, 10,  6,  8),
                ( 2,  2,  2,  4,  6),
                (13, 13, 10, 12, 13),
                (13, 13, 11, 11, 14),
                ( 8, 13, 10,  6,  6),
            )),
        )

    n = 250
    A_large = rand.integers(low=0, high=100, size=(n, n))
    B_large = rand.integers(low=0, high=100, size=(n, n))
    ta = time.perf_counter()
    prod_op = max_prod_op(A_large, B_large)
    tb = time.perf_counter()
    prod_new = max_prod_new(A_large, B_large)
    tc = time.perf_counter()
    assert np.array_equal(prod_new, prod_op)
    print(f'OP duration: {(tb - ta)*1e3:.2f} ms')
    print(f'new duration: {(tc - tb)*1e3:.2f} ms')


if __name__ == '__main__':
    test()
For the same 250×250 input, full vectorisation increases the runtime from 16 ms to 93 ms.