1

Suppose one has a high dimensional numpy array:

import numpy as np
# Dense 200 x 200 x 200 array: every entry is allocated, whether it is valid or not.
x = np.zeros((200, 200, 200))

of which only a contiguous sub-array* contains 'valid' entries. Other entries may be ignored (in this example, every entry equal to 1 is valid; the 0s may be ignored):

# Index expression selecting the 10 x 10 x 10 block of valid entries.
sub_array = np.s_[100:110, 100:110, 100:110]
# Mark the valid block with ones; every entry outside it stays zero.
x[sub_array] = 1

How can I represent x in Python such that it integrates with other NumPy arrays (slicing, indexing, etc.) but does not waste memory on all the invalid entries?

*I'd also be interested in a solution where the valid subset is not necessarily a sub-array, if possible.

1

1 Answer 1

1

For several use cases, you may get away with a smartly crafted class implementing an `__array__` method. Here is a sketch of one possible implementation:

import numpy as np

class PaddedArray:
    """Array surrounded by implicit zero padding that is never stored.

    Only the inner array is kept in memory, together with the number of
    zero entries that logically precede and follow it along every axis.
    The full padded array is built only on conversion, i.e. when
    ``np.asarray`` (or any NumPy operation that calls ``__array__``) is
    applied to the object.

    Indexing accepts integers, ``np.newaxis`` and unit-step slices and
    returns another ``PaddedArray``.  Ellipsis and other slice steps are
    not supported.
    """

    def __init__(self, arr, padding):
        """Store ``arr`` and the per-axis ``(before, after)`` zero padding.

        Args:
            arr: Array-like holding the stored (non-padding) entries.
            padding: One ``(before, after)`` pair of non-negative integers
                per axis of ``arr``.

        Raises:
            ValueError: If ``padding`` does not provide exactly one pair of
                non-negative widths per axis of ``arr``.
        """
        self._arr = np.array(arr)
        self._pad = [tuple(map(int, p)) for p in padding]
        # Real exceptions instead of ``assert``: asserts vanish under ``-O``.
        if self._arr.ndim != len(self._pad):
            raise ValueError(
                "expected {} padding pairs, got {}".format(
                    self._arr.ndim, len(self._pad)))
        if any(len(p) != 2 for p in self._pad):
            raise ValueError("every padding entry must be a (before, after) pair")
        if any(width < 0 for p in self._pad for width in p):
            raise ValueError("padding widths must be non-negative")

    def __repr__(self):
        return "PaddedArray(shape={}, padding={})".format(self.shape, self._pad)

    def __array__(self, *args, **kwargs):
        """Materialize the full padded array (arguments go to ``np.asarray``)."""
        ar = np.asarray(self._arr, *args, **kwargs)
        if not self._pad:
            # 0-d result (every axis indexed away): ``np.pad`` rejects an
            # empty pad list, and there is nothing to pad anyway.
            return np.array(ar)
        return np.pad(ar, self._pad, 'constant')

    def __getitem__(self, idx):
        """Index the padded array without materializing it.

        Args:
            idx: An integer, ``np.newaxis``, a unit-step slice, or a
                tuple/list of those (one entry per axis, left to right).

        Returns:
            A new ``PaddedArray`` describing the selected region.

        Raises:
            IndexError: For an out-of-bounds integer, too many indices, or
                an index of an unsupported type.
            NotImplementedError: For ellipsis or a slice step other than 1.
        """
        if not isinstance(idx, (list, tuple)):
            idx = (idx,)
        new_arr = self._arr
        new_pad = list(self._pad)
        i_dim = 0  # axis of the *result so far* that the next index applies to
        for s in idx:
            if s is np.newaxis:
                # Handled before touching shape/pad: a trailing newaxis
                # refers to an axis that does not exist yet.
                new_pad.insert(i_dim, (0, 0))
                new_arr = np.expand_dims(new_arr, i_dim)
                i_dim += 1
                continue
            if s is Ellipsis:
                # TODO - Support ellipsis
                raise NotImplementedError("ellipsis indexing is not supported")
            if i_dim >= len(new_pad):
                raise IndexError("too many indices for array")
            n_arr = new_arr.shape[i_dim]
            p1, p2 = new_pad[i_dim]
            n = n_arr + p1 + p2  # logical (padded) length of this axis
            if isinstance(s, (int, np.integer)):
                s = int(s)
                if s < 0:
                    s += n
                if not 0 <= s < n:
                    raise IndexError(
                        "index {} is out of bounds for axis of size {}".format(s, n))
                new_pad.pop(i_dim)
                if p1 <= s < p1 + n_arr:
                    # A scalar index makes ``np.take`` drop the axis.
                    new_arr = np.take(new_arr, s - p1, axis=i_dim)
                else:
                    # Inside the padding: all zeros.  Built from the shape so
                    # it also works when the stored core is empty on this axis.
                    reduced = new_arr.shape[:i_dim] + new_arr.shape[i_dim + 1:]
                    new_arr = np.zeros(reduced, dtype=new_arr.dtype)
            elif isinstance(s, slice):
                # ``slice.indices`` resolves None, negative and out-of-range
                # bounds exactly like built-in sequences do (so stop=0 is 0).
                start, stop, step = s.indices(n)
                # TODO - Support arbitrary steps
                if step != 1:
                    raise NotImplementedError("only slices with step 1 are supported")
                stop = max(stop, start)
                # Split [start, stop) into its overlap with the leading
                # padding, the stored core, and the trailing padding.
                before = max(min(stop, p1) - start, 0)
                after = max(stop - max(start, p1 + n_arr), 0)
                lo = min(max(start - p1, 0), n_arr)
                hi = min(max(stop - p1, lo), n_arr)
                new_pad[i_dim] = (before, after)
                new_arr = new_arr[(slice(None),) * i_dim + (slice(lo, hi),)]
                i_dim += 1
            else:
                raise IndexError(
                    "only integers, slices with step 1 and numpy.newaxis are "
                    "valid indices, got {!r}".format(s))
        return PaddedArray(new_arr, new_pad)

    @property
    def shape(self):
        """Shape of the logical array, padding included."""
        return tuple(s + p1 + p2 for s, (p1, p2) in zip(self._arr.shape, self._pad))

Obviously, the complicated part is the slicing, which here does not support ellipsis (...) or arbitrary slice steps. Also, note that the full padded array is instantiated whenever you need to operate on it. You can use np.asarray to do that explicitly, although operating with another np.ndarray or using NumPy functions should trigger the conversion automatically. Here are some usage examples:

import numpy as np

# The stored data: a small 4 x 3 block.
core = np.arange(12).reshape(4, 3)
print(core)
# [[ 0  1  2]
#  [ 3  4  5]
#  [ 6  7  8]
#  [ 9 10 11]]

# Surround it with 1 row before / 3 rows after and 0 columns before / 2 after.
padded = PaddedArray(core, [(1, 3), (0, 2)])
print(padded.shape)
# (8, 5)

# Explicit conversion builds the full zero-padded array.
print(np.asarray(padded))
# [[ 0  0  0  0  0]
#  [ 0  1  2  0  0]
#  [ 3  4  5  0  0]
#  [ 6  7  8  0  0]
#  [ 9 10 11  0  0]
#  [ 0  0  0  0  0]
#  [ 0  0  0  0  0]
#  [ 0  0  0  0  0]]

# Integer index that lands entirely in the padding.
first_row = padded[0]
print(np.asarray(first_row))
# [0 0 0 0 0]

# Negative index counted from the end of the padded axis.
third_from_last_column = padded[:, -3]
print(np.asarray(third_from_last_column))
# [ 0  2  5  8 11  0  0  0]

# Integer, new axis and open-ended slice combined.
row_tail = padded[3, np.newaxis, 2:]
print(np.asarray(row_tail))
# [[8 0 0]]

# Note it is automatically converted when used as a matmul operand.
top_left = padded[:4, :4]
print(top_left @ core)
# [[  0   0   0]
#  [ 15  18  21]
#  [ 42  54  66]
#  [ 69  90 111]]
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.