Maybe the following will be helpful:
import numpy as np
from numbers import Integral
from itertools import zip_longest
def _range_len(start, stop, step):
#here it works like len(range(start, stop, step))
if step == 0:
raise ValueError("slice step cannot be zero")
n = (stop - start)
if (n > 0 and step > 0) or (n < 0 and step < 0):
return 1 + (abs(n) - 1) // abs(step)
return 0
def _broadcast_shape(shapes):
if not shapes:
return ()
out = []
rev = [s[::-1] for s in shapes]
for dims in zip_longest(*rev, fillvalue=1):
dim = 1
for d in dims:
if d == 1 or dim == 1:
dim = max(dim, d)
elif d != dim:
raise IndexError(f"index arrays could not be broadcast together: {shapes}")
out.append(dim)
return tuple(reversed(out))
def _as_tuple_index(index):
return index if isinstance(index, tuple) else (index,)
def _expand_ellipsis(index, ndim):
    """Replace an Ellipsis in ``index`` with the full slices it stands for.

    Every entry other than ``None`` (newaxis) and ``Ellipsis`` is counted as
    consuming one axis; the ellipsis expands to however many ``slice(None)``
    entries are needed to reach ``ndim`` (never fewer than zero). An index
    without an ellipsis is returned as a tuple with its entries unchanged.

    Raises:
        IndexError: if ``index`` contains more than one ellipsis.
    """
    items = _as_tuple_index(index)

    ellipsis_count = 0
    consuming_count = 0
    for item in items:
        if item is Ellipsis:
            ellipsis_count += 1
        elif item is not None:
            consuming_count += 1
    if ellipsis_count > 1:
        raise IndexError("an index can only have a single ellipsis ('...')")

    fill = (slice(None),) * max(0, ndim - consuming_count)
    expanded = []
    for item in items:
        if item is Ellipsis:
            expanded.extend(fill)
        else:
            expanded.append(item)
    return tuple(expanded)
def shape_after_index(shape, index):
    """Return the shape you'd get from arr.shape==shape indexed by `index`.

    Supports integers, slices, ``None`` (newaxis), ``Ellipsis``, integer
    array-likes and boolean masks, alone or combined in a tuple.

    When at least one integer or array index is present, all of them are
    broadcast together. The broadcast dimensions replace them in place if
    they are adjacent, and are moved to the front of the result if a slice
    or newaxis separates them, as NumPy does. A k-dimensional boolean mask
    counts as k adjacent index arrays whose length is the number of True
    entries.

    Index values are not bounds-checked, and boolean masks are not checked
    against the axes they cover. Scalar booleans (``True``/``False``) do not
    get NumPy's special zero-axis treatment.

    Args:
        shape: shape tuple of the array being indexed.
        index: a single index object or a tuple of them.

    Returns:
        The resulting shape as a tuple.

    Raises:
        IndexError: if the index has more than one ellipsis, addresses more
            axes than ``shape`` has, or its index arrays cannot be broadcast.
    """
    ndim = len(shape)

    # Reduce each entry to None, Ellipsis, a slice, or the *shape* of an
    # integer/array index, so that every shape entry consumes exactly one axis.
    entries = []
    for obj in _as_tuple_index(index):
        if obj is None or obj is Ellipsis or isinstance(obj, slice):
            entries.append(obj)
        elif isinstance(obj, (Integral, np.integer)):
            # An integer is an index of shape (): it drops its axis.
            entries.append(())
        else:
            arr = np.asarray(obj)
            if arr.dtype == np.bool_:
                # x[mask] behaves like x[mask.nonzero()]: one index array of
                # length count(True) per mask dimension. A 0-d mask is
                # counted as a single axis.
                true_count = int(arr.sum())
                entries.extend([(true_count,)] * max(arr.ndim, 1))
            else:
                entries.append(arr.shape)

    idx = _expand_ellipsis(tuple(entries), ndim)

    consumed = sum(obj is not None for obj in idx)
    if consumed > ndim:
        raise IndexError(
            f"too many indices for array: array is {ndim}-dimensional, "
            f"but {consumed} were indexed"
        )
    # Axes not mentioned by the index are kept whole.
    idx = idx + (slice(None),) * (ndim - consumed)

    basic_dims = []      # output sizes from slices and newaxis, in order
    index_shapes = []    # shapes of the integer/array indices to broadcast
    insert_at = 0        # where the broadcast dims go when indices are adjacent
    adjacent = True
    gap_seen = False     # a slice/newaxis followed the first integer/array index
    axis = 0
    for obj in idx:
        if isinstance(obj, tuple):
            if gap_seen:
                adjacent = False
            if not index_shapes:
                insert_at = len(basic_dims)
            index_shapes.append(obj)
            axis += 1
            continue
        if obj is None:
            basic_dims.append(1)
        else:
            start, stop, step = obj.indices(shape[axis])
            basic_dims.append(_range_len(start, stop, step))
            axis += 1
        if index_shapes:
            gap_seen = True

    bshape = _broadcast_shape(index_shapes)
    if not adjacent:
        # Separated advanced indices: the broadcast dimensions come first.
        insert_at = 0
    return tuple(basic_dims[:insert_at]) + bshape + tuple(basic_dims[insert_at:])
Some example usage:
# Plain slices: the first axis is cut to 200 entries, the second is kept whole.
shape_after_index((1000, 50), np.s_[:200, :])
# (200, 50)
# newaxis adds a length-1 axis, 1:2:3 keeps a single element, and the integer
# 0 drops the last axis (the ellipsis has no remaining axes to fill).
shape_after_index((1000, 50), np.s_[None, 1:2:3, ..., 0])
# (1, 1)
(100,10)), index it, and extrapolate the result to the real target size. numpy probably does much the same but in compiled code. But there's little need to return just the final shape.