I've started using Numba and right now I'm trying to speed up an algorithm with it. However, I'm having trouble with a numpy.dot operation. The problem is that when I slice a 2d array of strings column-by-column, it produces an array of type array([unichr x 100], 1d, A). I need this type to be array([unichr x 100], 1d, C) in order for numpy.where to produce an array of type array(float64, 1d, C). This array is then used in the numpy.dot operation with another array of the same type. Numba is telling me that it doesn't like the fact that the arrays have different orders, A and C. The algorithm works fine without Numba.
Here is a short example to illustrate the problem.
# 5 rows x 3 columns of short string labels.
data_X = [
    ['a1', 'b2', 'c1'],
    ['a1', 'b2', 'c2'],
    ['a2', 'b1', 'c3'],
    ['a1', 'b2', 'c1'],
    ['a2', 'b1', 'c3'],
]
# Fixed-width unicode cells (up to 100 characters each).
X = np.array(data_X, dtype='<U100')

# One float value per row of data_X.
data_Y = [1.0, 2.0, 3.0, 4.0, 5.0]
Y = np.array(data_Y, dtype=np.float64)
@nb.jit(
    nopython=True,
    # NOTE(review): `locals` is Numba's mapping for local-variable types; X and
    # Y are function arguments here -- confirm these entries have the intended
    # effect (argument types are normally given through a signature).
    locals={
        'X': nb.types.Array(nb.types.UnicodeCharSeq(100), 2, 'C'),
        'Y': nb.types.Array(nb.float64, 1, 'C'),
    }
)
def func(X, Y):
    """Sum Y over the rows that share each distinct value in each column of X.

    For every column of ``X`` and every distinct value ``u`` in that column, a
    1.0/0.0 indicator vector marks the rows equal to ``u``; its dot product
    with ``Y`` is the sum of ``Y`` over those rows.

    Args:
        X: 2-D array of strings (rows x columns).
        Y: 1-D float array with one entry per row of ``X``.

    Returns:
        A list of floats, ordered by column and, within a column, by the order
        in which ``np.unique`` returns that column's values.
    """
    results = []
    for i in range(X.shape[1]):
        # Distinct values present in column i.
        uniqs = np.unique(X[:,i])
        for u in uniqs:
            # X[:,i] is a column slice of the 2-D array; under Numba the
            # resulting float array is reported with layout 'A', not 'C'.
            X_vars = np.where(X[:,i] == np.full_like(X[:,i], u), 1.0, 0.0)
            results.append(np.dot(X_vars, Y))
    return results


func(X, Y)
The answer I get without Numba is [7.0, 8.0, 8.0, 7.0, 5.0, 2.0, 8.0]. With Numba I get the following error:
<ipython-input-27-42fe2e73a7cd>:23: NumbaPerformanceWarning: np.dot() is faster on contiguous arrays, called on (array(float64, 1d, A), array(float64, 1d, C))
results.append(np.dot(X_vars, Y))
Traceback (most recent call last):
File "C:\DataScience\lib\site-packages\numba\core\errors.py", line 745, in new_error_context
yield
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 273, in lower_block
self.lower_inst(inst)
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 370, in lower_inst
val = self.lower_assign(ty, inst)
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 544, in lower_assign
return self.lower_expr(ty, value)
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 1266, in lower_expr
res = self.context.special_ops[expr.op](self, expr)
File "C:\DataScience\lib\site-packages\numba\np\ufunc\array_exprs.py", line 397, in _lower_array_expr
context, builder, outer_sig, args, ExprKernel, explicit_output=False)
File "C:\DataScience\lib\site-packages\numba\np\npyimpl.py", line 327, in numpy_ufunc_kernel
output = _build_array(context, builder, ret_ty, sig.args, arguments)
File "C:\DataScience\lib\site-packages\numba\np\npyimpl.py", line 281, in _build_array
dest_shape_tup)
File "C:\DataScience\lib\site-packages\numba\np\arrayobj.py", line 3385, in _empty_nd_impl
arrtype.layout))
NotImplementedError: Don't know how to allocate array with layout 'A'.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<ipython-input-27-42fe2e73a7cd>", line 26, in <module>
func(X, Y)
File "C:\DataScience\lib\site-packages\numba\core\dispatcher.py", line 434, in _compile_for_args
raise e
File "C:\DataScience\lib\site-packages\numba\core\dispatcher.py", line 367, in _compile_for_args
return self.compile(tuple(argtypes))
File "C:\DataScience\lib\site-packages\numba\core\compiler_lock.py", line 32, in _acquire_compile_lock
return func(*args, **kwargs)
File "C:\DataScience\lib\site-packages\numba\core\dispatcher.py", line 808, in compile
cres = self._compiler.compile(args, return_type)
File "C:\DataScience\lib\site-packages\numba\core\dispatcher.py", line 78, in compile
status, retval = self._compile_cached(args, return_type)
File "C:\DataScience\lib\site-packages\numba\core\dispatcher.py", line 92, in _compile_cached
retval = self._compile_core(args, return_type)
File "C:\DataScience\lib\site-packages\numba\core\dispatcher.py", line 110, in _compile_core
pipeline_class=self.pipeline_class)
File "C:\DataScience\lib\site-packages\numba\core\compiler.py", line 603, in compile_extra
return pipeline.compile_extra(func)
File "C:\DataScience\lib\site-packages\numba\core\compiler.py", line 339, in compile_extra
return self._compile_bytecode()
File "C:\DataScience\lib\site-packages\numba\core\compiler.py", line 401, in _compile_bytecode
return self._compile_core()
File "C:\DataScience\lib\site-packages\numba\core\compiler.py", line 381, in _compile_core
raise e
File "C:\DataScience\lib\site-packages\numba\core\compiler.py", line 372, in _compile_core
pm.run(self.state)
File "C:\DataScience\lib\site-packages\numba\core\compiler_machinery.py", line 341, in run
raise patched_exception
File "C:\DataScience\lib\site-packages\numba\core\compiler_machinery.py", line 332, in run
self._runPass(idx, pass_inst, state)
File "C:\DataScience\lib\site-packages\numba\core\compiler_lock.py", line 32, in _acquire_compile_lock
return func(*args, **kwargs)
File "C:\DataScience\lib\site-packages\numba\core\compiler_machinery.py", line 291, in _runPass
mutated |= check(pss.run_pass, internal_state)
File "C:\DataScience\lib\site-packages\numba\core\compiler_machinery.py", line 264, in check
mangled = func(compiler_state)
File "C:\DataScience\lib\site-packages\numba\core\typed_passes.py", line 442, in run_pass
NativeLowering().run_pass(state)
File "C:\DataScience\lib\site-packages\numba\core\typed_passes.py", line 370, in run_pass
lower.lower()
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 179, in lower
self.lower_normal_function(self.fndesc)
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 233, in lower_normal_function
entry_block_tail = self.lower_function_body()
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 259, in lower_function_body
self.lower_block(block)
File "C:\DataScience\lib\site-packages\numba\core\lowering.py", line 273, in lower_block
self.lower_inst(inst)
File "C:\DataScience\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "C:\DataScience\lib\site-packages\numba\core\errors.py", line 752, in new_error_context
reraise(type(newerr), newerr, tb)
File "C:\DataScience\lib\site-packages\numba\core\utils.py", line 81, in reraise
raise value
LoweringError: Don't know how to allocate array with layout 'A'.
`numba` isn't as flexible on these kinds of things as `numpy`. That's the price of greater speed. But I suspect you can do this task in more basic code without the `where` or the `dot`. `Y` is 1d, so the dot is a simple 1d iteration.