I'm doing a regression that works, but to improve the results I wanted to add a numpy array column (it represents user attributes that I preprocessed outside the application).
Here's an example of my data:
MPG Cylinders Displacement Horsepower Weight Acceleration Model Year Origin NumpyColumn
0 18.0 8 307.0 130.0 3504.0 12.0 70 1 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1 15.0 8 350.0 165.0 3693.0 11.5 70 1 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2 18.0 8 318.0 150.0 3436.0 11.0 70 1 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
3 16.0 8 304.0 150.0 3433.0 12.0 70 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4 17.0 8 302.0 140.0 3449.0 10.5 70 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
... ... ... ... ... ... ... ... ... ...
393 27.0 4 140.0 86.0 2790.0 15.6 82 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
394 44.0 4 97.0 52.0 2130.0 24.6 82 2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
395 32.0 4 135.0 84.0 2295.0 11.6 82 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
396 28.0 4 120.0 79.0 2625.0 18.6 82 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
397 31.0 4 119.0 82.0 2720.0 19.4 82 1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
Here's how to generate it:
import numpy as np
import pandas as pd
import scipy.sparse as sparse

# download the data
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
df = pd.read_csv(url, names=column_names,
                 na_values='?', comment='\t',
                 sep=' ', skipinitialspace=True)
lenOfDF = len(df)

# add the numpy array column: one row of the dense matrix per cell
arr = sparse.coo_matrix(([1, 1, 1], ([0, 1, 2], [1, 2, 0])), shape=(lenOfDF, lenOfDF))
df['NumpyColumn'] = arr.toarray().tolist()
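As a quick sanity check (a sketch against the df built above, not my real pipeline), the new column ends up as object dtype with one plain Python list per cell rather than a numeric block:

# each cell of the new column holds a Python list, so the column
# has object dtype rather than a numeric dtype
print(df['NumpyColumn'].dtype)          # object
print(type(df['NumpyColumn'].iloc[0]))  # <class 'list'>
print(len(df['NumpyColumn'].iloc[0]))   # lenOfDF entries per cell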
Then my model is similar to this:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Activation
from tensorflow.keras.models import Model

g_input = Input(shape=[Xtrain.shape[1]])
H1 = Dense(512)(g_input)
H1r = Activation('relu')(H1)
H2 = Dense(256)(H1r)
H2r = Activation('relu')(H2)
H3 = Dense(256)(H2r)
H3r = Activation('relu')(H3)
H4 = Dense(128)(H3r)
H4r = Activation('relu')(H4)
H5 = Dense(128)(H4r)
H5r = Activation('relu')(H5)
H6 = Dense(64)(H5r)
H6r = Activation('relu')(H6)
H7 = Dense(32)(H6r)
Hr = Activation('relu')(H7)
g_V = Dense(1)(Hr)
generator = Model(g_input, g_V)
generator.compile(loss='binary_crossentropy', optimizer=opt)
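(opt is an optimizer I define elsewhere; for a runnable sketch, assume any standard Keras optimizer stands in for it, for example:)

# assumption: the actual `opt` is defined elsewhere in my code;
# Adam here is only a placeholder so the snippet runs
opt = tf.keras.optimizers.Adam()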
When I call it on the dataset that includes the NumpyColumn (x_batch is just a split and scaled subset of the dataframe above; the numpy array column is passed through unchanged), I get the following error:
# generated = generator.predict(x_batch) #making prediction from the generator
generated = generator.predict(tf.convert_to_tensor(x_batch)) #making prediction from the generator
Error:
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).
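A minimal check of the input reproduces the problem (a sketch, assuming x_batch is taken straight from the dataframe): a dataframe with list/array cells yields an object-dtype array, which TensorFlow cannot convert to a tensor.

import numpy as np
x = np.asarray(x_batch)   # assumption: x_batch extracted from the dataframe above
print(x.dtype, x.shape)   # the object dtype here is what makes convert_to_tensor fail
# tf.convert_to_tensor(x) # raises the ValueError above on an object-dtype array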
What am I doing wrong here? My thought is that the array would give the model extra information to make better predictions, so I'm trying to test it. Is it possible to add a numpy array to a dataframe for training? Or is there an alternative approach I should take?
Edit 1
Above is a sample to help you quickly understand the problem. In my case, after encoding/scaling the dataframe, I have a numpy array that looks like this (numeric values representing the categorical encodings, plus two numpy arrays at the end):
array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,
0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 9921.0,
20.0, 0.40457918757980704, 0.11369258150627903, 0.868421052631579,
0.47368421052631576, 0.894736842105263, 0.06688034531010473,
0.16160188713280013, 0.7368421052631579, 0.1673332894736842,
0.2099143206854345, 0.3690644464300929, 0.07097828135799109,
0.8157894736842104, 0.9210526315789473, 0.23091420289239645,
0.08623506024464939, 0.5789473684210527, 0.763157894736842, 0.0,
0.18421052631578946, 0.07949239000059796, 0.18763907099960708,
0.7368421052631579, 0.2668740256483197, 0.6842105263157894,
0.13699219747488295, 0.868421052631579, 0.868421052631579,
0.052631349139178094, 0.6842105263157894, 0.5526315789473684,
0.6842105263157894, 0.6842105263157894, 0.6842105263157894,
0.7105263157894737, 0.7105263157894737, 0.7105263157894737,
0.23684210526315788, 0.0, 0.7105263157894737, 0.5789473684210527,
0.763157894736842, 0.5263157894736842, 0.6578947368421052,
0.6842105263157894, 0.7105263157894737, 0.0, 0.5789473684210527,
0.2631578947368421, 0.6842105263157894, 0.6578947368421052,
0.42105263157894735, 0.5789473684210527, 0.42105263157894735,
0.7368421052631579, 0.7368421052631579, 0.15207999030227856,
0.8445892232119124, 0.2683721567016762, 0.3142850329243405,
0.18421052631578946, 0.19132292433056333, 0.20615136344079915,
0.14475710664724623, 0.1624920232728424, 0.6989826700898587,
0.18421052631578946, 0.21052631578947367, 0.4793448772543646,
0.7894736842105263, 0.682967263567459, 0.37139592674256894,
0.21123755190149363, 0.18421052631578946, 0.6578947368421052,
0.39473684210526316, 0.631578947368421, 0.7894736842105263,
0.36842105263157887, 0.1863353145721346, 0.7368421052631579,
0.26809396092240706, 0.22492185003691062, 0.1460488284639197,
0.631578947368421, 0.15347526114630458, 0.763157894736842,
0.2097323620058104, 0.3684210526315789, 0.631578947368421,
0.631578947368421, 0.631578947368421, 0.6842105263157894,
0.36842105263157887, 0.10507952765043811, 0.22418515695024185,
0.23755698619020282, 0.22226500126902, 0.530004040377794,
0.3421052631578947, 0.19018711711349692, 0.19629244102133708,
0.5789473684210527, 0.10526315789473684, 0.49999999999999994,
0.5263157894736842, 0.5263157894736842, 0.49999999999999994,
0.1052631578947368, 0.10526315789473678, 0.5263157894736842,
0.4736842105263157, 2013.0,
array([0. , 0. , 0. , 0.62235785, 0. ,
0.27049118, 0. , 0.31094068, 0. , 0. ,
0. , 0. , 0. , 0.4330532 , 0. ,
0. , 0.2515796 , 0. , 0. , 0. ,
0.40683705, 0.01569915, 0. , 0. , 0. ,
0.13090582, 0. , 0.49955425, 0.06970194, 0.29155406,
0. , 0. , 0.27342197, 0. , 0. ,
0. , 0.04415211, 0. , 0.03908829, 0. ,
0.07673171, 0.33199945, 0. , 0.51759815, 0. ,
0.4719149 , 0.4538082 , 0.13475986, 0. , 0. ,
0. , 0. , 0. , 0. , 0.08000553,
0. , 0.02991109, 0. , 0.5051543 , 0. ,
0.24663273, 0. , 0.50839704, 0. , 0. ,
0.05281948, 0.44884402, 0. , 0.44542992, 0.15376966,
0. , 0. , 0. , 0.39128256, 0.49497205,
0. , 0. ], dtype=float32),
array([0. , 0. , 0. , 0.62235785, 0. ,
0.27049118, 0. , 0.31094068, 0. , 0. ,
0. , 0. , 0. , 0.4330532 , 0. ,
0. , 0.25157961, 0. , 0. , 0. ,
0.40683705, 0.01569915, 0. , 0. , 0. ,
0.13090582, 0. , 0.49955425, 0.06970194, 0.29155406,
0. , 0. , 0.27342197, 0. , 0. ,
0. , 0.04415211, 0. , 0.03908829, 0. ,
0.07673171, 0.33199945, 0. , 0.51759815, 0. ,
0.47191489, 0.45380819, 0.13475986, 0. , 0. ,
0. , 0. , 0. , 0. , 0.08000553,
0. , 0.02991109, 0. , 0.50515431, 0. ,
0.24663273, 0. , 0.50839704, 0. , 0. ,
0.05281948, 0.44884402, 0. , 0.44542992, 0.15376966,
0. , 0. , 0. , 0.39128256, 0.49497205,
0. , 0. ])], dtype=object)


x_batch, as you extract it from the dataframe (not as you feed it to the model): what is its dtype and shape? My guess is that it is a 1d object-dtype array containing arrays, not a 2d numeric array. However it was constructed, the dataframe puts one array in each cell. np.stack(x_batch) might work to join the individual arrays into one 2d array.
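A minimal sketch of that np.stack suggestion, assuming every cell of NumpyColumn holds an equal-length list/array (names are from the example at the top):

import numpy as np
embedded = np.stack(df['NumpyColumn'].to_numpy())                  # (rows, lenOfDF) numeric 2d array
rest = df.drop(columns=['NumpyColumn']).to_numpy(dtype='float32')  # the ordinary numeric columns
x_all = np.hstack([rest, embedded.astype('float32')])              # one flat 2d feature matrix
print(x_all.dtype, x_all.shape)                                    # float32, safe for tf.convert_to_tensor

After stacking, the input width grows by lenOfDF, which Xtrain.shape[1] in the Input layer picks up automatically.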