-1

We're currently implementing a ML model in Python for a local company to predict credit scores in the range 0-999 points. There are 11 independent variables extracted from the database (credit history and payment behavior) and one dependent variable (the credit score). The client has stated that to be useful the MAE of the production model must be less than 100 points. The problem is we have tried several algorithms to implement this regression but our models were unable to generalize well on unseen data. So far the best performing algorithm seems to be Random Forest but its MAE on test data is still beyond acceptable values. Here's our code:

import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from keras.layers import Dense
from keras.models import Sequential

# Linear Model
def GetLinearModel(X, y):
    model = LinearRegression()
    model.fit(X, y)
    return model   

# Ridge Regression
def GetRidge(X, y):
    model = Ridge(alpha=0.01)
    model.fit(X_train, y_train) 
    return model

# LASSO Regression
def GetLASSO(X, y):
    model = Lasso(alpha=0.01)
    model.fit(X_train, y_train) 
    return model

# ElasticNet Regression
def GetElasticNet(X, y):
    model = ElasticNet(alpha=0.01)
    model.fit(X_train, y_train) 
    return model

# Random Forest
def GetRandomForest(X, y):
    model = RandomForestRegressor(n_estimators=32, random_state=0)
    model.fit(X, y)
    return model

# Neural Networks
def GetNeuralNetworks(X, y):
    model = Sequential()
    model.add(Dense(32, activation = 'relu', input_dim = 11))
    model.add(Dense(units = 32, activation = 'relu'))
    model.add(Dense(units = 32, activation = 'relu'))
    model.add(Dense(units = 32, activation = 'relu'))
    model.add(Dense(units = 32, activation = 'relu'))
    model.add(Dense(units = 1))
    model.compile(optimizer = 'adam', loss = 'mean_absolute_error')
    model.fit(X, y, batch_size = 100, epochs = 500, verbose=0)
    return model

# Train data
train_set = np.array([\
[2, 5, 9, 28, 0, 0.153668, 500, 0, 0, 0.076923077, 0, 800],\
[3, 0, 0, 42, 2, 0.358913, 500, 0, 0, 0.230769231, 0, 900],\
[3, 0, 0, 12, 2, 0, 500, 0, 0, 0.076923077, 0, 500],\
[1, 0, 0, 6, 1, 0.340075, 457, 0, 0, 0.076923077, 0, 560],\
[1, 5, 0, 12, 3, 0.458358, 457, 0, 0, 0.153846154, 0, 500],\
[1, 3, 4, 32, 2, 0.460336, 457, 0, 0, 0.153846154, 0, 600],\
[3, 0, 0, 42, 4, 0.473414, 500, 0, 0, 0.230769231, 0, 700],\
[1, 3, 0, 16, 0, 0.332991, 500, 0, 0, 0.076923077, 0, 600],\
[1, 3, 19, 27, 0, 0.3477, 500, 0, 0, 0.076923077, 0, 580],\
[1, 5, 20, 74, 1, 0.52076, 500, 0, 0, 0.230769231, 0, 550],\
[6, 0, 0, 9, 3, 0, 500, 0, 0, 0.076923077, 0, 570],\
[1, 8, 47, 0, 0, 0.840656, 681, 0, 0, 0, 0, 50],\
[1, 0, 0, 8, 14, 0, 681, 0, 0, 0.076923077, 0, 400],\
[5, 6, 19, 7, 1, 0.251423, 500, 0, 1, 0.076923077, 1, 980],\
[1, 0, 0, 2, 2, 0.121852, 500, 1, 0, 0.076923077, 9, 780],\
[2, 0, 0, 4, 0, 0.37242, 500, 1, 0, 0.076923077, 0, 920],\
[3, 4, 5, 20, 0, 0.37682, 500, 1, 0, 0.076923077, 0, 700],\
[3, 8, 17, 20, 0, 0.449545, 500, 1, 0, 0.076923077, 0, 300],\
[3, 12, 30, 20, 0, 0.551193, 500, 1, 0, 0.076923077, 0, 30],\
[0, 1, 10, 8, 3, 0.044175, 500, 0, 0, 0.076923077, 0, 350],\
[1, 0, 0, 14, 3, 0.521714, 500, 0, 0, 0.153846154, 0, 650],\
[2, 4, 15, 0, 0, 0.985122, 500, 0, 0, 0, 0, 550],\
[2, 4, 34, 0, 0, 0.666666, 500, 0, 0, 0, 0, 600],\
[1, 16, 17, 10, 3, 0.299756, 330, 0, 0, 0.153846154, 0, 650],\
[2, 0, 0, 16, 1, 0, 500, 0, 0, 0.076923077, 0, 900],\
[2, 5, 31, 26, 0, 0.104847, 500, 0, 0, 0.076923077, 0, 850],\
[2, 6, 16, 34, 1, 0.172947, 500, 0, 0, 0.153846154, 0, 900],\
[1, 4, 0, 16, 6, 0.206403, 500, 0, 0, 0.153846154, 0, 630],\
[1, 8, 20, 12, 5, 0.495897, 500, 0, 0, 0.153846154, 0, 500],\
[1, 8, 46, 8, 6, 0.495897, 500, 0, 0, 0.153846154, 0, 250],\
[2, 0, 0, 4, 8, 0, 500, 0, 0, 0.076923077, 0, 550],\
[2, 6, 602, 0, 0, 0, 500, 0, 0, 0, 0, 20],\
[0, 12, 5, 21, 0, 0.158674, 645, 0, 0, 0.153846154, 0, 850],\
[0, 12, 20, 21, 0, 0.158674, 645, 0, 0, 0.153846154, 0, 700],\
[1, 0, 0, 33, 0, 0.041473, 645, 0, 0, 0.230769231, 0, 890],\
[1, 0, 0, 12, 2, 0.147325, 500, 0, 0, 0.076923077, 0, 780],\
[1, 8, 296, 0, 0, 2.891695, 521, 0, 0, 0, 0, 1],\
[1, 0, 0, 4, 0, 0.098953, 445, 0, 0, 0.076923077, 0, 600],\
[1, 0, 0, 4, 0, 0.143443, 500, 0, 0, 0.076923077, 0, 500],\
[0, 8, 20, 0, 0, 1.110002, 833, 0, 0, 0, 0, 100],\
[0, 0, 0, 8, 2, 0, 833, 0, 0, 0.076923077, 0, 300],\
[1, 4, 60, 20, 6, 0.78685, 833, 0, 0, 0.153846154, 0, 100],\
[1, 4, 112, 20, 6, 0.78685, 833, 0, 0, 0.153846154, 0, 1],\
[1, 0, 0, 21, 10, 0.305556, 500, 0, 0, 0.307692308, 0, 150],\
[1, 0, 0, 21, 10, 0.453743, 500, 0, 0, 0.307692308, 0, 300],\
[0, 0, 0, 8, 0, 0, 570, 0, 0, 0, 0, 500],\
[0, 10, 10, 8, 0, 0.325975, 570, 0, 0, 0.076923077, 0, 450],\
[1, 7, 16, 15, 1, 0.266311, 570, 0, 0, 0.076923077, 0, 450],\
[1, 1, 32, 30, 4, 0.134606, 570, 0, 0, 0.230769231, 0, 250],\
[1, 0, 0, 32, 5, 0.105576, 570, 0, 0, 0.230769231, 0, 430],\
[1, 4, 34, 32, 5, 0.519103, 500, 0, 0, 0.230769231, 0, 350],\
[1, 0, 0, 12, 1, 0.109559, 669, 0, 0, 0.076923077, 0, 600],\
[11, 4, 15, 2, 3, 0.235709, 500, 0, 1, 0, 2, 900],\
[11, 4, 15, 1, 6, 0.504134, 500, 0, 1, 0, 2, 534],\
[2, 0, 0, 15, 9, 0.075403, 500, 0, 0, 0.076923077, 0, 573],\
[10, 0, 0, 51, 11, 2.211951, 500, 0, 0, 0.307692308, 7, 547],\
[9, 0, 0, 28, 4, 0.328037, 500, 0, 0, 0.230769231, 0, 747],\
[9, 2, 0, 0, 0, 0.166666, 500, 0, 1, 0.076923077, 4, 448],\
[8, 0, 0, 4, 1, 0, 500, 0, 1, 0, 1, 719],\
[3, 4, 15, 8, 1, 0.150237, 500, 0, 1, 0, 0, 827],\
[7, 138, 35, 37, 1, 0.414154, 500, 0, 1, 0.076923077, 3, 950],\
[6, 19, 41, 84, 1, 0.41248, 500, 0, 0, 0.230769231, 0, 750],\
[1, 6, 10, 0, 0, 0.232647, 500, 0, 1, 0, 0, 700],\
[0, 10, 27, 0, 0, 0.411712, 4, 0, 0, 0, 0, 520],\
[3, 31, 45, 80, 0, 0.266299, 500, 0, 0, 0.153846154, 0, 750],\
[3, 24, 49, 2, 1, 0.981102, 500, 0, 0, 0.076923077, 0, 550],\
[1, 12, 31, 11, 1, 0.333551, 500, 0, 0, 0.153846154, 0, 500],\
[0, 18, 30, 13, 2, 0.602826, 406, 0, 0, 0.076923077, 0, 580],\
[2, 2, 31, 0, 0, 1, 500, 0, 0, 0, 0, 427],\
[1, 18, 40, 83, 1, 0.332792, 500, 0, 0, 0.307692308, 0, 485],\
[2, 14, 35, 9, 3, 0.39671, 500, 0, 1, 0.076923077, 3, 664],\
[2, 88, 32, 7, 2, 0.548066, 500, 0, 1, 0, 1, 90],\
[2, 26, 26, 32, 2, 0.415991, 500, 0, 0, 0.153846154, 0, 90],\
[1, 14, 30, 11, 1, 0.51743, 599, 0, 0, 0.153846154, 0, 300],\
[1, 15, 28, 26, 0, 0.4413, 500, 0, 0, 0.076923077, 0, 610],\
[1, 17, 50, 34, 1, 0.313789, 500, 0, 0, 0.230769231, 0, 450],\
[0, 4, 15, 0, 0, 0.535163, 500, 0, 0, 0, 0, 375],\
[0, 8, 23, 0, 0, 0.51242, 500, 0, 0, 0, 0, 550],\
[3, 6, 44, 2, 3, 0.268062, 500, 0, 1, 0, 2, 744],\
[6, 38, 51, 35, 0, 0.28396, 500, 0, 1, 0.076923077, 1, 980],\
[6, 5, 63, 6, 5, 0.566661, 500, 0, 0, 0.153846154, 0, 850],\
[6, 0, 0, 0, 0, 0.174852, 500, 0, 0, 0, 0, 800],\
[6, 4, 60, 6, 3, 0.517482, 500, 0, 0, 0.076923077, 0, 750],\
[5, 16, 52, 49, 4, 0.378441, 500, 0, 1, 0.153846154, 6, 720],\
[5, 26, 84, 103, 1, 0.472361, 500, 0, 0, 0.230769231, 0, 300],\
[1, 6, 34, 36, 1, 0.298553, 500, 0, 1, 0.153846154, 0, 628],\
[5, 6, 65, 34, 0, 0.301907, 500, 0, 0, 0.153846154, 0, 710],\
[3, 16, 177, 29, 10, 0.501831, 500, 1, 0, 0.153846154, 0, 40],\
[2, 5, 45, 0, 0, 0.351668, 500, 0, 0, 0, 0, 708],\
[2, 7, 57, 7, 4, 0.432374, 500, 0, 0, 0.153846154, 0, 753],\
[1, 1, 75, 36, 0, 0.154085, 500, 0, 0, 0.076923077, 0, 610],\
[1, 16, 63, 13, 2, 0.331244, 500, 0, 0, 0.076923077, 0, 620],\
[1, 3, 55, 9, 0, 0.377253, 500, 0, 0, 0.076923077, 0, 640],\
[1, 1, 75, 5, 5, 0.877696, 500, 0, 0, 0.076923077, 0, 480],\
[1, 0, 0, 8, 5, 0.208742, 500, 0, 0, 0.153846154, 0, 520],\
[1, 3, 55, 29, 0, 0.228812, 678, 0, 0, 0.153846154, 0, 547],\
[1, 0, 0, 2, 2, 0.090459, 553, 0, 0, 0.076923077, 0, 535],\
[0, 4, 29, 0, 0, 0.292161, 500, 0, 0, 0, 0, 594],\
[1, 3, 64, 18, 6, 0.602431, 500, 0, 0, 0.230769231, 0, 500],\
[6, 9, 40, 74, 0, 0.567179, 500, 0, 0, 0.076923077, 0, 910],\
[4, 10, 65, 14, 1, 0.423915, 500, 0, 1, 0, 1, 713],\
[2, 0, 0, 6, 1, 0.114637, 500, 0, 0, 0.076923077, 0, 650],\
[5, 18, 74, 34, 0, 0.489314, 500, 0, 0, 0.153846154, 0, 500],\
[0, 6, 43, 9, 15, 0.599918, 612, 0, 0, 0.153846154, 0, 100],\
[4, 25, 64, 135, 0, 0.472659, 500, 0, 0, 0.230769231, 0, 560],\
[6, 3, 94, 12, 10, 0.31713, 500, 0, 0, 0.230769231, 0, 580],\
[1, 4, 69, 18, 9, 0.412528, 500, 0, 0, 0.307692308, 0, 362],\
[2, 21, 58, 21, 0, 0.53184, 500, 0, 0, 0.153846154, 0, 370],\
[0, 0, 0, 21, 4, 0.033438, 500, 0, 0, 0.153846154, 0, 500],\
[0, 10, 53, 20, 0, 0.619595, 500, 0, 0, 0.076923077, 0, 200],\
[2, 15, 63, 28, 2, 0.593453, 500, 0, 0, 0.153846154, 0, 574],\
[3, 2, 84, 21, 1, 0.302636, 500, 0, 0, 0.153846154, 0, 790],\
[4, 19, 47, 28, 0, 0.256892, 500, 0, 0, 0.076923077, 0, 748],\
[1, 0, 0, 0, 0, 0.119599, 500, 0, 0, 0, 0, 517],\
[3, 10, 53, 22, 0, 0.419703, 500, 0, 0, 0.153846154, 0, 800],\
[4, 7, 66, 70, 1, 0.362268, 500, 0, 0, 0.230769231, 0, 550],\
[0, 16, 88, 18, 3, 0.597145, 16, 0, 0, 0.153846154, 0, 50],\
[5, 8, 38, 0, 0, 0.666666, 500, 0, 0, 0, 0, 667]])

# Test data    
test_set = np.array([\
[2, 16, 87, 30, 0, 0.168057, 500, 0, 1, 0.153846154, 1, 760],\
[3, 5, 83, 6, 4, 0.273522, 500, 0, 0, 0.076923077, 0, 877],\
[1, 0, 0, 12, 0, 0.262797, 500, 0, 0, 0.153846154, 0, 596],\
[2, 15, 46, 28, 0, 0.495495, 500, 0, 0, 0.076923077, 0, 680],\
[1, 0, 0, 22, 9, 0.254813, 500, 0, 0, 0.230769231, 0, 450],\
[3, 19, 59, 12, 0, 0.437851, 500, 0, 0, 0.153846154, 0, 850],\
[4, 5, 28, 0, 0, 0.34559, 500, 0, 1, 0.076923077, 1, 800],\
[1, 5, 58, 0, 0, 0.385379, 500, 0, 0, 0, 0, 641],\
[1, 4, 65, 15, 1, 0.2945, 500, 0, 0, 0.153846154, 0, 644],\
[0, 0, 0, 9, 3, 0.421612, 500, 0, 0, 0.076923077, 0, 580],\
[3, 31, 83, 2, 2, 0.436883, 500, 0, 0, 0.076923077, 0, 410],\
[0, 0, 0, 18, 5, 0.044898, 377, 0, 0, 0.230769231, 0, 520],\
[0, 8, 49, 12, 3, 0.428529, 500, 0, 1, 0.076923077, 1, 370],\
[0, 22, 89, 2, 1, 0.819431, 500, 0, 0, 0.076923077, 0, 440],\
[3, 27, 63, 124, 0, 0.375306, 500, 0, 0, 0.076923077, 0, 880],\
[3, 20, 64, 18, 5, 0.439412, 500, 0, 1, 0.076923077, 3, 820],\
[1, 6, 34, 2, 12, 0.495654, 500, 0, 0, 0.076923077, 0, 653],\
[0, 14, 225, 0, 0, 1, 486, 0, 0, 0, 0, 1],\
[2, 8, 87, 32, 1, 0.829792, 500, 0, 0, 0.230769231, 0, 570],\
[2, 15, 46, 24, 4, 0.500442, 500, 0, 0, 0.153846154, 0, 568]])

# split datasets into independent and dependent variables
X_train, y_train = train_set[:, :-1], train_set[:, -1]    
X_test, y_test = test_set[:, :-1], test_set[:, -1]    

# feature scaling
sc = RobustScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

# Linear model
reg = GetLinearModel(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Linear", mae))

# Ridge Regression
reg = GetRidge(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Ridge", mae))

# LASSO Regression
reg = GetLASSO(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("LASSO", mae))

# ElasticNet Regression
reg = GetElasticNet(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("ElasticNet", mae))

# Random Forest
reg = GetRandomForest(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Random Forest", mae))

# Neural networks
reg = GetNeuralNetworks(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Neural Networks", mae))

Output:

         Linear: 141.265089
          Ridge: 141.267797
          LASSO: 141.274700
     ElasticNet: 141.413544
  Random Forest: 102.701562
WARNING:tensorflow:11 out of the last 11 calls to <function Model.make_predict_function.<locals>.predict_function at 0x00000229766694C0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
Neural Networks: 122.301840

Any help on how to improve accuracy of the model will be greatly appreciated.

Kind regards.

4 Answers 4

1

I am using the dataset what you have provided in your example Also i have created Training,validation and testing dataset to avoid data leak as mentioned by @Prayson W. Daniel

For Neural networks you need to ensure both labels and feature are scaled.You can go for standard scalar. Also you need to ensure the feature and labels must be 2 dim.In your example your label is one dimensional array.

Use the following code to extract 2d feature

Train_labels=train_set[:,[-1]]

You can use StandardScaler for normalizing the data,you need to ensure both labels and feature needs to normalized

Now once you are building ANN you needs to ensure you network sees alot of data since you have very less train and test you can go with K fold cross validation I am not using k fold for now but i am creating the model

from keras import regularizers
def build_model() :
    Model=K.models.Sequential()
    Model.add(K.layers.Dense(units=21,activation='relu',
              kernel_regularizer=regularizers.l2(0.001),input_dim=11))
    Model.add(K.layers.Dropout(0.2))
    Model.add(K.layers.Dense(21,activation='relu',
              kernel_regularizer=regularizers.l2(0.001)))
    Model.add(K.layers.Dropout(0.2))
    Model.add(K.layers.Dense(21,activation='relu'))
    Model.add(K.layers.Dense(1))

    #Compile the model


    Optimizer=K.optimizers.Nadam()
    Model.compile(optimizer=Optimizer,loss='mae',metrics=r2_keras_custom)
    return Model


model=build_model()
history=model.fit(x=X_train,y=Y_train,epochs=200,batch_size=29,validation_data= 
(X_test,Y_test))

I am using R2 as custom metric,you can also create one 

Here i am using r2 which 1-RSS/TSS

plt.plot(history.history['val_r2_keras_custom'])
plt.plot(history.history['r2_keras_custom'])
plt.legend(['Test_score','Train_score'])
plt.plot()

enter image description here

Final score

I hope this helps ,other can correct me

Sign up to request clarification or add additional context in comments.

Comments

0

If that is the whole dataset, it's tiny. One option to consider is instead of splitting your data into training and validation (AKA test), research Cross-Validation. Cross-Validation is an approach for small datasets where all the data gets used for training and all for validation, but still preventing overfitting.

Comments

0

You could perform hyper-parameter tuning for each model and a cross-validation.

This class can help you do that: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

GridSearchCV is compatible with Keras model too. For that, you can have a look at: https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

Comments

0

Personally, a small number of records in the training dataset means a small number of base classifiers in training ensembles of machine learning algorithms. Checking your code, I have not used RobustScaler before, but I would use transform on the test dataset, not fit_transform.

Going back to your code, it looks like the random forest is having the best accuracy. With hyper tuning some of the parameters including the number of estimators and max_depth, better performance can be reported. Hereafter, as other answers/comments recommended, hypertuning of algorithms parameters is required here.

# -*- coding: utf-8 -*-
"""
Created on Wed Jan  6 20:50:44 2021

@author: AliHaidar
"""

import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn import metrics

from xgboost import XGBRegressor


# Linear Model
def GetLinearModel(X, y):
    model = LinearRegression()
    model.fit(X, y)
    return model   

# Ridge Regression
def GetRidge(X, y):
    model = Ridge(alpha=0.01)
    model.fit(X_train, y_train) 
    return model

# LASSO Regression
def GetLASSO(X, y):
    model = Lasso(alpha=0.01)
    model.fit(X_train, y_train) 
    return model

# ElasticNet Regression
def GetElasticNet(X, y):
    model = ElasticNet(alpha=0.01)
    model.fit(X_train, y_train) 
    return model

# Random Forest
def GetRandomForest(X, y):
    model = RandomForestRegressor(n_estimators=4, random_state=0,max_depth=11)
    model.fit(X, y)
    return model


# Train data
train_set = np.array([\
[2, 5, 9, 28, 0, 0.153668, 500, 0, 0, 0.076923077, 0, 800],\
[3, 0, 0, 42, 2, 0.358913, 500, 0, 0, 0.230769231, 0, 900],\
[3, 0, 0, 12, 2, 0, 500, 0, 0, 0.076923077, 0, 500],\
[1, 0, 0, 6, 1, 0.340075, 457, 0, 0, 0.076923077, 0, 560],\
[1, 5, 0, 12, 3, 0.458358, 457, 0, 0, 0.153846154, 0, 500],\
[1, 3, 4, 32, 2, 0.460336, 457, 0, 0, 0.153846154, 0, 600],\
[3, 0, 0, 42, 4, 0.473414, 500, 0, 0, 0.230769231, 0, 700],\
[1, 3, 0, 16, 0, 0.332991, 500, 0, 0, 0.076923077, 0, 600],\
[1, 3, 19, 27, 0, 0.3477, 500, 0, 0, 0.076923077, 0, 580],\
[1, 5, 20, 74, 1, 0.52076, 500, 0, 0, 0.230769231, 0, 550],\
[6, 0, 0, 9, 3, 0, 500, 0, 0, 0.076923077, 0, 570],\
[1, 8, 47, 0, 0, 0.840656, 681, 0, 0, 0, 0, 50],\
[1, 0, 0, 8, 14, 0, 681, 0, 0, 0.076923077, 0, 400],\
[5, 6, 19, 7, 1, 0.251423, 500, 0, 1, 0.076923077, 1, 980],\
[1, 0, 0, 2, 2, 0.121852, 500, 1, 0, 0.076923077, 9, 780],\
[2, 0, 0, 4, 0, 0.37242, 500, 1, 0, 0.076923077, 0, 920],\
[3, 4, 5, 20, 0, 0.37682, 500, 1, 0, 0.076923077, 0, 700],\
[3, 8, 17, 20, 0, 0.449545, 500, 1, 0, 0.076923077, 0, 300],\
[3, 12, 30, 20, 0, 0.551193, 500, 1, 0, 0.076923077, 0, 30],\
[0, 1, 10, 8, 3, 0.044175, 500, 0, 0, 0.076923077, 0, 350],\
[1, 0, 0, 14, 3, 0.521714, 500, 0, 0, 0.153846154, 0, 650],\
[2, 4, 15, 0, 0, 0.985122, 500, 0, 0, 0, 0, 550],\
[2, 4, 34, 0, 0, 0.666666, 500, 0, 0, 0, 0, 600],\
[1, 16, 17, 10, 3, 0.299756, 330, 0, 0, 0.153846154, 0, 650],\
[2, 0, 0, 16, 1, 0, 500, 0, 0, 0.076923077, 0, 900],\
[2, 5, 31, 26, 0, 0.104847, 500, 0, 0, 0.076923077, 0, 850],\
[2, 6, 16, 34, 1, 0.172947, 500, 0, 0, 0.153846154, 0, 900],\
[1, 4, 0, 16, 6, 0.206403, 500, 0, 0, 0.153846154, 0, 630],\
[1, 8, 20, 12, 5, 0.495897, 500, 0, 0, 0.153846154, 0, 500],\
[1, 8, 46, 8, 6, 0.495897, 500, 0, 0, 0.153846154, 0, 250],\
[2, 0, 0, 4, 8, 0, 500, 0, 0, 0.076923077, 0, 550],\
[2, 6, 602, 0, 0, 0, 500, 0, 0, 0, 0, 20],\
[0, 12, 5, 21, 0, 0.158674, 645, 0, 0, 0.153846154, 0, 850],\
[0, 12, 20, 21, 0, 0.158674, 645, 0, 0, 0.153846154, 0, 700],\
[1, 0, 0, 33, 0, 0.041473, 645, 0, 0, 0.230769231, 0, 890],\
[1, 0, 0, 12, 2, 0.147325, 500, 0, 0, 0.076923077, 0, 780],\
[1, 8, 296, 0, 0, 2.891695, 521, 0, 0, 0, 0, 1],\
[1, 0, 0, 4, 0, 0.098953, 445, 0, 0, 0.076923077, 0, 600],\
[1, 0, 0, 4, 0, 0.143443, 500, 0, 0, 0.076923077, 0, 500],\
[0, 8, 20, 0, 0, 1.110002, 833, 0, 0, 0, 0, 100],\
[0, 0, 0, 8, 2, 0, 833, 0, 0, 0.076923077, 0, 300],\
[1, 4, 60, 20, 6, 0.78685, 833, 0, 0, 0.153846154, 0, 100],\
[1, 4, 112, 20, 6, 0.78685, 833, 0, 0, 0.153846154, 0, 1],\
[1, 0, 0, 21, 10, 0.305556, 500, 0, 0, 0.307692308, 0, 150],\
[1, 0, 0, 21, 10, 0.453743, 500, 0, 0, 0.307692308, 0, 300],\
[0, 0, 0, 8, 0, 0, 570, 0, 0, 0, 0, 500],\
[0, 10, 10, 8, 0, 0.325975, 570, 0, 0, 0.076923077, 0, 450],\
[1, 7, 16, 15, 1, 0.266311, 570, 0, 0, 0.076923077, 0, 450],\
[1, 1, 32, 30, 4, 0.134606, 570, 0, 0, 0.230769231, 0, 250],\
[1, 0, 0, 32, 5, 0.105576, 570, 0, 0, 0.230769231, 0, 430],\
[1, 4, 34, 32, 5, 0.519103, 500, 0, 0, 0.230769231, 0, 350],\
[1, 0, 0, 12, 1, 0.109559, 669, 0, 0, 0.076923077, 0, 600],\
[11, 4, 15, 2, 3, 0.235709, 500, 0, 1, 0, 2, 900],\
[11, 4, 15, 1, 6, 0.504134, 500, 0, 1, 0, 2, 534],\
[2, 0, 0, 15, 9, 0.075403, 500, 0, 0, 0.076923077, 0, 573],\
[10, 0, 0, 51, 11, 2.211951, 500, 0, 0, 0.307692308, 7, 547],\
[9, 0, 0, 28, 4, 0.328037, 500, 0, 0, 0.230769231, 0, 747],\
[9, 2, 0, 0, 0, 0.166666, 500, 0, 1, 0.076923077, 4, 448],\
[8, 0, 0, 4, 1, 0, 500, 0, 1, 0, 1, 719],\
[3, 4, 15, 8, 1, 0.150237, 500, 0, 1, 0, 0, 827],\
[7, 138, 35, 37, 1, 0.414154, 500, 0, 1, 0.076923077, 3, 950],\
[6, 19, 41, 84, 1, 0.41248, 500, 0, 0, 0.230769231, 0, 750],\
[1, 6, 10, 0, 0, 0.232647, 500, 0, 1, 0, 0, 700],\
[0, 10, 27, 0, 0, 0.411712, 4, 0, 0, 0, 0, 520],\
[3, 31, 45, 80, 0, 0.266299, 500, 0, 0, 0.153846154, 0, 750],\
[3, 24, 49, 2, 1, 0.981102, 500, 0, 0, 0.076923077, 0, 550],\
[1, 12, 31, 11, 1, 0.333551, 500, 0, 0, 0.153846154, 0, 500],\
[0, 18, 30, 13, 2, 0.602826, 406, 0, 0, 0.076923077, 0, 580],\
[2, 2, 31, 0, 0, 1, 500, 0, 0, 0, 0, 427],\
[1, 18, 40, 83, 1, 0.332792, 500, 0, 0, 0.307692308, 0, 485],\
[2, 14, 35, 9, 3, 0.39671, 500, 0, 1, 0.076923077, 3, 664],\
[2, 88, 32, 7, 2, 0.548066, 500, 0, 1, 0, 1, 90],\
[2, 26, 26, 32, 2, 0.415991, 500, 0, 0, 0.153846154, 0, 90],\
[1, 14, 30, 11, 1, 0.51743, 599, 0, 0, 0.153846154, 0, 300],\
[1, 15, 28, 26, 0, 0.4413, 500, 0, 0, 0.076923077, 0, 610],\
[1, 17, 50, 34, 1, 0.313789, 500, 0, 0, 0.230769231, 0, 450],\
[0, 4, 15, 0, 0, 0.535163, 500, 0, 0, 0, 0, 375],\
[0, 8, 23, 0, 0, 0.51242, 500, 0, 0, 0, 0, 550],\
[3, 6, 44, 2, 3, 0.268062, 500, 0, 1, 0, 2, 744],\
[6, 38, 51, 35, 0, 0.28396, 500, 0, 1, 0.076923077, 1, 980],\
[6, 5, 63, 6, 5, 0.566661, 500, 0, 0, 0.153846154, 0, 850],\
[6, 0, 0, 0, 0, 0.174852, 500, 0, 0, 0, 0, 800],\
[6, 4, 60, 6, 3, 0.517482, 500, 0, 0, 0.076923077, 0, 750],\
[5, 16, 52, 49, 4, 0.378441, 500, 0, 1, 0.153846154, 6, 720],\
[5, 26, 84, 103, 1, 0.472361, 500, 0, 0, 0.230769231, 0, 300],\
[1, 6, 34, 36, 1, 0.298553, 500, 0, 1, 0.153846154, 0, 628],\
[5, 6, 65, 34, 0, 0.301907, 500, 0, 0, 0.153846154, 0, 710],\
[3, 16, 177, 29, 10, 0.501831, 500, 1, 0, 0.153846154, 0, 40],\
[2, 5, 45, 0, 0, 0.351668, 500, 0, 0, 0, 0, 708],\
[2, 7, 57, 7, 4, 0.432374, 500, 0, 0, 0.153846154, 0, 753],\
[1, 1, 75, 36, 0, 0.154085, 500, 0, 0, 0.076923077, 0, 610],\
[1, 16, 63, 13, 2, 0.331244, 500, 0, 0, 0.076923077, 0, 620],\
[1, 3, 55, 9, 0, 0.377253, 500, 0, 0, 0.076923077, 0, 640],\
[1, 1, 75, 5, 5, 0.877696, 500, 0, 0, 0.076923077, 0, 480],\
[1, 0, 0, 8, 5, 0.208742, 500, 0, 0, 0.153846154, 0, 520],\
[1, 3, 55, 29, 0, 0.228812, 678, 0, 0, 0.153846154, 0, 547],\
[1, 0, 0, 2, 2, 0.090459, 553, 0, 0, 0.076923077, 0, 535],\
[0, 4, 29, 0, 0, 0.292161, 500, 0, 0, 0, 0, 594],\
[1, 3, 64, 18, 6, 0.602431, 500, 0, 0, 0.230769231, 0, 500],\
[6, 9, 40, 74, 0, 0.567179, 500, 0, 0, 0.076923077, 0, 910],\
[4, 10, 65, 14, 1, 0.423915, 500, 0, 1, 0, 1, 713],\
[2, 0, 0, 6, 1, 0.114637, 500, 0, 0, 0.076923077, 0, 650],\
[5, 18, 74, 34, 0, 0.489314, 500, 0, 0, 0.153846154, 0, 500],\
[0, 6, 43, 9, 15, 0.599918, 612, 0, 0, 0.153846154, 0, 100],\
[4, 25, 64, 135, 0, 0.472659, 500, 0, 0, 0.230769231, 0, 560],\
[6, 3, 94, 12, 10, 0.31713, 500, 0, 0, 0.230769231, 0, 580],\
[1, 4, 69, 18, 9, 0.412528, 500, 0, 0, 0.307692308, 0, 362],\
[2, 21, 58, 21, 0, 0.53184, 500, 0, 0, 0.153846154, 0, 370],\
[0, 0, 0, 21, 4, 0.033438, 500, 0, 0, 0.153846154, 0, 500],\
[0, 10, 53, 20, 0, 0.619595, 500, 0, 0, 0.076923077, 0, 200],\
[2, 15, 63, 28, 2, 0.593453, 500, 0, 0, 0.153846154, 0, 574],\
[3, 2, 84, 21, 1, 0.302636, 500, 0, 0, 0.153846154, 0, 790],\
[4, 19, 47, 28, 0, 0.256892, 500, 0, 0, 0.076923077, 0, 748],\
[1, 0, 0, 0, 0, 0.119599, 500, 0, 0, 0, 0, 517],\
[3, 10, 53, 22, 0, 0.419703, 500, 0, 0, 0.153846154, 0, 800],\
[4, 7, 66, 70, 1, 0.362268, 500, 0, 0, 0.230769231, 0, 550],\
[0, 16, 88, 18, 3, 0.597145, 16, 0, 0, 0.153846154, 0, 50],\
[5, 8, 38, 0, 0, 0.666666, 500, 0, 0, 0, 0, 667]])

# Test data    
test_set = np.array([\
[2, 16, 87, 30, 0, 0.168057, 500, 0, 1, 0.153846154, 1, 760],\
[3, 5, 83, 6, 4, 0.273522, 500, 0, 0, 0.076923077, 0, 877],\
[1, 0, 0, 12, 0, 0.262797, 500, 0, 0, 0.153846154, 0, 596],\
[2, 15, 46, 28, 0, 0.495495, 500, 0, 0, 0.076923077, 0, 680],\
[1, 0, 0, 22, 9, 0.254813, 500, 0, 0, 0.230769231, 0, 450],\
[3, 19, 59, 12, 0, 0.437851, 500, 0, 0, 0.153846154, 0, 850],\
[4, 5, 28, 0, 0, 0.34559, 500, 0, 1, 0.076923077, 1, 800],\
[1, 5, 58, 0, 0, 0.385379, 500, 0, 0, 0, 0, 641],\
[1, 4, 65, 15, 1, 0.2945, 500, 0, 0, 0.153846154, 0, 644],\
[0, 0, 0, 9, 3, 0.421612, 500, 0, 0, 0.076923077, 0, 580],\
[3, 31, 83, 2, 2, 0.436883, 500, 0, 0, 0.076923077, 0, 410],\
[0, 0, 0, 18, 5, 0.044898, 377, 0, 0, 0.230769231, 0, 520],\
[0, 8, 49, 12, 3, 0.428529, 500, 0, 1, 0.076923077, 1, 370],\
[0, 22, 89, 2, 1, 0.819431, 500, 0, 0, 0.076923077, 0, 440],\
[3, 27, 63, 124, 0, 0.375306, 500, 0, 0, 0.076923077, 0, 880],\
[3, 20, 64, 18, 5, 0.439412, 500, 0, 1, 0.076923077, 3, 820],\
[1, 6, 34, 2, 12, 0.495654, 500, 0, 0, 0.076923077, 0, 653],\
[0, 14, 225, 0, 0, 1, 486, 0, 0, 0, 0, 1],\
[2, 8, 87, 32, 1, 0.829792, 500, 0, 0, 0.230769231, 0, 570],\
[2, 15, 46, 24, 4, 0.500442, 500, 0, 0, 0.153846154, 0, 568]])

# split datasets into independent and dependent variables
X_train, y_train = train_set[:, :-1], train_set[:, -1]    
X_test, y_test = test_set[:, :-1], test_set[:, -1]    

# feature scaling
sc = RobustScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

# Linear model
reg = GetLinearModel(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Linear", mae))

# Ridge Regression
reg = GetRidge(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Ridge", mae))

# LASSO Regression
reg = GetLASSO(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("LASSO", mae))

# ElasticNet Regression
reg = GetElasticNet(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("ElasticNet", mae))

# Random Forest
reg = GetRandomForest(X_train, y_train)
y_pred = reg.predict(X_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("%15s: %10f" % ("Random Forest", mae))


Output:

         Linear: 141.265089
          Ridge: 141.267797
          LASSO: 141.274700
     ElasticNet: 141.413544
  Random Forest:  90.776332

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.