Getting to know Tensorflow, I built a toy network for classification. It consists of 15 input nodes for features identical to the one-hot encoding of the corresponding class label (with indexing beginning at 1) - so the data to be loaded from an input CSV may look like this:
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2
...
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,15
The network has only one hidden layer and an output layer, the latter containing probabilities for a given class. Here's my problem: during training the network assings a growing probability for whatever was fed in as the very first input.
Here are the relevant lines of code (some lines are omitted):
# number_of_p : number of samples
# number_of_a : number of attributes (features) -> 15
# number_of_s : number of styles (labels) -> 15
# function for generating hidden layers
# nodes is a list of nodes in each layer (len(nodes) = number of hidden layers)
def hidden_generation(nodes):
hidden_nodes = [number_of_a] + nodes + [number_of_s]
number_of_layers = len(hidden_nodes) - 1
print(hidden_nodes)
hidden_layer = list()
for i in range (0,number_of_layers):
hidden_layer.append(tf.zeros([hidden_nodes[i],batch_size]))
hidden_weights = list()
for i in range (0,number_of_layers):
hidden_weights.append(tf.Variable(tf.random_normal([hidden_nodes[i+1], hidden_nodes[i]])))
hidden_biases = list()
for i in range (0,number_of_layers):
hidden_biases.append(tf.Variable(tf.zeros([hidden_nodes[i+1],batch_size])))
return hidden_layer, hidden_weights, hidden_biases
#loss function
def loss(labels, logits):
cross_entropy = tf.losses.softmax_cross_entropy(
onehot_labels = labels, logits = logits)
return tf.reduce_mean(cross_entropy, name = 'xentropy_mean')
hidden_layer, hidden_weights, hidden_biases = hidden_generation(hidden_layers)
with tf.Session() as training_sess:
training_sess.run(tf.global_variables_initializer())
training_sess.run(a_iterator.initializer, feed_dict = {a_placeholder_feed: training_set.data})
current_a = training_sess.run(next_a)
training_sess.run(s_iterator.initializer, feed_dict = {s_placeholder_feed: training_set.target})
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.one_hot((current_s - 1), number_of_s))
for i in range (1,len(hidden_layers)+1):
hidden_layer[i] = tf.tanh(tf.matmul(hidden_weights[i-1], (hidden_layer[i-1])) + hidden_biases[i-1])
output = tf.nn.softmax(tf.transpose(tf.matmul(hidden_weights[-1],hidden_layer[-1]) + hidden_biases[-1]))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.1)
# using the AdamOptimizer does not help, nor does choosing a much bigger and smaller learning rate
train = optimizer.minimize(loss(s_one_hot, output))
training_sess.run(train)
for i in range (0, (number_of_p)):
current_a = training_sess.run(next_a)
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.transpose(tf.one_hot((current_s - 1), number_of_s)))
# (no idea why I have to declare those twice for the datastream to move)
training_sess.run(train)
I assume the loss function is being declared at the wrong place and always references the same vectors. However, replacing the loss function did not help me by now. I will gladly provide the rest of the code if anyone is kind enough to help me.
EDIT: I've already discovered and fixed one major (and dumb) mistake: weights go before values node values in tf.matmul.