Is it possible to do binary classification in TensorFlow? Specifically for pedestrian detection: deciding whether an image is a pedestrian or not. I couldn't find anything in the API docs or any good tutorials for this, so I tried to adapt the code from the deep MNIST tutorial, which does multi-class classification: I labeled the images containing pedestrians with 1 and the negatives with 0, and used 3 input channels (for colour, which shouldn't be a problem, right?). But the training accuracy just jumps all over the place.
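To be concrete about the labels: each image gets a class id (1 for pedestrian, 0 for negative), and those ids are one-hot encoded into the two columns that y_ expects. Roughly like this sketch (the function name here is just illustrative, my dataset module does the equivalent):

import numpy as np

def dense_to_one_hot(class_ids, num_classes=2):
    # class_ids is an int array with one 0/1 entry per image; the result is
    # a float array of shape [num_images, 2], matching the y_ placeholder.
    one_hot = np.zeros((len(class_ids), num_classes), dtype=np.float32)
    one_hot[np.arange(len(class_ids)), class_ids] = 1.0
    return one_hot

# [pedestrian, negative, pedestrian] -> [[0, 1], [1, 0], [0, 1]]
print dense_to_one_hot(np.array([1, 0, 1]))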
The code
import dataset as input_data
import tensorflow as tf

def weight_variable(shape):
    # Weights start as small truncated-normal noise, as in the MNIST tutorial.
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # Slightly positive bias to avoid dead ReLUs.
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 3x3 overlapping pooling with stride 2: each pool halves height and width.
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
data = input_data.read_data_sets()

sess = tf.InteractiveSession()

# Flattened RGB images in, one-hot labels over the two classes
# (pedestrian / not pedestrian) out.
x = tf.placeholder("float", shape=[None, input_data.HEIGHT * input_data.WIDTH * 3])
y_ = tf.placeholder("float", shape=[None, 2])

# First convolutional layer: 5x5 kernels, 3 input channels, 64 feature maps.
W_conv1 = weight_variable([5, 5, 3, 64])
b_conv1 = bias_variable([64])
# Reshape the flat vectors back into images; the dimension order has to match
# the flattening in dataset.py. With the usual row-major (HEIGHT, WIDTH, 3)
# layout that means HEIGHT comes before WIDTH here.
x_image = tf.reshape(x, [-1, input_data.HEIGHT, input_data.WIDTH, 3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_norm1 = tf.nn.lrn(h_pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

# Second convolutional layer: 5x5 kernels, 64 -> 64 feature maps.
W_conv2 = weight_variable([5, 5, 64, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_norm1, W_conv2) + b_conv2)
h_norm2 = tf.nn.lrn(h_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
h_pool2 = max_pool_2x2(h_norm2)

# Fully connected layer; two stride-2 pools leave (HEIGHT/4) x (WIDTH/4)
# feature maps, so HEIGHT and WIDTH must both be divisible by 4.
W_fc1 = weight_variable([input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout between the fully connected and readout layers.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer: one output per class.
W_fc2 = weight_variable([1024, 2])
b_fc2 = bias_variable([2])

y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

sess.run(tf.initialize_all_variables())
for i in range(20000):
    batch = data.train.next_batch(50)
    if i % 100 == 0:
        # Evaluate on the current batch without dropout.
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print "step %d, training accuracy %g" % (i, train_accuracy)
    # Train with dropout enabled.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

print "test accuracy %g" % accuracy.eval(feed_dict={
    x: data.test.images, y_: data.test.labels, keep_prob: 1.0})
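One thing I'm unsure about in the code above is the hand-rolled loss: tf.log(y_conv) goes to -inf whenever the softmax saturates a probability to 0, which could give NaN gradients. If I understand the docs right, tf.nn.softmax_cross_entropy_with_logits computes the same loss from the raw logits in a numerically stable way. A sketch of how I think those lines would change (using reduce_mean instead of reduce_sum to average over the batch; this is not what produced the output below):

# Keep the last layer as raw logits and let TensorFlow fold the softmax
# into the loss, instead of computing log(softmax(...)) by hand.
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)  # still used for argmax / accuracy

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)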
The output
step 0, training accuracy 0.14
step 100, training accuracy 0.54
step 200, training accuracy 0.28
step 300, training accuracy 0.46
step 400, training accuracy 0.32
step 500, training accuracy 0.52
step 600, training accuracy 0.56
step 700, training accuracy 0.76
step 800, training accuracy 0.66
Any help would be appreciated, thanks.