Autograd¶
Autograd can automatically differentiate native Python and Numpy code. It can handle a large subset of Python's features, including loops, ifs, recursion and closures, and it can even take derivatives of derivatives of derivatives. It uses reverse-mode differentiation (a.k.a. backpropagation), which means it can efficiently take gradients of scalar-valued functions with respect to array-valued arguments. There's also a forward-mode extension, which lets you arbitrarily mix forward- and reverse-mode accumulation. The main intended application of autograd is gradient-based optimization.
# Use pip install autograd OR conda install autograd (if you have Anaconda)
import autograd.numpy as np
from autograd import grad
import matplotlib.pyplot as plt
%matplotlib inline
# Define a simple tanh-like function
# (note: with y = exp(-x) this actually computes tanh(x/2),
# but it illustrates the same point)
def tanh(x):
    y = np.exp(-x)
    return (1.0 - y) / (1.0 + y)
grad_tanh = grad(tanh) # get its gradient function
# Evaluate the gradient at x = 1.0
grad_tanh(1.0)
0.39322386648296376
# Compare to finite differences
(tanh(1.0001) - tanh(0.9999)) / 0.0002
0.39322386636453377
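Autograd also ships a utility that automates exactly this kind of check. A minimal sketch using autograd.test_util.check_grads, which compares analytic gradients against finite differences and raises an error if they disagree:

from autograd.test_util import check_grads

# Verify reverse-mode gradients of tanh against finite differences.
check_grads(tanh, modes=['rev'])(1.0)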
Differentiate as many times as you like¶
We can continue to differentiate as many times as we like, and use numpy's broadcasting of scalar-valued functions across many different input values:
x = np.linspace(-7, 7, 200)
plt.plot(x, tanh(x),
         x, grad(tanh)(x),                                # first derivative
         x, grad(grad(tanh))(x),                          # second derivative
         x, grad(grad(grad(tanh)))(x),                    # third derivative
         x, grad(grad(grad(grad(tanh))))(x),              # fourth derivative
         x, grad(grad(grad(grad(grad(tanh)))))(x),        # fifth derivative
         x, grad(grad(grad(grad(grad(grad(tanh))))))(x))  # sixth derivative
plt.axis('off')
plt.savefig("tanh.png")
plt.show()
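Note: recent autograd releases restrict grad to scalar-output functions, so applying grad(tanh) directly to an array raises an error there; elementwise_grad is the broadcasting equivalent. A sketch of the first two curves with that API:

from autograd import elementwise_grad as egrad  # broadcasts a gradient over arrays

plt.plot(x, tanh(x),
         x, egrad(tanh)(x),         # first derivative
         x, egrad(egrad(tanh))(x))  # second derivative
plt.show()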
Another simple example¶
def taylor_sine(x):  # Taylor approximation to sine function
    ans = currterm = x
    i = 0
    while np.abs(currterm) > 0.001:
        currterm = -currterm * x**2 / ((2 * i + 3) * (2 * i + 2))
        ans = ans + currterm
        i += 1
    return ans
grad_sine = grad(taylor_sine)
print("Gradient of sin(pi) is: ", grad_sine(np.pi))
Gradient of sin(pi) is: -0.9998995297042174
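Higher-order derivatives pass through the while loop as well. Since the second derivative of sine is its negation, the following sanity check should be close to -sin(1.0) ≈ -0.841, up to the 0.001 truncation tolerance:

# d^2/dx^2 sin(x) = -sin(x), so this should be close to -sin(1.0)
grad(grad(taylor_sine))(1.0)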
Complete Example: Logistic Regression¶
A common use case for automatic differentiation is to train a probabilistic model. Here we present a very simple (but complete) example of specifying and training a logistic regression model for binary classification:
def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1)

def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12, 0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])
# Define a function that returns gradients of training loss using autograd.
training_gradient_fun = grad(training_loss)
# Optimize weights using gradient descent.
weights = np.array([0.0, 0.0, 0.0])
print("Initial loss:", training_loss(weights))
for i in range(100):
    weights -= training_gradient_fun(weights) * 0.01
print("Trained loss:", training_loss(weights))
Initial loss: 2.77258872224
Trained loss: 0.389007543156
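If you also want the loss value at each step (say, to log training progress), autograd's value_and_grad returns both from a single evaluation instead of calling training_loss and its gradient function separately. A minimal sketch:

from autograd import value_and_grad

# Returns (loss, gradient) together at each step.
loss_and_grad_fun = value_and_grad(training_loss)
weights = np.array([0.0, 0.0, 0.0])
for i in range(100):
    loss, gradient = loss_and_grad_fun(weights)
    weights -= gradient * 0.01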
A Simple Neural Network¶
A multi-layer perceptron for classification of MNIST handwritten digits
from __future__ import absolute_import, division
from __future__ import print_function
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.scipy.misc import logsumexp  # newer autograd: autograd.scipy.special
from autograd import grad
from autograd.util import flatten          # newer autograd: autograd.misc.flatten
from autograd.optimizers import adam       # newer autograd: autograd.misc.optimizers
from data import load_mnist                # helper shipped with autograd's examples

def init_random_params(scale, layer_sizes, rs=npr.RandomState(0)):
    """Build a list of (weights, biases) tuples,
    one for each layer in the net."""
    return [(scale * rs.randn(m, n),   # weight matrix
             scale * rs.randn(n))      # bias vector
            for m, n in zip(layer_sizes[:-1], layer_sizes[1:])]

def neural_net_predict(params, inputs):
    """Implements a deep neural network for classification.
    params is a list of (weights, bias) tuples.
    inputs is an (N x D) matrix.
    returns normalized class log-probabilities."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs - logsumexp(outputs, axis=1, keepdims=True)

def l2_norm(params):
    """Computes l2 norm of params by flattening them into a vector."""
    flattened, _ = flatten(params)
    return np.dot(flattened, flattened)

def log_posterior(params, inputs, targets, L2_reg):
    log_prior = -L2_reg * l2_norm(params)
    log_lik = np.sum(neural_net_predict(params, inputs) * targets)  # targets are one-hot
    return log_prior + log_lik

def accuracy(params, inputs, targets):
    target_class = np.argmax(targets, axis=1)
    predicted_class = np.argmax(neural_net_predict(params, inputs), axis=1)
    return np.mean(predicted_class == target_class)

if __name__ == '__main__':
    # Model parameters
    layer_sizes = [784, 200, 100, 10]
    L2_reg = 1.0

    # Training parameters
    param_scale = 0.1
    batch_size = 256
    num_epochs = 10
    step_size = 0.001

    print("Loading training data...")
    N, train_images, train_labels, test_images, test_labels = load_mnist()

    init_params = init_random_params(param_scale, layer_sizes)

    num_batches = int(np.ceil(len(train_images) / batch_size))
    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx+1) * batch_size)

    # Define training objective
    def objective(params, iter):
        idx = batch_indices(iter)
        return -log_posterior(params, train_images[idx], train_labels[idx], L2_reg)

    # Get gradient of objective using autograd.
    objective_grad = grad(objective)

    print("     Epoch     |    Train accuracy  |       Test accuracy  ")
    def print_perf(params, iter, gradient):
        if iter % num_batches == 0:
            train_acc = accuracy(params, train_images, train_labels)
            test_acc = accuracy(params, test_images, test_labels)
            print("{:15}|{:20}|{:20}".format(iter//num_batches, train_acc, test_acc))

    # The optimizers provided can optimize lists, tuples, or dicts of parameters.
    optimized_params = adam(objective_grad, init_params, step_size=step_size,
                            num_iters=num_epochs * num_batches, callback=print_perf)
Loading training data...
     Epoch     |    Train accuracy  |       Test accuracy  
              0|              0.1186|              0.1228
              1|  0.9070666666666667|              0.9119
              2|             0.91235|              0.9157
              3|             0.91715|              0.9208
              4|  0.9211166666666667|              0.9252
              5|  0.9249833333333334|              0.9284
              6|              0.9274|              0.9302
              7|  0.9301166666666667|              0.9328
              8|  0.9317333333333333|              0.9339
              9|  0.9333666666666667|              0.9354
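The data module imported above is the small helper that lives alongside autograd's bundled examples; it is not part of the installed library. Any loader works as long as it returns the same tuple of flattened images and one-hot labels. A hypothetical stand-in, assuming scikit-learn is available:

def load_mnist():
    # Hypothetical replacement for the examples' data.load_mnist, assuming
    # scikit-learn is installed; returns (N, train_images, train_labels,
    # test_images, test_labels) with pixels in [0, 1] and one-hot labels.
    from sklearn.datasets import fetch_openml
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
    X = X / 255.0                          # X is already flattened: (70000, 784)
    one_hot = np.eye(10)[y.astype(int)]    # one-hot encode the digit labels
    train_images, test_images = X[:60000], X[60000:]
    train_labels, test_labels = one_hot[:60000], one_hot[60000:]
    return len(train_images), train_images, train_labels, test_images, test_labels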
Coming up...¶
- Convolutional Neural Net Example
- **RNN - Recurrent Neural Net** Example
- **LSTM - Long Short-Term Memory** Example
- **Backprop - Back Propagation with some fluid simulation** Example
- **GAN - Generative Adversarial Net** Example
- **Gaussian & Deep Gaussian** Example
- **Bayesian Neural Net** Example
- And much more...