#!/usr/bin/env python
# coding: utf-8

# # Autograd

# Autograd can automatically differentiate native Python and Numpy code. It can handle a large subset of Python's features, including loops, ifs, recursion and closures, and it can even take derivatives of derivatives of derivatives. It uses reverse-mode differentiation (a.k.a. backpropagation), which means it can efficiently take gradients of scalar-valued functions with respect to array-valued arguments. There's also a forward-mode extension, which lets you arbitrarily mix forward- and reverse-mode accumulation. The main intended application of autograd is gradient-based optimization.

# In[24]:


# Use `pip install autograd` OR `conda install autograd` (if you have Anaconda)
import autograd.numpy as np
from autograd import grad
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[25]:


# Define a simple tanh function
def tanh(x):
    y = np.exp(-2.0 * x)
    return (1.0 - y) / (1.0 + y)

grad_tanh = grad(tanh)  # get its gradient function


# In[26]:


# Calculate the gradient at x = 1.0
grad_tanh(1.0)


# In[27]:


# Compare to a central finite-difference estimate
(tanh(1.0001) - tanh(0.9999)) / 0.0002


# #### Differentiate as many times as you like

# We can continue to differentiate as many times as we like, broadcasting across many different input values. Since `grad` only handles scalar-valued outputs, we use `elementwise_grad` for array inputs:

# In[ ]:


from autograd import elementwise_grad as egrad  # for functions that vectorize over inputs

x = np.linspace(-7, 7, 200)
plt.plot(x, tanh(x),
         x, egrad(tanh)(x),                                      # first derivative
         x, egrad(egrad(tanh))(x),                               # second derivative
         x, egrad(egrad(egrad(tanh)))(x),                        # third derivative
         x, egrad(egrad(egrad(egrad(tanh))))(x),                 # fourth derivative
         x, egrad(egrad(egrad(egrad(egrad(tanh)))))(x),          # fifth derivative
         x, egrad(egrad(egrad(egrad(egrad(egrad(tanh))))))(x))   # sixth derivative
plt.axis('off')
plt.savefig("tanh.png")
plt.show()


# ## Another simpler example

# In[37]:


def taylor_sine(x):
    # Taylor approximation to the sine function
    ans = currterm = x
    i = 0
    while np.abs(currterm) > 0.001:
        currterm = -currterm * x**2 / ((2 * i + 3) * (2 * i + 2))
        ans = ans + currterm
        i += 1
    return ans

grad_sine = grad(taylor_sine)
print("Gradient of sin(pi) is: ", grad_sine(np.pi))


# ## Complete Example: Logistic Regression

# A common use case for automatic differentiation is to train a probabilistic model. Here we present a very simple (but complete) example of specifying and training a logistic regression model for binary classification:

# In[39]:


def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1)

def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52,  1.12,  0.77],
                   [0.88, -1.08,  0.15],
                   [0.52,  0.06, -1.30],
                   [0.74, -2.49,  1.39]])
targets = np.array([True, True, False, True])

# Define a function that returns gradients of training loss using autograd.
training_gradient_fun = grad(training_loss)
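# Before optimizing, we can sanity-check the gradient function. The next cell is an added
# check that is not part of the original example: it compares one component of autograd's
# gradient against a central finite difference at an arbitrary probe point (the all-zeros
# weight vector), with an arbitrary step size `eps`.

# In[ ]:


# Added sanity check (probe point and step size are assumptions, not from the original notebook).
eps = 1e-4
w0 = np.array([0.0, 0.0, 0.0])
autograd_grad0 = training_gradient_fun(w0)[0]   # d(loss)/d(weights[0]) via autograd
w_plus, w_minus = w0.copy(), w0.copy()
w_plus[0] += eps
w_minus[0] -= eps
numeric_grad0 = (training_loss(w_plus) - training_loss(w_minus)) / (2 * eps)
print("autograd:", autograd_grad0, "  finite difference:", numeric_grad0)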
# Optimize weights using gradient descent.
weights = np.array([0.0, 0.0, 0.0])
print("Initial loss:", training_loss(weights))
for i in range(100):
    weights -= training_gradient_fun(weights) * 0.01

print("Trained loss:", training_loss(weights))


# ## A Simple Neural Network

# A multi-layer perceptron for classification of MNIST handwritten digits.

# In[43]:


from __future__ import absolute_import, division
from __future__ import print_function
import autograd.numpy as np
import autograd.numpy.random as npr
# Note: newer autograd releases move these helpers to autograd.scipy.special.logsumexp,
# autograd.misc.flatten and autograd.misc.optimizers.adam; the paths below are for the
# version this notebook was written against.
from autograd.scipy.misc import logsumexp
from autograd import grad
from autograd.util import flatten
from autograd.optimizers import adam
from data import load_mnist  # MNIST loading helper shipped with autograd's examples


# In[44]:


def init_random_params(scale, layer_sizes, rs=npr.RandomState(0)):
    """Build a list of (weights, biases) tuples, one for each layer in the net."""
    return [(scale * rs.randn(m, n),   # weight matrix
             scale * rs.randn(n))      # bias vector
            for m, n in zip(layer_sizes[:-1], layer_sizes[1:])]


# In[45]:


def neural_net_predict(params, inputs):
    """Implements a deep neural network for classification.
       params is a list of (weights, bias) tuples.
       inputs is an (N x D) matrix.
       returns normalized class log-probabilities."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs - logsumexp(outputs, axis=1, keepdims=True)


# In[46]:


def l2_norm(params):
    """Computes l2 norm of params by flattening them into a vector."""
    flattened, _ = flatten(params)
    return np.dot(flattened, flattened)


# In[47]:


def log_posterior(params, inputs, targets, L2_reg):
    log_prior = -L2_reg * l2_norm(params)
    log_lik = np.sum(neural_net_predict(params, inputs) * targets)
    return log_prior + log_lik


# In[48]:


def accuracy(params, inputs, targets):
    target_class = np.argmax(targets, axis=1)
    predicted_class = np.argmax(neural_net_predict(params, inputs), axis=1)
    return np.mean(predicted_class == target_class)


# In[50]:


if __name__ == '__main__':
    # Model parameters
    layer_sizes = [784, 200, 100, 10]
    L2_reg = 1.0

    # Training parameters
    param_scale = 0.1
    batch_size = 256
    num_epochs = 10
    step_size = 0.001

    print("Loading training data...")
    N, train_images, train_labels, test_images, test_labels = load_mnist()

    init_params = init_random_params(param_scale, layer_sizes)

    num_batches = int(np.ceil(len(train_images) / batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    # Define training objective
    def objective(params, iter):
        idx = batch_indices(iter)
        return -log_posterior(params, train_images[idx], train_labels[idx], L2_reg)

    # Get gradient of objective using autograd.
    objective_grad = grad(objective)

    print("     Epoch     |    Train accuracy  |    Test accuracy  ")
    def print_perf(params, iter, gradient):
        if iter % num_batches == 0:
            train_acc = accuracy(params, train_images, train_labels)
            test_acc = accuracy(params, test_images, test_labels)
            print("{:15}|{:20}|{:20}".format(iter // num_batches, train_acc, test_acc))

    # The optimizers provided can optimize lists, tuples, or dicts of parameters.
    optimized_params = adam(objective_grad, init_params, step_size=step_size,
                            num_iters=num_epochs * num_batches, callback=print_perf)


# ### Coming up...
# 
# 1. **Convolutional Neural Net** Example
# 2. **RNN - Recurrent Neural Net** Example
# 3. **LSTM - Long Short-Term Memory** Example
# 4. **Backprop - Back Propagation with some fluid simulation** Example
# 5. **GAN - Generative Adversarial Net** Example
# 6. **Gaussian & Deep Gaussian** Example
# 7. **Bayesian Neural Net** Example
# 8. And much more...
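# ### Aside: the optimizer interface, in miniature

# The `adam` call above takes a gradient function of the form `grad_fn(params, iteration)`
# and works on nested containers (lists, tuples or dicts) of parameters. The cell below is
# a small self-contained sketch of that interface on a toy quadratic; it is not part of the
# original notebook, and the names (`toy_objective`, `init`) are made up for illustration.
# The import path `autograd.misc.optimizers` follows newer autograd releases; older ones
# use `from autograd.optimizers import adam` as in the MNIST cell above.

# In[ ]:


import autograd.numpy as anp
from autograd import grad as ag_grad
from autograd.misc.optimizers import adam as adam_opt

def toy_objective(params, iteration):
    # A simple quadratic bowl over a dict of parameters; `iteration` is unused here,
    # but the optimizer interface requires the gradient function to accept it.
    return anp.sum(params['w'] ** 2) + anp.sum((params['b'] - 3.0) ** 2)

toy_grad = ag_grad(toy_objective)                # gradient with respect to the params dict
init = {'w': anp.array([1.0, -2.0]), 'b': anp.array([0.0])}
result = adam_opt(toy_grad, init, step_size=0.1, num_iters=200)
print("Optimized toy parameters:", result)       # expect w near 0 and b near 3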