I am trying to compute the gradient of the loss of a simple linear model. However, when I use TensorFlow, the gradients come back as None. Why is this happening, and how can I compute the gradients using TensorFlow?
import numpy as np
import tensorflow as tf
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')
inputs = tf.convert_to_tensor(inputs)
targets = tf.convert_to_tensor(targets)
w = tf.random.normal(shape=(2, 3))
b = tf.random.normal(shape=(2,))
print(w, b)
def model(x):
    # linear model: predictions = x @ w^T + b
    return tf.matmul(x, w, transpose_b=True) + b

def mse(t1, t2):
    # mean squared error
    diff = t1 - t2
    return tf.reduce_sum(diff * diff) / tf.cast(tf.size(diff), 'float32')

with tf.GradientTape() as tape:
    pred = model(inputs)
    loss = mse(pred, targets)
print(tape.gradient(loss, [w, b]))
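The only workaround I can think of is to tell the tape explicitly to watch w and b (a rough sketch below, assuming tape.watch is the right mechanism for plain tensors), but I am not sure whether this is the intended approach or whether w and b should be created as tf.Variable instead:

with tf.GradientTape() as tape:
    # explicitly watch the plain tensors, since they are not tf.Variable
    tape.watch(w)
    tape.watch(b)
    pred = model(inputs)
    loss = mse(pred, targets)
print(tape.gradient(loss, [w, b]))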
For comparison, here is the working code using PyTorch; the gradients are computed as expected.
import numpy as np
import torch
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)

def model(x):
    # linear model: predictions = x @ w^T + b
    return x @ w.t() + b

def mse(t1, t2):
    # mean squared error
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()
pred = model(inputs)
loss = mse(pred, targets)
loss.backward()
print(w.grad)
print(b.grad)
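In case it is useful: the PyTorch gradients agree with the closed-form gradients of this MSE loss, dL/dW = (2/N) * (pred - targets)^T @ X and dL/db = (2/N) * sum(pred - targets), where N = targets.numel(). Below is a quick NumPy check (just a sketch, reusing the tensors defined above):

# Sketch: closed-form gradients of the MSE loss for the linear model above,
# computed with NumPy so they can be compared against w.grad and b.grad.
X = inputs.numpy()
T = targets.numpy()
W = w.detach().numpy()
B = b.detach().numpy()
P = X @ W.T + B                          # predictions, shape (5, 2)
N = T.size                               # number of elements averaged in the loss
grad_w = 2.0 / N * (P - T).T @ X         # should match w.grad, shape (2, 3)
grad_b = 2.0 / N * (P - T).sum(axis=0)   # should match b.grad, shape (2,)
print(grad_w)
print(grad_b)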