import numpy as np

# Define the system of equations
A = np.array([[1, -2, 3], [3, 2, -5], [2, -5, 2]])
b = np.array([[-1], [3], [0]])

# Define the objective function f(X) = 0.5 * ||AX - b||^2
def objective_function(X, A, b):
    return 0.5 * np.linalg.norm(A @ X - b) ** 2

# Define the gradient of the objective function: A^T (AX - b)
def gradient(X, A, b):
    return A.T @ (A @ X - b)

# Gradient descent method
def gradient_descent(A, b, alpha=0.01, max_iter=1000, tol=1e-6):
    X = np.random.rand(A.shape[1], 1)  # Start with a random initial guess
    for i in range(max_iter):
        grad = gradient(X, A, b)
        X_new = X - alpha * grad
        # Stop once the update is smaller than the tolerance
        if np.linalg.norm(X_new - X) < tol:
            X = X_new
            break
        X = X_new
        # For debugging or analysis, you can print the objective function value
        # print(f"Iteration {i}, Objective Function Value: {objective_function(X, A, b)}")
    return X

# Running the gradient descent method
solution = gradient_descent(A, b)
print("Solution found by gradient descent:")
print("x =", solution[0, 0])
print("y =", solution[1, 0])
print("z =", solution[2, 0])
Solution found by gradient descent:
x = 0.2609087468229011
y = -0.08692084284170505
z = -0.47822679318745703
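As a quick sanity check (an addition to this write-up, not part of the original answer), the result can be compared against the exact solution of the square system, which np.linalg.solve returns directly; for this A and b the exact solution is (6/23, -2/23, -11/23), matching the values above:

import numpy as np

A = np.array([[1, -2, 3], [3, 2, -5], [2, -5, 2]])
b = np.array([[-1], [3], [0]])

# Direct solve of the 3x3 system for comparison with the iterative result
exact = np.linalg.solve(A, b)
print(exact.ravel())  # approximately [ 0.26086957 -0.08695652 -0.47826087]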
Explanation of the corrections:
- Objective function: The objective function is correctly defined as $$f(x) = \frac{1}{2} \|Ax - b\|^2$$.

- Gradient calculation: The gradient of the objective function is $$\nabla f(x) = A^T (A x - b)$$ (a quick finite-difference check is sketched after this list).

- Gradient descent implementation: A standard gradient descent loop is implemented with a stopping criterion based on the change in $$X$$ (the solution vector).
- Debugging information: A commented-out print statement is included to track the objective function value across iterations for easier analysis and debugging.
This ensures the gradient descent method minimizes the correct objective function and should converge to the correct solution.
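To double-check the gradient formula numerically, here is a minimal sketch (added for illustration, not part of the original answer) that compares the analytic gradient $$A^T (A x - b)$$ against a central finite-difference approximation at a random point:

import numpy as np

A = np.array([[1, -2, 3], [3, 2, -5], [2, -5, 2]], dtype=float)
b = np.array([[-1], [3], [0]], dtype=float)

def f(X):
    return 0.5 * np.linalg.norm(A @ X - b) ** 2

def analytic_grad(X):
    return A.T @ (A @ X - b)

X0 = np.random.rand(3, 1)
eps = 1e-6
fd_grad = np.zeros_like(X0)
for i in range(3):
    e = np.zeros_like(X0)
    e[i, 0] = eps
    # Central difference along coordinate i
    fd_grad[i, 0] = (f(X0 + e) - f(X0 - e)) / (2 * eps)

print(np.max(np.abs(fd_grad - analytic_grad(X0))))  # should be very small (roughly 1e-9 or less)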
Issues in original implementation:
- Incorrect Objective Function
- Incorrect Gradient Calculation
- The gradient descent function was based on an incorrect gradient and objective function, which led to incorrect updates and convergence to the wrong solution.
- The initial guess cur_pos = (10, 10, 10) was arbitrary, and the convergence criterion based on a tolerance for individual gradient components was not ideal; it is generally better to check the norm of the gradient vector (a sketch follows this list).
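As an illustration of that last point (a sketch based on the code above, not part of the original answer), the loop can stop on the norm of the gradient vector instead of the change in X:

import numpy as np

def gradient_descent_gradnorm(A, b, alpha=0.01, max_iter=10000, tol=1e-6):
    X = np.zeros((A.shape[1], 1))
    for _ in range(max_iter):
        grad = A.T @ (A @ X - b)
        # Stop once the gradient is (nearly) zero, i.e. X is (nearly) a stationary point
        if np.linalg.norm(grad) < tol:
            break
        X = X - alpha * grad
    return X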
Note:
- The @ operator can be used as a shorthand for np.matmul on ndarrays.
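For instance (a small demonstration added here), the two spellings give the same result on 2-D arrays:

import numpy as np

M = np.array([[1, 2], [3, 4]])
v = np.array([[5], [6]])
print(np.array_equal(M @ v, np.matmul(M, v)))  # True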
Reference implementation (class-based, with convergence plots):
import numpy as np
import matplotlib.pyplot as plt

class GradientDescentSolver:
    def __init__(self, A: np.ndarray, b: np.ndarray, alpha: float = 0.01, max_iter: int = 1000, tol: float = 1e-6):
        self.A = A
        self.b = b
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol
        self.history = []
        self.objective_values = []

    def objective_function(self, X: np.ndarray) -> float:
        return 0.5 * np.linalg.norm(self.A @ X - self.b) ** 2

    def gradient(self, X: np.ndarray) -> np.ndarray:
        return self.A.T @ (self.A @ X - self.b)

    def solve(self) -> np.ndarray:
        X = np.random.rand(self.A.shape[1], 1)  # Start with a random initial guess
        for i in range(self.max_iter):
            grad = self.gradient(X)
            X_new = X - self.alpha * grad
            # Record every accepted iterate, then stop once the update is below the tolerance
            converged = np.linalg.norm(X_new - X) < self.tol
            X = X_new
            self.history.append(X.copy())
            self.objective_values.append(self.objective_function(X))
            if converged:
                break
        return X

    def plot_solution(self) -> None:
        if not self.history:
            raise ValueError("No history found. Run solve() first.")
        history = np.array(self.history).squeeze()
        fig, axs = plt.subplots(2, figsize=(10, 10))
        # Plot the objective function value over iterations
        axs[0].plot(self.objective_values, label='Objective Function Value')
        axs[0].set_xlabel('Iteration')
        axs[0].set_ylabel('Objective Function Value')
        axs[0].set_title('Gradient Descent Convergence')
        axs[0].legend()
        # Plot the parameter trajectories
        axs[1].plot(history[:, 0], label='x')
        axs[1].plot(history[:, 1], label='y')
        axs[1].plot(history[:, 2], label='z')
        axs[1].set_xlabel('Iteration')
        axs[1].set_ylabel('Parameter Values')
        axs[1].set_title('Parameter Trajectories')
        axs[1].legend()
        plt.tight_layout()
        plt.show()

# Define the system of equations
A = np.array([[1, -2, 3], [3, 2, -5], [2, -5, 2]])
b = np.array([[-1], [3], [0]])

# Running the gradient descent solver
solver = GradientDescentSolver(A, b)
solution = solver.solve()
print("Solution found by gradient descent:")
print("x =", solution[0, 0])
print("y =", solution[1, 0])
print("z =", solution[2, 0])

# Plot the solution if desired
solver.plot_solution()
Solution found by gradient descent:
x = 0.2609088084845793
y = -0.08692078669238036
z = -0.4782267395600823
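One practical aside, not covered in the original answer: for this quadratic objective, gradient descent with a fixed step size converges whenever $$\alpha < 2 / \lambda_{\max}(A^T A)$$, and a common conservative choice is $$1 / \lambda_{\max}$$. A sketch of computing such a step size for this A:

import numpy as np

A = np.array([[1, -2, 3], [3, 2, -5], [2, -5, 2]], dtype=float)
# The largest eigenvalue of A^T A equals the squared spectral norm (largest singular value) of A
lam_max = np.linalg.norm(A, 2) ** 2
alpha_safe = 1.0 / lam_max
print(alpha_safe)  # any fixed alpha below 2 * alpha_safe keeps the iteration stable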
