"""Gradient descent implementations.""" from dataclasses import dataclass, field from typing import List, Literal, Optional import numpy as np from .functions import Function1D, Function2D from .line_search import golden_section_search, armijo_step, armijo_step_1d StepMethod = Literal["constant", "golden_section", "armijo"] @dataclass class IterationInfo1D: """Information about a single iteration of 1D gradient descent.""" iteration: int x: float f_x: float grad: float step_size: float @dataclass class GradientDescentResult1D: """Result of 1D gradient descent.""" x_star: float f_star: float iterations: List[IterationInfo1D] converged: bool method: str @property def trajectory(self) -> List[float]: return [it.x for it in self.iterations] @dataclass class IterationInfo2D: """Information about a single iteration of 2D gradient descent.""" iteration: int x: np.ndarray f_x: float grad: np.ndarray step_size: float @dataclass class GradientDescentResult2D: """Result of 2D gradient descent.""" x_star: np.ndarray f_star: float iterations: List[IterationInfo2D] converged: bool method: str @property def trajectory(self) -> List[np.ndarray]: return [it.x for it in self.iterations] def gradient_descent_1d( func: Function1D, x0: float, step_method: StepMethod = "constant", step_size: float = 0.1, eps_x: float = 0.05, eps_f: float = 0.001, max_iters: int = 100, armijo_params: Optional[dict] = None, golden_section_bounds: Optional[tuple] = None, ) -> GradientDescentResult1D: """ Gradient descent for 1D function. Args: func: Function to minimize x0: Starting point step_method: Step selection method ("constant", "golden_section", "armijo") step_size: Step size for constant method eps_x: Tolerance for x convergence eps_f: Tolerance for f convergence max_iters: Maximum number of iterations armijo_params: Parameters for Armijo rule (d_init, epsilon, theta) golden_section_bounds: Search bounds for golden section (a, b) Returns: GradientDescentResult1D with trajectory and final result """ x = x0 iterations: List[IterationInfo1D] = [] converged = False armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5} for k in range(max_iters): f_x = func(x) grad = func.gradient(x) # Select step size if step_method == "constant": alpha = step_size elif step_method == "golden_section": # Optimize phi(alpha) = f(x - alpha * grad) using golden section bounds = golden_section_bounds or (0.0, 2.0) phi = lambda a: func(x - a * grad) alpha = golden_section_search(phi, bounds[0], bounds[1]) elif step_method == "armijo": alpha = armijo_step_1d( func, x, grad, d_init=armijo_params.get("d_init", 1.0), epsilon=armijo_params.get("epsilon", 0.1), theta=armijo_params.get("theta", 0.5), ) else: raise ValueError(f"Unknown step method: {step_method}") iterations.append(IterationInfo1D( iteration=k + 1, x=x, f_x=f_x, grad=grad, step_size=alpha, )) # Update x x_new = x - alpha * grad f_new = func(x_new) # Check convergence if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f: x = x_new converged = True break x = x_new # Add final point iterations.append(IterationInfo1D( iteration=len(iterations) + 1, x=x, f_x=func(x), grad=func.gradient(x), step_size=0.0, )) method_names = { "constant": "Константный шаг", "golden_section": "Золотое сечение", "armijo": "Правило Армихо", } return GradientDescentResult1D( x_star=x, f_star=func(x), iterations=iterations, converged=converged, method=method_names.get(step_method, step_method), ) def gradient_descent_2d( func: Function2D, x0: np.ndarray, step_method: StepMethod = "constant", 
def gradient_descent_2d(
    func: Function2D,
    x0: np.ndarray,
    step_method: StepMethod = "constant",
    step_size: float = 0.01,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult2D:
    """
    Gradient descent for a 2D function.

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule
        golden_section_bounds: Search bounds for golden section

    Returns:
        GradientDescentResult2D with trajectory and final result
    """
    x = np.array(x0, dtype=float)
    iterations: List[IterationInfo2D] = []
    converged = False
    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Stop early at a stationary point; the final point is recorded once
        # after the loop (appending here as well would duplicate it).
        if grad_norm < 1e-10:
            converged = True
            break

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            bounds = golden_section_bounds or (0.0, 1.0)
            phi = lambda a: func(x - a * grad)
            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step(
                func,
                x,
                grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Record the final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )
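
# Usage sketch (illustrative only): `Quadratic2D` is a hypothetical Function2D
# exposing __call__(x) and gradient(x) for a vector x of shape (2,).
#
#     f = Quadratic2D()  # e.g. f(x) = x[0]**2 + 10 * x[1]**2
#     result = gradient_descent_2d(f, np.array([3.0, 2.0]),
#                                  step_method="golden_section")
#     print(result.x_star, len(result.trajectory), result.method)
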
def heavy_ball_1d(
    func: Function1D,
    x0: float,
    alpha: float = 0.1,
    beta: float = 0.9,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
) -> GradientDescentResult1D:
    """
    Heavy Ball method for a 1D function.

    x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult1D with trajectory and final result
    """
    x = x0
    x_prev = x0  # No momentum on the first iteration
    iterations: List[IterationInfo1D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else 0.0

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x
            x = x_new
            converged = True
            break

        x_prev = x
        x = x_new

    # Record the final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )


def heavy_ball_2d(
    func: Function2D,
    x0: np.ndarray,
    alpha: float = 0.01,
    beta: float = 0.9,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
) -> GradientDescentResult2D:
    """
    Heavy Ball method for a 2D function.

    x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult2D with trajectory and final result
    """
    x = np.array(x0, dtype=float)
    x_prev = x.copy()  # No momentum on the first iteration
    iterations: List[IterationInfo2D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Stop early at a stationary point; the final point is recorded once
        # after the loop (appending here as well would duplicate it).
        if grad_norm < 1e-10:
            converged = True
            break

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x.copy()
            x = x_new
            converged = True
            break

        x_prev = x.copy()
        x = x_new

    # Record the final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )
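
# Usage sketch (illustrative only): heavy ball reuses the hypothetical
# `Quadratic2D` from the sketch above. On ill-conditioned quadratics the
# momentum term typically cuts the iteration count versus plain descent.
#
#     f = Quadratic2D()
#     gd = gradient_descent_2d(f, np.array([3.0, 2.0]), step_size=0.05)
#     hb = heavy_ball_2d(f, np.array([3.0, 2.0]), alpha=0.05, beta=0.8)
#     print(len(gd.iterations), len(hb.iterations))  # heavy ball is usually shorter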