task2

2026-01-07 15:08:09 +03:00
parent 61cc472669
commit 850791b25d
7 changed files with 1669 additions and 0 deletions
--- a/task2/common/init.py
+++ b/task2/common/init.py
@@ -0,0 +1,25 @@
+# Common utilities for gradient descent optimization
+from .functions import (
+    Function1D,
+    Function2D,
+    TaskFunction1D,
+    HimmelblauFunction,
+    RavineFunction,
+)
+from .line_search import golden_section_search, armijo_step
+from .gradient_descent import gradient_descent_1d, gradient_descent_2d, heavy_ball_1d, heavy_ball_2d
+
+# Public names exported by ``from <package> import *``; the list matches the
+# names imported from the sibling modules at the top of this file.
+__all__ = [
+    "Function1D",
+    "Function2D",
+    "TaskFunction1D",
+    "HimmelblauFunction",
+    "RavineFunction",
+    "golden_section_search",
+    "armijo_step",
+    "gradient_descent_1d",
+    "gradient_descent_2d",
+    "heavy_ball_1d",
+    "heavy_ball_2d",
+]
+
--- a/task2/common/functions.py
+++ b/task2/common/functions.py
@@ -0,0 +1,147 @@
+"""Function definitions with their gradients for optimization."""
+
+import math
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+import numpy as np
+
+
+class Function1D(ABC):
+    """Interface for a scalar function of one real variable.
+
+    Concrete subclasses provide the function value, its first derivative
+    and the interval associated with the function.
+    """
+
+    name: str = "Abstract 1D Function"
+
+    @abstractmethod
+    def __call__(self, x: float) -> float:
+        """Return the function value at ``x``."""
+
+    @abstractmethod
+    def gradient(self, x: float) -> float:
+        """Return the first derivative at ``x``."""
+
+    @property
+    @abstractmethod
+    def domain(self) -> Tuple[float, float]:
+        """Return the interval ``(a, b)`` on which the function is defined."""
+
+
+class Function2D(ABC):
+    """Interface for a scalar function of a two-component point.
+
+    Concrete subclasses provide the function value, its gradient vector
+    and a rectangle to use when plotting.
+    """
+
+    name: str = "Abstract 2D Function"
+
+    @abstractmethod
+    def __call__(self, x: np.ndarray) -> float:
+        """Return the function value at the point ``x = [x1, x2]``."""
+
+    @abstractmethod
+    def gradient(self, x: np.ndarray) -> np.ndarray:
+        """Return the gradient vector at the point ``x = [x1, x2]``."""
+
+    @property
+    @abstractmethod
+    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
+        """Return ``((x1_min, x1_max), (x2_min, x2_max))`` for plotting."""
+
+
+class TaskFunction1D(Function1D):
+    """
+    The 1D task function f(x) = sqrt(x^2 + 9) / 4 + (5 - x) / 5.
+
+    Its derivative is f'(x) = x / (4 * sqrt(x^2 + 9)) - 1/5.
+    """
+
+    name = "f(x) = √(x² + 9)/4 + (5 - x)/5"
+
+    def __call__(self, x: float) -> float:
+        """Return f(x)."""
+        root = math.sqrt(x**2 + 9)
+        linear_part = (5 - x) / 5
+        return root / 4 + linear_part
+
+    def gradient(self, x: float) -> float:
+        """Return f'(x)."""
+        root = math.sqrt(x**2 + 9)
+        return x / (4 * root) - 1 / 5
+
+    @property
+    def domain(self) -> Tuple[float, float]:
+        """Return the interval (-3, 8) associated with this function."""
+        lower, upper = -3.0, 8.0
+        return (lower, upper)
+
+
+class HimmelblauFunction(Function2D):
+    """
+    Himmelblau's function f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2.
+
+    It has four identical local minima, located at:
+    - (3.0, 2.0)
+    - (-2.805118, 3.131312)
+    - (-3.779310, -3.283186)
+    - (3.584428, -1.848126)
+
+    Writing A = x² + y - 11 and B = x + y² - 7, the gradient is:
+    ∂f/∂x = 4x·A + 2·B
+    ∂f/∂y = 2·A + 4y·B
+    """
+
+    name = "Himmelblau: (x² + y - 11)² + (x + y² - 7)²"
+
+    def __call__(self, x: np.ndarray) -> float:
+        """Return the function value at x = [x1, x2]."""
+        u, v = x[0], x[1]
+        first = u**2 + v - 11
+        second = u + v**2 - 7
+        return first**2 + second**2
+
+    def gradient(self, x: np.ndarray) -> np.ndarray:
+        """Return the gradient [∂f/∂x1, ∂f/∂x2] at x = [x1, x2]."""
+        u, v = x[0], x[1]
+        # Both partial derivatives share the same two residual terms.
+        first = u**2 + v - 11
+        second = u + v**2 - 7
+        return np.array([
+            4 * u * first + 2 * second,
+            2 * first + 4 * v * second,
+        ])
+
+    @property
+    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
+        """Return the plotting square [-5, 5] x [-5, 5]."""
+        side = (-5.0, 5.0)
+        return (side, side)
+
+
+class RavineFunction(Function2D):
+    """
+    Ravine function (elliptic paraboloid) f(x, y) = x² + 20y².
+
+    Minimum at (0, 0) with f(0, 0) = 0.
+
+    Demonstrates the "ravine effect": the gradient is almost perpendicular
+    to the direction toward the minimum, which slows down convergence.
+
+    Gradient:
+    ∂f/∂x = 2x
+    ∂f/∂y = 40y
+    """
+
+    name = "Овраг: f(x,y) = x² + 20y²"
+
+    def __call__(self, x: np.ndarray) -> float:
+        """Return the function value at x = [x1, x2]."""
+        u, v = x[0], x[1]
+        return u**2 + 20 * v**2
+
+    def gradient(self, x: np.ndarray) -> np.ndarray:
+        """Return the gradient [2*x1, 40*x2] at x = [x1, x2]."""
+        u, v = x[0], x[1]
+        return np.array([2 * u, 40 * v])
+
+    @property
+    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
+        """Return the plotting rectangle [-2, 2] x [-0.5, 0.5]."""
+        x1_range = (-2.0, 2.0)
+        x2_range = (-0.5, 0.5)
+        return (x1_range, x2_range)
+
+
+# Registries of the available test functions, split by input dimensionality.
+# Each maps a short string key to the function class itself (not an
+# instance), so a caller has to instantiate the class before using it.
+FUNCTIONS_1D = {
+    "task": TaskFunction1D,
+}
+
+FUNCTIONS_2D = {
+    "himmelblau": HimmelblauFunction,
+    "ravine": RavineFunction,
+}
--- a/task2/common/gradient_descent.py
+++ b/task2/common/gradient_descent.py
@@ -0,0 +1,441 @@
+"""Gradient descent implementations."""
+
+from dataclasses import dataclass, field
+from typing import List, Literal, Optional
+import numpy as np
+
+from .functions import Function1D, Function2D
+from .line_search import golden_section_search, armijo_step, armijo_step_1d
+
+
+# Names accepted by the ``step_method`` argument of the gradient descent
+# routines in this module.
+StepMethod = Literal["constant", "golden_section", "armijo"]
+
+
+@dataclass
+class IterationInfo1D:
+    """Snapshot of one iterate recorded by the 1D optimizers in this module."""
+    # 1-based position of this record within the run.
+    iteration: int
+    # Point at which the function and its derivative were evaluated.
+    x: float
+    # Function value at x.
+    f_x: float
+    # Derivative at x.
+    grad: float
+    # Step size used to leave x; the optimizers store 0.0 in the final record.
+    step_size: float
+
+
+@dataclass
+class GradientDescentResult1D:
+    """Outcome of a 1D optimization run together with its recorded history."""
+    x_star: float  # last point reached by the optimizer
+    f_star: float  # function value at x_star
+    iterations: List[IterationInfo1D]  # per-iterate records, in order
+    converged: bool  # True when a stopping tolerance was met
+    method: str  # human-readable label of the method used
+
+    @property
+    def trajectory(self) -> List[float]:
+        """Return the x value of every recorded iterate, in order."""
+        return [record.x for record in self.iterations]
+
+
+@dataclass
+class IterationInfo2D:
+    """Snapshot of one iterate recorded by the 2D optimizers in this module."""
+    # 1-based position of this record within the run.
+    iteration: int
+    # Point at which the function and its gradient were evaluated.
+    x: np.ndarray
+    # Function value at x.
+    f_x: float
+    # Gradient vector at x.
+    grad: np.ndarray
+    # Step size used to leave x; the optimizers store 0.0 in the final record.
+    step_size: float
+
+
+@dataclass
+class GradientDescentResult2D:
+    """Outcome of a 2D optimization run together with its recorded history."""
+    x_star: np.ndarray  # last point reached by the optimizer
+    f_star: float  # function value at x_star
+    iterations: List[IterationInfo2D]  # per-iterate records, in order
+    converged: bool  # True when a stopping criterion was met
+    method: str  # human-readable label of the method used
+
+    @property
+    def trajectory(self) -> List[np.ndarray]:
+        """Return the point of every recorded iterate, in order."""
+        return [record.x for record in self.iterations]
+
+
+def gradient_descent_1d(
+    func: Function1D,
+    x0: float,
+    step_method: StepMethod = "constant",
+    step_size: float = 0.1,
+    eps_x: float = 0.05,
+    eps_f: float = 0.001,
+    max_iters: int = 100,
+    armijo_params: Optional[dict] = None,
+    golden_section_bounds: Optional[tuple] = None,
+) -> GradientDescentResult1D:
+    """
+    Minimize a 1D function with the update x_{k+1} = x_k - alpha_k * f'(x_k).
+
+    The run stops as soon as both |x_{k+1} - x_k| < eps_x and
+    |f(x_{k+1}) - f(x_k)| < eps_f hold, or after max_iters steps.
+    The history holds one record per performed step plus a closing
+    record (step_size 0.0) for the last point reached.
+
+    Args:
+        func: Function to minimize
+        x0: Starting point
+        step_method: How alpha_k is chosen ("constant", "golden_section", "armijo")
+        step_size: alpha used by the "constant" method
+        eps_x: Tolerance on the change of x
+        eps_f: Tolerance on the change of f
+        max_iters: Maximum number of steps
+        armijo_params: Armijo settings with keys d_init, epsilon, theta;
+            missing keys fall back to 1.0, 0.1 and 0.5
+        golden_section_bounds: Interval (a, b) searched for alpha by the
+            "golden_section" method; (0.0, 2.0) when not given
+
+    Returns:
+        GradientDescentResult1D with the last point, its value and the history
+
+    Raises:
+        ValueError: If step_method is not a supported name (raised when the
+            first step is selected)
+    """
+    labels = {
+        "constant": "Константный шаг",
+        "golden_section": "Золотое сечение",
+        "armijo": "Правило Армихо",
+    }
+
+    if not armijo_params:
+        armijo_params = {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}
+
+    history: List[IterationInfo1D] = []
+    current = x0
+    converged = False
+
+    for k in range(max_iters):
+        value = func(current)
+        slope = func.gradient(current)
+
+        if step_method == "armijo":
+            alpha = armijo_step_1d(
+                func, current, slope,
+                d_init=armijo_params.get("d_init", 1.0),
+                epsilon=armijo_params.get("epsilon", 0.1),
+                theta=armijo_params.get("theta", 0.5),
+            )
+        elif step_method == "golden_section":
+            interval = golden_section_bounds or (0.0, 2.0)
+
+            def along_ray(a, point=current, direction=slope):
+                # Objective restricted to the anti-gradient ray from `point`.
+                return func(point - a * direction)
+
+            alpha = golden_section_search(along_ray, interval[0], interval[1])
+        elif step_method == "constant":
+            alpha = step_size
+        else:
+            raise ValueError(f"Unknown step method: {step_method}")
+
+        history.append(IterationInfo1D(
+            iteration=k + 1,
+            x=current,
+            f_x=value,
+            grad=slope,
+            step_size=alpha,
+        ))
+
+        candidate = current - alpha * slope
+        candidate_value = func(candidate)
+
+        # Both tolerances must be met; evaluated before `current` is replaced.
+        done = (
+            abs(candidate - current) < eps_x
+            and abs(candidate_value - value) < eps_f
+        )
+        current = candidate
+        if done:
+            converged = True
+            break
+
+    # Closing record for the last point reached (no step is taken from it).
+    history.append(IterationInfo1D(
+        iteration=len(history) + 1,
+        x=current,
+        f_x=func(current),
+        grad=func.gradient(current),
+        step_size=0.0,
+    ))
+
+    return GradientDescentResult1D(
+        x_star=current,
+        f_star=func(current),
+        iterations=history,
+        converged=converged,
+        method=labels.get(step_method, step_method),
+    )
+
+
+def gradient_descent_2d(
+    func: Function2D,
+    x0: np.ndarray,
+    step_method: StepMethod = "constant",
+    step_size: float = 0.01,
+    eps_x: float = 1e-5,
+    eps_f: float = 1e-6,
+    max_iters: int = 1000,
+    armijo_params: Optional[dict] = None,
+    golden_section_bounds: Optional[tuple] = None,
+) -> GradientDescentResult2D:
+    """
+    Minimize a 2D function with the update x_{k+1} = x_k - alpha_k * grad f(x_k).
+
+    The run stops when the gradient norm falls below 1e-10, when both
+    ||x_{k+1} - x_k|| < eps_x and |f(x_{k+1}) - f(x_k)| < eps_f hold, or
+    after max_iters steps. The history holds one record per performed step
+    plus exactly one closing record (step_size 0.0) for the last point.
+
+    Args:
+        func: Function to minimize
+        x0: Starting point [x1, x2]; copied into a float array
+        step_method: How alpha_k is chosen ("constant", "golden_section", "armijo")
+        step_size: alpha used by the "constant" method
+        eps_x: Tolerance on the norm of the change of x
+        eps_f: Tolerance on the change of f
+        max_iters: Maximum number of steps
+        armijo_params: Armijo settings with keys d_init, epsilon, theta;
+            missing keys fall back to 1.0, 0.1 and 0.5
+        golden_section_bounds: Interval (a, b) searched for alpha by the
+            "golden_section" method; (0.0, 1.0) when not given
+
+    Returns:
+        GradientDescentResult2D with the last point, its value and the history
+
+    Raises:
+        ValueError: If step_method is not a supported name (raised when the
+            first step is selected)
+    """
+    x = np.array(x0, dtype=float)
+    iterations: List[IterationInfo2D] = []
+    converged = False
+
+    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}
+
+    for k in range(max_iters):
+        f_x = func(x)
+        grad = func.gradient(x)
+
+        # Stationary point: stop here. The point is recorded once by the
+        # closing record below; appending it here as well would duplicate
+        # the last trajectory entry and inflate the iteration count.
+        if np.linalg.norm(grad) < 1e-10:
+            converged = True
+            break
+
+        # Select step size
+        if step_method == "constant":
+            alpha = step_size
+        elif step_method == "golden_section":
+            bounds = golden_section_bounds or (0.0, 1.0)
+
+            def phi(a, point=x, direction=grad):
+                # Objective restricted to the anti-gradient ray from `point`.
+                return func(point - a * direction)
+
+            alpha = golden_section_search(phi, bounds[0], bounds[1])
+        elif step_method == "armijo":
+            alpha = armijo_step(
+                func, x, grad,
+                d_init=armijo_params.get("d_init", 1.0),
+                epsilon=armijo_params.get("epsilon", 0.1),
+                theta=armijo_params.get("theta", 0.5),
+            )
+        else:
+            raise ValueError(f"Unknown step method: {step_method}")
+
+        iterations.append(IterationInfo2D(
+            iteration=k + 1,
+            x=x.copy(),
+            f_x=f_x,
+            grad=grad.copy(),
+            step_size=alpha,
+        ))
+
+        # Update x
+        x_new = x - alpha * grad
+        f_new = func(x_new)
+
+        # Both tolerances must be met; evaluated before x is replaced.
+        small_change = (
+            np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f
+        )
+        x = x_new
+        if small_change:
+            converged = True
+            break
+
+    # Closing record for the last point reached (no step is taken from it).
+    iterations.append(IterationInfo2D(
+        iteration=len(iterations) + 1,
+        x=x.copy(),
+        f_x=func(x),
+        grad=func.gradient(x),
+        step_size=0.0,
+    ))
+
+    method_names = {
+        "constant": "Константный шаг",
+        "golden_section": "Золотое сечение",
+        "armijo": "Правило Армихо",
+    }
+
+    return GradientDescentResult2D(
+        x_star=x,
+        f_star=func(x),
+        iterations=iterations,
+        converged=converged,
+        method=method_names.get(step_method, step_method),
+    )
+
+
+def heavy_ball_1d(
+    func: Function1D,
+    x0: float,
+    alpha: float = 0.1,
+    beta: float = 0.9,
+    eps_x: float = 0.05,
+    eps_f: float = 0.001,
+    max_iters: int = 100,
+) -> GradientDescentResult1D:
+    """
+    Minimize a 1D function with the Heavy Ball (momentum) method.
+
+    x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})
+
+    The very first step carries no momentum. The run stops as soon as both
+    |x_{k+1} - x_k| < eps_x and |f(x_{k+1}) - f(x_k)| < eps_f hold, or after
+    max_iters steps. The history holds one record per performed step plus a
+    closing record (step_size 0.0) for the last point reached.
+
+    Args:
+        func: Function to minimize
+        x0: Starting point
+        alpha: Step size (learning rate)
+        beta: Momentum parameter (0 <= beta < 1)
+        eps_x: Tolerance on the change of x
+        eps_f: Tolerance on the change of f
+        max_iters: Maximum number of steps
+
+    Returns:
+        GradientDescentResult1D with the last point, its value and the history
+    """
+    history: List[IterationInfo1D] = []
+    previous = current = x0
+    converged = False
+
+    for k in range(max_iters):
+        value = func(current)
+        slope = func.gradient(current)
+
+        # No previous displacement exists on the first step.
+        if k > 0:
+            inertia = beta * (current - previous)
+        else:
+            inertia = 0.0
+
+        history.append(IterationInfo1D(
+            iteration=k + 1,
+            x=current,
+            f_x=value,
+            grad=slope,
+            step_size=alpha,
+        ))
+
+        candidate = current - alpha * slope + inertia
+        candidate_value = func(candidate)
+
+        # Both tolerances must be met; evaluated before the points shift.
+        done = (
+            abs(candidate - current) < eps_x
+            and abs(candidate_value - value) < eps_f
+        )
+        previous, current = current, candidate
+        if done:
+            converged = True
+            break
+
+    # Closing record for the last point reached (no step is taken from it).
+    history.append(IterationInfo1D(
+        iteration=len(history) + 1,
+        x=current,
+        f_x=func(current),
+        grad=func.gradient(current),
+        step_size=0.0,
+    ))
+
+    return GradientDescentResult1D(
+        x_star=current,
+        f_star=func(current),
+        iterations=history,
+        converged=converged,
+        method=f"Тяжёлый шарик (α={alpha}, β={beta})",
+    )
+
+
+def heavy_ball_2d(
+    func: Function2D,
+    x0: np.ndarray,
+    alpha: float = 0.01,
+    beta: float = 0.9,
+    eps_x: float = 1e-5,
+    eps_f: float = 1e-6,
+    max_iters: int = 1000,
+) -> GradientDescentResult2D:
+    """
+    Minimize a 2D function with the Heavy Ball (momentum) method.
+
+    x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})
+
+    The very first step carries no momentum. The run stops when the
+    gradient norm falls below 1e-10, when both ||x_{k+1} - x_k|| < eps_x and
+    |f(x_{k+1}) - f(x_k)| < eps_f hold, or after max_iters steps. The
+    history holds one record per performed step plus exactly one closing
+    record (step_size 0.0) for the last point reached.
+
+    Args:
+        func: Function to minimize
+        x0: Starting point [x1, x2]; copied into a float array
+        alpha: Step size (learning rate)
+        beta: Momentum parameter (0 <= beta < 1)
+        eps_x: Tolerance on the norm of the change of x
+        eps_f: Tolerance on the change of f
+        max_iters: Maximum number of steps
+
+    Returns:
+        GradientDescentResult2D with the last point, its value and the history
+    """
+    x = np.array(x0, dtype=float)
+    x_prev = x.copy()  # For first iteration, no momentum
+    iterations: List[IterationInfo2D] = []
+    converged = False
+
+    for k in range(max_iters):
+        f_x = func(x)
+        grad = func.gradient(x)
+
+        # Stationary point: stop here. The point is recorded once by the
+        # closing record below; appending it here as well would duplicate
+        # the last trajectory entry and inflate the iteration count.
+        if np.linalg.norm(grad) < 1e-10:
+            converged = True
+            break
+
+        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
+        momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)
+
+        iterations.append(IterationInfo2D(
+            iteration=k + 1,
+            x=x.copy(),
+            f_x=f_x,
+            grad=grad.copy(),
+            step_size=alpha,
+        ))
+
+        # Update x
+        x_new = x - alpha * grad + momentum
+        f_new = func(x_new)
+
+        # Both tolerances must be met; evaluated before the points shift.
+        small_change = (
+            np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f
+        )
+        x_prev = x.copy()
+        x = x_new
+        if small_change:
+            converged = True
+            break
+
+    # Closing record for the last point reached (no step is taken from it).
+    iterations.append(IterationInfo2D(
+        iteration=len(iterations) + 1,
+        x=x.copy(),
+        f_x=func(x),
+        grad=func.gradient(x),
+        step_size=0.0,
+    ))
+
+    return GradientDescentResult2D(
+        x_star=x,
+        f_star=func(x),
+        iterations=iterations,
+        converged=converged,
+        method=f"Тяжёлый шарик (α={alpha}, β={beta})",
+    )
+
--- a/task2/common/line_search.py
+++ b/task2/common/line_search.py
@@ -0,0 +1,139 @@
+"""Line search methods for step size selection."""
+
+import math
+from typing import Callable, Tuple
+import numpy as np
+
+
+def golden_section_search(
+    phi: Callable[[float], float],
+    a: float,
+    b: float,
+    tol: float = 1e-5,
+    max_iters: int = 100,
+) -> float:
+    """
+    Approximate argmin phi(alpha) on [a, b] by golden section search.
+
+    Two interior probes split the bracket in golden-ratio proportions.
+    Each round keeps the part of the bracket around the lower probe and
+    reuses the other probe, so only one new evaluation of phi is needed.
+
+    Args:
+        phi: Function to minimize (typically f(x - alpha * grad))
+        a: Left end of the search interval
+        b: Right end of the search interval
+        tol: The search stops once the bracket is shorter than this
+        max_iters: Maximum number of bracket reductions
+
+    Returns:
+        Midpoint of the final bracket
+    """
+    golden = (1 + math.sqrt(5)) / 2
+    r = 1 / golden  # ~0.618
+    c = 1 - r       # ~0.382
+
+    lo, hi = a, b
+    left = lo + c * (hi - lo)
+    right = lo + r * (hi - lo)
+    f_left = phi(left)
+    f_right = phi(right)
+
+    for _ in range(max_iters):
+        if hi - lo < tol:
+            break
+
+        if f_left <= f_right:
+            # Keep [lo, right]; the old left probe becomes the right probe.
+            hi, right, f_right = right, left, f_left
+            left = lo + c * (hi - lo)
+            f_left = phi(left)
+        else:
+            # Keep [left, hi]; the old right probe becomes the left probe.
+            lo, left, f_left = left, right, f_right
+            right = lo + r * (hi - lo)
+            f_right = phi(right)
+
+    return (lo + hi) / 2
+
+
+def armijo_step(
+    f: Callable[[np.ndarray], float],
+    x: np.ndarray,
+    grad: np.ndarray,
+    d_init: float = 1.0,
+    epsilon: float = 0.1,
+    theta: float = 0.5,
+    max_iters: int = 100,
+) -> float:
+    """
+    Choose a step size by backtracking under the Armijo rule.
+
+    Starting from d_init, the step d is multiplied by theta until
+    f(x - d * grad) <= f(x) - epsilon * d * ||grad||^2
+    holds. The search direction is s = -grad, hence <grad, s> = -||grad||^2.
+
+    Args:
+        f: Function to minimize
+        x: Current point
+        grad: Gradient at x
+        d_init: Initial step size
+        epsilon: Armijo parameter (0 < epsilon < 1)
+        theta: Step reduction factor (0 < theta < 1)
+        max_iters: Maximum number of reductions
+
+    Returns:
+        The first step size that satisfies the condition. If none of the
+        max_iters candidates does, the step after the last reduction is
+        returned without being tested.
+    """
+    step = d_init
+    reference = f(x)
+    grad_norm_sq = np.dot(grad, grad)
+
+    for _ in range(max_iters):
+        trial = x - step * grad
+        # Sufficient decrease test for the current candidate step.
+        if f(trial) <= reference - epsilon * step * grad_norm_sq:
+            break
+        step *= theta
+
+    return step
+
+
+def armijo_step_1d(
+    f: Callable[[float], float],
+    x: float,
+    grad: float,
+    d_init: float = 1.0,
+    epsilon: float = 0.1,
+    theta: float = 0.5,
+    max_iters: int = 100,
+) -> float:
+    """
+    Choose a step size by backtracking under the Armijo rule (1D version).
+
+    Starting from d_init, the step d is multiplied by theta until
+    f(x - d * grad) <= f(x) - epsilon * d * grad^2 holds.
+
+    Args:
+        f: Function to minimize
+        x: Current point
+        grad: Gradient (derivative) at x
+        d_init: Initial step size
+        epsilon: Armijo parameter (0 < epsilon < 1)
+        theta: Step reduction factor (0 < theta < 1)
+        max_iters: Maximum number of reductions
+
+    Returns:
+        The first step size that satisfies the condition. If none of the
+        max_iters candidates does, the step after the last reduction is
+        returned without being tested.
+    """
+    step = d_init
+    reference = f(x)
+    slope_sq = grad * grad
+
+    for _ in range(max_iters):
+        trial = x - step * grad
+        # Sufficient decrease test for the current candidate step.
+        if f(trial) <= reference - epsilon * step * slope_sq:
+            break
+        step *= theta
+
+    return step
+