diff --git a/task2/1d/gradient_descent_1d.py b/task2/1d/gradient_descent_1d.py
new file mode 100644
index 0000000..a9b83be
--- /dev/null
+++ b/task2/1d/gradient_descent_1d.py
@@ -0,0 +1,348 @@
+#!/usr/bin/env python3
+"""
+Gradient descent for a one-dimensional function with three step-size strategies:
+1. Constant step
+2. Golden-section search (one-dimensional line minimization)
+3. Armijo rule
+
+Function: f(x) = sqrt(x^2 + 9) / 4 + (5 - x) / 5
+Domain: [-3, 8]
+"""
+
+import shutil
+import sys
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import matplotlib.pyplot as plt
+import numpy as np
+from common.functions import TaskFunction1D
+from common.gradient_descent import (
+    GradientDescentResult1D,
+    gradient_descent_1d,
+    heavy_ball_1d,
+)
+
+# ============================================================================
+# SETTINGS
+# ============================================================================
+
+# Starting point
+X0 = -1.0
+
+# Convergence parameters
+EPS_X = 0.05
+EPS_F = 0.001
+MAX_ITERS = 100
+
+# Step for the constant method (the derivative of this function is small in
+# magnitude, so this value still gives moderate moves and 3-4+ iterations)
+CONSTANT_STEP = 12
+
+# Parameters for the Armijo rule
+ARMIJO_PARAMS = {
+    "d_init": 12.0,
+    "epsilon": 0.1,
+    "theta": 0.5,
+}
+
+# Bounds for the golden-section search
+GOLDEN_SECTION_BOUNDS = (0.0, 30.0)
+
+# Parameters for the heavy-ball method
+HEAVY_BALL_ALPHA = 0.5
+HEAVY_BALL_BETA = 0.8
+
+# Directory for saved plots
+OUTPUT_DIR = Path(__file__).parent / "plots"
+
+
+# ============================================================================
+# VISUALIZATION
+# ============================================================================
+
+
+def plot_iteration(
+    func: TaskFunction1D,
+    result: GradientDescentResult1D,
+    iter_idx: int,
+    output_path: Path,
+):
+    """Plot a single iteration."""
+    info = result.iterations[iter_idx]
+
+    # Plotting range
+    a, b = func.domain
+    x_plot = np.linspace(a, b, 500)
+    y_plot = [func(x) for x in x_plot]
+
+    plt.figure(figsize=(10, 6))
+
+    # The function
+    plt.plot(x_plot, y_plot, "b-", linewidth=2, label="f(x)")
+
+    # Trajectory up to the current point
+    trajectory_x = [result.iterations[i].x for i in range(iter_idx + 1)]
+    trajectory_y = [result.iterations[i].f_x for i in range(iter_idx + 1)]
+
+    if len(trajectory_x) > 1:
+        plt.plot(
+            trajectory_x,
+            trajectory_y,
+            "g--",
+            linewidth=1.5,
+            alpha=0.7,
+            label="Trajectory",
+        )
+
+    # Previous points
+    for x, y in zip(trajectory_x[:-1], trajectory_y[:-1]):
+        plt.plot(x, y, "go", markersize=6, alpha=0.5)
+
+    # Current point
+    plt.plot(
+        info.x,
+        info.f_x,
+        "ro",
+        markersize=12,
+        label=f"x = {info.x:.4f}, f(x) = {info.f_x:.4f}",
+    )
+
+    # Gradient direction (tangent line)
+    grad_scale = 0.5
+    x_grad = np.array([info.x - grad_scale, info.x + grad_scale])
+    y_grad = info.f_x + info.grad * (x_grad - info.x)
+    plt.plot(
+        x_grad, y_grad, "m-", linewidth=2, alpha=0.6, label=f"f'(x) = {info.grad:.4f}"
+    )
+
+    plt.xlabel("x", fontsize=12)
+    plt.ylabel("f(x)", fontsize=12)
+    plt.title(
+        f"{result.method} — iteration {info.iteration}\nStep: {info.step_size:.6f}",
+        fontsize=14,
+        fontweight="bold",
+    )
+    plt.legend(fontsize=10, loc="upper right")
+    plt.grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+
+def plot_final_result(
+    func: TaskFunction1D,
+    result: GradientDescentResult1D,
+    output_path: Path,
+):
+    """Plot the final result with the full trajectory."""
+    a, b = func.domain
+    x_plot = np.linspace(a, b, 500)
+    y_plot = [func(x) for x in x_plot]
+
+    plt.figure(figsize=(10, 6))
+
+    # The function
+    plt.plot(x_plot, y_plot, "b-", linewidth=2, label="f(x)")
+
+    # Trajectory
+    trajectory_x = [it.x for it in result.iterations]
+    trajectory_y = [it.f_x for it in result.iterations]
+    plt.plot(
+        trajectory_x, trajectory_y, "g-", linewidth=2, alpha=0.7, label="Trajectory"
+    )
+
+    # All points
+    for x, y in zip(trajectory_x[:-1], trajectory_y[:-1]):
+        plt.plot(x, y, "go", markersize=8, alpha=0.6)
+
+    # Final point
+    plt.plot(
+        result.x_star,
+        result.f_star,
+        "r*",
+        markersize=20,
+        label=f"x* = {result.x_star:.6f}\nf(x*) = {result.f_star:.6f}",
+    )
+
+    plt.xlabel("x", fontsize=12)
+    plt.ylabel("f(x)", fontsize=12)
+    plt.title(
+        f"{result.method} — result\nIterations: {len(result.iterations) - 1}",
+        fontsize=14,
+        fontweight="bold",
+    )
+    plt.legend(fontsize=10, loc="upper right")
+    plt.grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+
+def run_and_visualize(
+    func: TaskFunction1D,
+    method: str,
+    method_name_short: str,
+    **kwargs,
+):
+    """Run a method and produce its visualizations."""
+
+    result = gradient_descent_1d(
+        func=func,
+        x0=X0,
+        step_method=method,
+        eps_x=EPS_X,
+        eps_f=EPS_F,
+        max_iters=MAX_ITERS,
+        **kwargs,
+    )
+
+    # Create a directory for this method
+    method_dir = OUTPUT_DIR / method_name_short
+    method_dir.mkdir(parents=True, exist_ok=True)
+
+    # Print a summary
+    print(f"\n{'=' * 80}")
+    print(f"{result.method}")
+    print("=" * 80)
+
+    for info in result.iterations[:-1]:  # Skip the final point
+        print(
+            f"Iteration {info.iteration:3d}: "
+            f"x = {info.x:10.6f}, f(x) = {info.f_x:10.6f}, "
+            f"f'(x) = {info.grad:10.6f}, step = {info.step_size:.6f}"
+        )
+
+        # Plot each iteration
+        plot_iteration(
+            func,
+            result,
+            info.iteration - 1,
+            method_dir / f"iteration_{info.iteration:02d}.png",
+        )
+
+    # Final summary
+    print("-" * 80)
+    print(f"x* = {result.x_star:.6f}")
+    print(f"f(x*) = {result.f_star:.6f}")
+    print(f"Iterations: {len(result.iterations) - 1}")
+
+    # Final plot
+    plot_final_result(func, result, method_dir / "final_result.png")
+    print(f"Plots saved to: {method_dir}")
+
+    return result
+
+
+def run_and_visualize_heavy_ball(
+    func: TaskFunction1D,
+    method_name_short: str,
+    alpha: float,
+    beta: float,
+):
+    """Run the heavy-ball method and produce its visualizations."""
+
+    result = heavy_ball_1d(
+        func=func,
+        x0=X0,
+        alpha=alpha,
+        beta=beta,
+        eps_x=EPS_X,
+        eps_f=EPS_F,
+        max_iters=MAX_ITERS,
+    )
+
+    # Create a directory for this method
+    method_dir = OUTPUT_DIR / method_name_short
+    method_dir.mkdir(parents=True, exist_ok=True)
+
+    # Print a summary
+    print(f"\n{'=' * 80}")
+    print(f"{result.method}")
+    print("=" * 80)
+
+    for info in result.iterations[:-1]:
+        print(
+            f"Iteration {info.iteration:3d}: "
+            f"x = {info.x:10.6f}, f(x) = {info.f_x:10.6f}, "
+            f"f'(x) = {info.grad:10.6f}, step = {info.step_size:.6f}"
+        )
+
+        plot_iteration(
+            func,
+            result,
+            info.iteration - 1,
+            method_dir / f"iteration_{info.iteration:02d}.png",
+        )
+
+    # Final summary
+    print("-" * 80)
+    print(f"x* = {result.x_star:.6f}")
+    print(f"f(x*) = {result.f_star:.6f}")
+    print(f"Iterations: {len(result.iterations) - 1}")
+
+    # Final plot
+    plot_final_result(func, result, method_dir / "final_result.png")
+    print(f"Plots saved to: {method_dir}")
+
+    return result
+
+
+def main():
+    """Entry point."""
+
+    func = TaskFunction1D()
+
+    print("=" * 80)
+    print("GRADIENT DESCENT FOR A ONE-DIMENSIONAL FUNCTION")
+    print("=" * 80)
+    print(f"Function: {func.name}")
+    print(f"Domain: {func.domain}")
+    print(f"Starting point: x₀ = {X0}")
+    print(f"Parameters: eps_x = {EPS_X}, eps_f = {EPS_F}, max_iters = {MAX_ITERS}")
+
+    # Clear and recreate the plots directory
+    if OUTPUT_DIR.exists():
+        shutil.rmtree(OUTPUT_DIR)
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    # 1. Constant step
+    run_and_visualize(
+        func,
+        method="constant",
+        method_name_short="constant",
+        step_size=CONSTANT_STEP,
+    )
+
+    # 2. Golden-section search
+    run_and_visualize(
+        func,
+        method="golden_section",
+        method_name_short="golden_section",
+        golden_section_bounds=GOLDEN_SECTION_BOUNDS,
+    )
+
+    # 3. Armijo rule
+    run_and_visualize(
+        func,
+        method="armijo",
+        method_name_short="armijo",
+        armijo_params=ARMIJO_PARAMS,
+    )
+
+    # 4. Heavy-ball method
+    run_and_visualize_heavy_ball(
+        func,
+        method_name_short="heavy_ball",
+        alpha=HEAVY_BALL_ALPHA,
+        beta=HEAVY_BALL_BETA,
+    )
+
+    print("\n" + "=" * 80)
+    print("DONE! All plots saved.")
+    print("=" * 80)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/task2/2d/gradient_descent_2d.py b/task2/2d/gradient_descent_2d.py
new file mode 100644
index 0000000..9ede13f
--- /dev/null
+++ b/task2/2d/gradient_descent_2d.py
@@ -0,0 +1,514 @@
+#!/usr/bin/env python3
+"""
+Gradient descent for two-dimensional functions with three step-size strategies:
+1. Constant step
+2. Golden-section search (one-dimensional line minimization)
+3. Armijo rule
+"""
+
+import shutil
+import sys
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import matplotlib.pyplot as plt
+import numpy as np
+from common.functions import Function2D, HimmelblauFunction, RavineFunction
+from common.gradient_descent import (
+    GradientDescentResult2D,
+    gradient_descent_2d,
+    heavy_ball_2d,
+)
+from matplotlib import cm
+
+# ============================================================================
+# SETTINGS
+# ============================================================================
+
+# Function choice: "himmelblau" or "ravine"
+FUNCTION_CHOICE = "himmelblau"
+
+# Starting points for the different functions
+START_POINTS = {
+    "himmelblau": np.array([0.0, 0.0]),
+    "ravine": np.array([1.0, 0.3]),  # Start inside the ravine
+}
+
+# Convergence parameters
+EPS_X = 1e-2
+EPS_F = 1e-2
+MAX_ITERS = 200
+
+CONSTANT_STEPS = {
+    "himmelblau": 0.01,
+    "ravine": 0.01,
+}
+
+# Parameters for the Armijo rule
+ARMIJO_PARAMS = {
+    "d_init": 1.0,
+    "epsilon": 0.1,
+    "theta": 0.5,
+}
+
+# Bounds for the golden-section search
+GOLDEN_SECTION_BOUNDS = (0.0, 1.0)
+
+# Parameters for the heavy-ball method
+HEAVY_BALL_PARAMS = {
+    "himmelblau": {"alpha": 0.01, "beta": 0.7},
+    "ravine": {"alpha": 0.01, "beta": 0.8},
+}
+
+# Directory for saved plots
+OUTPUT_DIR = Path(__file__).parent / "plots"
+
+
+# ============================================================================
+# VISUALIZATION
+# ============================================================================
+
+
+def create_contour_grid(func: Function2D, resolution: int = 200):
+    """Build the grid for contour plots."""
+    (x1_min, x1_max), (x2_min, x2_max) = func.plot_bounds
+    x1 = np.linspace(x1_min, x1_max, resolution)
+    x2 = np.linspace(x2_min, x2_max, resolution)
+    X1, X2 = np.meshgrid(x1, x2)
+
+    Z = np.zeros_like(X1)
+    for i in range(X1.shape[0]):
+        for j in range(X1.shape[1]):
+            Z[i, j] = func(np.array([X1[i, j], X2[i, j]]))
+
+    return X1, X2, Z
+
+
+def plot_iteration_2d(
+    func: Function2D,
+    result: GradientDescentResult2D,
+    iter_idx: int,
+    output_path: Path,
+    X1: np.ndarray,
+    X2: np.ndarray,
+    Z: np.ndarray,
+    levels: np.ndarray,
+):
+    """Draw a contour plot for a single iteration."""
+    info = result.iterations[iter_idx]
+
+    fig, ax = plt.subplots(figsize=(10, 8))
+
+    # Contour lines
+    contour = ax.contour(
+        X1, X2, Z, levels=levels, colors="gray", alpha=0.6, linewidths=0.8
+    )
+    ax.clabel(contour, inline=True, fontsize=8, fmt="%.1f")
+
+    # Filled contours
+    ax.contourf(X1, X2, Z, levels=levels, cmap=cm.viridis, alpha=0.3)
+
+    # Trajectory up to the current point
+    trajectory = np.array([result.iterations[i].x for i in range(iter_idx + 1)])
+
+    if len(trajectory) > 1:
+        ax.plot(
+            trajectory[:, 0],
+            trajectory[:, 1],
+            "b-",
+            linewidth=2,
+            alpha=0.7,
+            label="Trajectory",
+            zorder=5,
+        )
+
+    # Previous points
+    for i in range(len(trajectory) - 1):
+        ax.plot(
+            trajectory[i, 0], trajectory[i, 1], "bo", markersize=6, alpha=0.5, zorder=6
+        )
+
+    # Current point
+    ax.plot(
+        info.x[0],
+        info.x[1],
+        "ro",
+        markersize=12,
+        label=f"x = ({info.x[0]:.4f}, {info.x[1]:.4f})\nf(x) = {info.f_x:.4f}",
+        zorder=7,
+    )
+
+    # Anti-gradient direction
+    grad_norm = np.linalg.norm(info.grad)
+    if grad_norm > 0:
+        direction = -info.grad / grad_norm * 0.5  # Normalize and scale
+        ax.arrow(
+            info.x[0],
+            info.x[1],
+            direction[0],
+            direction[1],
+            head_width=0.1,
+            head_length=0.05,
+            fc="magenta",
+            ec="magenta",
+            alpha=0.7,
+            zorder=8,
+        )
+
+    ax.set_xlabel("x₁", fontsize=12)
+    ax.set_ylabel("x₂", fontsize=12)
+    ax.set_title(
+        f"{result.method} — iteration {info.iteration}\n"
+        f"Step: {info.step_size:.6f}, ||∇f|| = {np.linalg.norm(info.grad):.6f}",
+        fontsize=14,
+        fontweight="bold",
+    )
+    ax.legend(fontsize=10, loc="upper right")
+    ax.set_aspect("equal")
+    ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+
+def plot_final_result_2d(
+    func: Function2D,
+    result: GradientDescentResult2D,
+    output_path: Path,
+    X1: np.ndarray,
+    X2: np.ndarray,
+    Z: np.ndarray,
+    levels: np.ndarray,
+):
+    """Draw the final contour plot with the full trajectory."""
+    fig, ax = plt.subplots(figsize=(10, 8))
+
+    # Contour lines
+    contour = ax.contour(
+        X1, X2, Z, levels=levels, colors="gray", alpha=0.6, linewidths=0.8
+    )
+    ax.clabel(contour, inline=True, fontsize=8, fmt="%.1f")
+
+    # Filled contours
+    ax.contourf(X1, X2, Z, levels=levels, cmap=cm.viridis, alpha=0.3)
+
+    # Trajectory
+    trajectory = np.array([it.x for it in result.iterations])
+    ax.plot(
+        trajectory[:, 0],
+        trajectory[:, 1],
+        "b-",
+        linewidth=2,
+        alpha=0.8,
+        label="Trajectory",
+        zorder=5,
+    )
+
+    # All points
+    for point in trajectory[:-1]:
+        ax.plot(point[0], point[1], "bo", markersize=6, alpha=0.5, zorder=6)
+
+    # Starting point
+    ax.plot(
+        trajectory[0, 0],
+        trajectory[0, 1],
+        "go",
+        markersize=12,
+        label=f"Start: ({trajectory[0, 0]:.2f}, {trajectory[0, 1]:.2f})",
+        zorder=7,
+    )
+
+    # Final point
+    ax.plot(
+        result.x_star[0],
+        result.x_star[1],
+        "r*",
+        markersize=20,
+        label=f"x* = ({result.x_star[0]:.4f}, {result.x_star[1]:.4f})\n"
+        f"f(x*) = {result.f_star:.6f}",
+        zorder=8,
+    )
+
+    ax.set_xlabel("x₁", fontsize=12)
+    ax.set_ylabel("x₂", fontsize=12)
+    ax.set_title(
+        f"{result.method} — result\nIterations: {len(result.iterations) - 1}",
+        fontsize=14,
+        fontweight="bold",
+    )
+    ax.legend(fontsize=10, loc="upper right")
+    ax.set_aspect("equal")
+    ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+
+def run_and_visualize_2d(
+    func: Function2D,
+    x0: np.ndarray,
+    method: str,
+    method_name_short: str,
+    X1: np.ndarray,
+    X2: np.ndarray,
+    Z: np.ndarray,
+    levels: np.ndarray,
+    max_plot_iters: int = 20,
+    **kwargs,
+):
+    """Run a method and produce its visualizations."""
+
+    result = gradient_descent_2d(
+        func=func,
+        x0=x0,
+        step_method=method,
+        eps_x=EPS_X,
+        eps_f=EPS_F,
+        max_iters=MAX_ITERS,
+        **kwargs,
+    )
+
+    # Create a directory for this method
+    method_dir = OUTPUT_DIR / method_name_short
+    method_dir.mkdir(parents=True, exist_ok=True)
+
+    # Print a summary
+    print(f"\n{'=' * 80}")
+    print(f"{result.method}")
+    print("=" * 80)
+
+    # Decide which iterations to visualize
+    total_iters = len(result.iterations) - 1
+    if total_iters <= max_plot_iters:
+        plot_indices = list(range(total_iters))
+    else:
+        # Pick evenly spaced iterations
+        step = total_iters / max_plot_iters
+        plot_indices = [int(i * step) for i in range(max_plot_iters)]
+        if total_iters - 1 not in plot_indices:
+            plot_indices.append(total_iters - 1)
+
+    for idx, info in enumerate(result.iterations[:-1]):
+        print(
+            f"Iteration {info.iteration:3d}: "
+            f"x = ({info.x[0]:10.6f}, {info.x[1]:10.6f}), "
+            f"f(x) = {info.f_x:12.6f}, ||∇f|| = {np.linalg.norm(info.grad):10.6f}, "
+            f"step = {info.step_size:.6f}"
+        )
+
+        # Plot only the selected iterations
+        if idx in plot_indices:
+            plot_iteration_2d(
+                func,
+                result,
+                idx,
+                method_dir / f"iteration_{info.iteration:03d}.png",
+                X1,
+                X2,
+                Z,
+                levels,
+            )
+
+    # Final summary
+    print("-" * 80)
+    print(f"x* = ({result.x_star[0]:.6f}, {result.x_star[1]:.6f})")
+    print(f"f(x*) = {result.f_star:.6f}")
+    print(f"Iterations: {len(result.iterations) - 1}")
+
+    # Final plot
+    plot_final_result_2d(
+        func, result, method_dir / "final_result.png", X1, X2, Z, levels
+    )
+    print(f"Plots saved to: {method_dir}")
+
+    return result
+
+
+def run_and_visualize_heavy_ball(
+    func: Function2D,
+    x0: np.ndarray,
+    method_name_short: str,
+    X1: np.ndarray,
+    X2: np.ndarray,
+    Z: np.ndarray,
+    levels: np.ndarray,
+    alpha: float,
+    beta: float,
+    max_plot_iters: int = 20,
+):
+    """Run the heavy-ball method and produce its visualizations."""
+
+    result = heavy_ball_2d(
+        func=func,
+        x0=x0,
+        alpha=alpha,
+        beta=beta,
+        eps_x=EPS_X,
+        eps_f=EPS_F,
+        max_iters=MAX_ITERS,
+    )
+
+    # Create a directory for this method
+    method_dir = OUTPUT_DIR / method_name_short
+    method_dir.mkdir(parents=True, exist_ok=True)
+
+    # Print a summary
+    print(f"\n{'=' * 80}")
+    print(f"{result.method}")
+    print("=" * 80)
+
+    # Decide which iterations to visualize
+    total_iters = len(result.iterations) - 1
+    if total_iters <= max_plot_iters:
+        plot_indices = list(range(total_iters))
+    else:
+        step = total_iters / max_plot_iters
+        plot_indices = [int(i * step) for i in range(max_plot_iters)]
+        if total_iters - 1 not in plot_indices:
+            plot_indices.append(total_iters - 1)
+
+    for idx, info in enumerate(result.iterations[:-1]):
+        print(
+            f"Iteration {info.iteration:3d}: "
+            f"x = ({info.x[0]:10.6f}, {info.x[1]:10.6f}), "
+            f"f(x) = {info.f_x:12.6f}, ||∇f|| = {np.linalg.norm(info.grad):10.6f}, "
+            f"step = {info.step_size:.6f}"
+        )
+
+        if idx in plot_indices:
+            plot_iteration_2d(
+                func,
+                result,
+                idx,
+                method_dir / f"iteration_{info.iteration:03d}.png",
+                X1,
+                X2,
+                Z,
+                levels,
+            )
+
+    # Final summary
+    print("-" * 80)
+    print(f"x* = ({result.x_star[0]:.6f}, {result.x_star[1]:.6f})")
+    print(f"f(x*) = {result.f_star:.6f}")
+    print(f"Iterations: {len(result.iterations) - 1}")
+
+    # Final plot
+    plot_final_result_2d(
+        func, result, method_dir / "final_result.png", X1, X2, Z, levels
+    )
+    print(f"Plots saved to: {method_dir}")
+
+    return result
+
+
+def main():
+    """Entry point."""
+
+    # Select the function
+    if FUNCTION_CHOICE == "himmelblau":
+        func = HimmelblauFunction()
+    elif FUNCTION_CHOICE == "ravine":
+        func = RavineFunction()
+    else:
+        raise ValueError(f"Unknown function: {FUNCTION_CHOICE}")
+
+    x0 = START_POINTS[FUNCTION_CHOICE]
+    constant_step = CONSTANT_STEPS[FUNCTION_CHOICE]
+
+    print("=" * 80)
+    print("GRADIENT DESCENT FOR A TWO-DIMENSIONAL FUNCTION")
+    print("=" * 80)
+    print(f"Function: {func.name}")
+    print(f"Starting point: x₀ = ({x0[0]}, {x0[1]})")
+    print(f"Parameters: eps_x = {EPS_X}, eps_f = {EPS_F}, max_iters = {MAX_ITERS}")
+
+    # Clear and recreate the plots directory
+    if OUTPUT_DIR.exists():
+        shutil.rmtree(OUTPUT_DIR)
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Build the grid for contour plots (once)
+    print("\nBuilding the grid for contour plots...")
+    X1, X2, Z = create_contour_grid(func, resolution=200)
+
+    # Levels for the contour lines
+    z_min, z_max = Z.min(), Z.max()
+    if FUNCTION_CHOICE == "himmelblau":
+        # Roughly logarithmic levels for better visualization
+        levels = np.array([0.5, 1, 2, 5, 10, 20, 40, 80, 150, 300, 500])
+        levels = levels[levels < z_max]
+    elif FUNCTION_CHOICE == "ravine":
+        # Levels for the ravine function - ellipses
+        levels = np.array([0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 3, 5, 7, 10])
+        levels = levels[levels < z_max]
+    else:
+        levels = np.linspace(z_min, min(z_max, 100), 20)
+
+    # Make sure the levels are unique and sorted
+    levels = np.unique(levels)
+
+    # 1. Constant step
+    run_and_visualize_2d(
+        func,
+        x0,
+        method="constant",
+        method_name_short="constant",
+        X1=X1,
+        X2=X2,
+        Z=Z,
+        levels=levels,
+        step_size=constant_step,
+    )
+
+    # 2. Golden-section search
+    run_and_visualize_2d(
+        func,
+        x0,
+        method="golden_section",
+        method_name_short="golden_section",
+        X1=X1,
+        X2=X2,
+        Z=Z,
+        levels=levels,
+        golden_section_bounds=GOLDEN_SECTION_BOUNDS,
+    )
+
+    # 3. Armijo rule
+    run_and_visualize_2d(
+        func,
+        x0,
+        method="armijo",
+        method_name_short="armijo",
+        X1=X1,
+        X2=X2,
+        Z=Z,
+        levels=levels,
+        armijo_params=ARMIJO_PARAMS,
+    )
+
+    # 4. Heavy-ball method
+    hb_params = HEAVY_BALL_PARAMS[FUNCTION_CHOICE]
+    run_and_visualize_heavy_ball(
+        func,
+        x0,
+        method_name_short="heavy_ball",
+        X1=X1,
+        X2=X2,
+        Z=Z,
+        levels=levels,
+        alpha=hb_params["alpha"],
+        beta=hb_params["beta"],
+    )
Все графики сохранены.") + print("=" * 80) + + +if __name__ == "__main__": + main() diff --git a/task2/common/__init__.py b/task2/common/__init__.py new file mode 100644 index 0000000..99e45b2 --- /dev/null +++ b/task2/common/__init__.py @@ -0,0 +1,25 @@ +# Common utilities for gradient descent optimization +from .functions import ( + Function1D, + Function2D, + TaskFunction1D, + HimmelblauFunction, + RavineFunction, +) +from .line_search import golden_section_search, armijo_step +from .gradient_descent import gradient_descent_1d, gradient_descent_2d, heavy_ball_1d, heavy_ball_2d + +__all__ = [ + "Function1D", + "Function2D", + "TaskFunction1D", + "HimmelblauFunction", + "RavineFunction", + "golden_section_search", + "armijo_step", + "gradient_descent_1d", + "gradient_descent_2d", + "heavy_ball_1d", + "heavy_ball_2d", +] + diff --git a/task2/common/functions.py b/task2/common/functions.py new file mode 100644 index 0000000..bc9d059 --- /dev/null +++ b/task2/common/functions.py @@ -0,0 +1,147 @@ +"""Function definitions with their gradients for optimization.""" + +import math +from abc import ABC, abstractmethod +from typing import Tuple + +import numpy as np + + +class Function1D(ABC): + """Abstract base class for 1D functions.""" + + name: str = "Abstract 1D Function" + + @abstractmethod + def __call__(self, x: float) -> float: + """Evaluate function at x.""" + pass + + @abstractmethod + def gradient(self, x: float) -> float: + """Compute gradient (derivative) at x.""" + pass + + @property + @abstractmethod + def domain(self) -> Tuple[float, float]: + """Return the domain [a, b] for this function.""" + pass + + +class Function2D(ABC): + """Abstract base class for 2D functions.""" + + name: str = "Abstract 2D Function" + + @abstractmethod + def __call__(self, x: np.ndarray) -> float: + """Evaluate function at point x = [x1, x2].""" + pass + + @abstractmethod + def gradient(self, x: np.ndarray) -> np.ndarray: + """Compute gradient at point x = [x1, x2].""" + pass + + @property + @abstractmethod + def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]: + """Return bounds ((x1_min, x1_max), (x2_min, x2_max)) for plotting.""" + pass + + +class TaskFunction1D(Function1D): + """ + f(x) = sqrt(x^2 + 9) / 4 + (5 - x) / 5 + + Derivative: f'(x) = x / (4 * sqrt(x^2 + 9)) - 1/5 + """ + + name = "f(x) = √(x² + 9)/4 + (5 - x)/5" + + def __call__(self, x: float) -> float: + return math.sqrt(x**2 + 9) / 4 + (5 - x) / 5 + + def gradient(self, x: float) -> float: + return x / (4 * math.sqrt(x**2 + 9)) - 1 / 5 + + @property + def domain(self) -> Tuple[float, float]: + return (-3.0, 8.0) + + +class HimmelblauFunction(Function2D): + """ + Himmelblau's function: + f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2 + + Has 4 identical local minima at: + - (3.0, 2.0) + - (-2.805118, 3.131312) + - (-3.779310, -3.283186) + - (3.584428, -1.848126) + + Gradient: + ∂f/∂x = 4x(x² + y - 11) + 2(x + y² - 7) + ∂f/∂y = 2(x² + y - 11) + 4y(x + y² - 7) + """ + + name = "Himmelblau: (x² + y - 11)² + (x + y² - 7)²" + + def __call__(self, x: np.ndarray) -> float: + x1, x2 = x[0], x[1] + return (x1**2 + x2 - 11) ** 2 + (x1 + x2**2 - 7) ** 2 + + def gradient(self, x: np.ndarray) -> np.ndarray: + x1, x2 = x[0], x[1] + df_dx1 = 4 * x1 * (x1**2 + x2 - 11) + 2 * (x1 + x2**2 - 7) + df_dx2 = 2 * (x1**2 + x2 - 11) + 4 * x2 * (x1 + x2**2 - 7) + return np.array([df_dx1, df_dx2]) + + @property + def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]: + return ((-5.0, 5.0), (-5.0, 5.0)) + + +class 
+
+
+class RavineFunction(Function2D):
+    """
+    Ravine function (an elliptic paraboloid):
+        f(x, y) = x² + 20y²
+
+    Minimum at (0, 0), f(0, 0) = 0.
+
+    Demonstrates the "ravine effect": away from the ravine floor the gradient
+    is almost perpendicular to the direction toward the minimum, which slows
+    convergence of plain gradient descent.
+
+    Gradient:
+        ∂f/∂x = 2x
+        ∂f/∂y = 40y
+    """
+
+    name = "Ravine: f(x,y) = x² + 20y²"
+
+    def __call__(self, x: np.ndarray) -> float:
+        x1, x2 = x[0], x[1]
+        return x1**2 + 20 * x2**2
+
+    def gradient(self, x: np.ndarray) -> np.ndarray:
+        x1, x2 = x[0], x[1]
+        df_dx1 = 2 * x1
+        df_dx2 = 40 * x2
+        return np.array([df_dx1, df_dx2])
+
+    @property
+    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
+        return ((-2.0, 2.0), (-0.5, 0.5))
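+
+
+# Illustrative check of the "ravine effect" described in the docstring above.
+# This helper is an addition for exposition only and is not used anywhere else
+# in the repository. At x = (1.0, 0.3) (the 2D script's ravine start point) the
+# gradient is (2, 12), while the direction to the minimum at the origin is
+# (-1, -0.3); the cosine below comes out to about 0.44, i.e. the descent
+# direction -∇f points largely across the ravine rather than along it.
+def _ravine_effect_demo() -> float:
+    """Return cos of the angle between -∇f(x) and the direction to the minimum."""
+    func = RavineFunction()
+    x = np.array([1.0, 0.3])
+    g = func.gradient(x)
+    to_min = -x  # the minimum is at the origin
+    return float(np.dot(-g, to_min) / (np.linalg.norm(g) * np.linalg.norm(to_min)))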
+
+
+# Registry of available functions
+FUNCTIONS_1D = {
+    "task": TaskFunction1D,
+}
+
+FUNCTIONS_2D = {
+    "himmelblau": HimmelblauFunction,
+    "ravine": RavineFunction,
+}
diff --git a/task2/common/gradient_descent.py b/task2/common/gradient_descent.py
new file mode 100644
index 0000000..c1d5bdd
--- /dev/null
+++ b/task2/common/gradient_descent.py
@@ -0,0 +1,441 @@
+"""Gradient descent implementations."""
+
+from dataclasses import dataclass
+from typing import List, Literal, Optional
+import numpy as np
+
+from .functions import Function1D, Function2D
+from .line_search import golden_section_search, armijo_step, armijo_step_1d
+
+
+StepMethod = Literal["constant", "golden_section", "armijo"]
+
+
+@dataclass
+class IterationInfo1D:
+    """Information about a single iteration of 1D gradient descent."""
+    iteration: int
+    x: float
+    f_x: float
+    grad: float
+    step_size: float
+
+
+@dataclass
+class GradientDescentResult1D:
+    """Result of 1D gradient descent."""
+    x_star: float
+    f_star: float
+    iterations: List[IterationInfo1D]
+    converged: bool
+    method: str
+
+    @property
+    def trajectory(self) -> List[float]:
+        return [it.x for it in self.iterations]
+
+
+@dataclass
+class IterationInfo2D:
+    """Information about a single iteration of 2D gradient descent."""
+    iteration: int
+    x: np.ndarray
+    f_x: float
+    grad: np.ndarray
+    step_size: float
+
+
+@dataclass
+class GradientDescentResult2D:
+    """Result of 2D gradient descent."""
+    x_star: np.ndarray
+    f_star: float
+    iterations: List[IterationInfo2D]
+    converged: bool
+    method: str
+
+    @property
+    def trajectory(self) -> List[np.ndarray]:
+        return [it.x for it in self.iterations]
+
+
+def gradient_descent_1d(
+    func: Function1D,
+    x0: float,
+    step_method: StepMethod = "constant",
+    step_size: float = 0.1,
+    eps_x: float = 0.05,
+    eps_f: float = 0.001,
+    max_iters: int = 100,
+    armijo_params: Optional[dict] = None,
+    golden_section_bounds: Optional[tuple] = None,
+) -> GradientDescentResult1D:
+    """
+    Gradient descent for a 1D function.
+
+    Args:
+        func: Function to minimize
+        x0: Starting point
+        step_method: Step selection method ("constant", "golden_section", "armijo")
+        step_size: Step size for the constant method
+        eps_x: Tolerance for x convergence
+        eps_f: Tolerance for f convergence
+        max_iters: Maximum number of iterations
+        armijo_params: Parameters for the Armijo rule (d_init, epsilon, theta)
+        golden_section_bounds: Search bounds for golden section (a, b)
+
+    Returns:
+        GradientDescentResult1D with trajectory and final result
+    """
+    x = x0
+    iterations: List[IterationInfo1D] = []
+    converged = False
+
+    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}
+
+    for k in range(max_iters):
+        f_x = func(x)
+        grad = func.gradient(x)
+
+        # Select step size
+        if step_method == "constant":
+            alpha = step_size
+        elif step_method == "golden_section":
+            # Minimize phi(alpha) = f(x - alpha * grad) by golden section
+            bounds = golden_section_bounds or (0.0, 2.0)
+            phi = lambda a: func(x - a * grad)
+            alpha = golden_section_search(phi, bounds[0], bounds[1])
+        elif step_method == "armijo":
+            alpha = armijo_step_1d(
+                func, x, grad,
+                d_init=armijo_params.get("d_init", 1.0),
+                epsilon=armijo_params.get("epsilon", 0.1),
+                theta=armijo_params.get("theta", 0.5),
+            )
+        else:
+            raise ValueError(f"Unknown step method: {step_method}")
+
+        iterations.append(IterationInfo1D(
+            iteration=k + 1,
+            x=x,
+            f_x=f_x,
+            grad=grad,
+            step_size=alpha,
+        ))
+
+        # Update x
+        x_new = x - alpha * grad
+        f_new = func(x_new)
+
+        # Check convergence
+        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
+            x = x_new
+            converged = True
+            break
+
+        x = x_new
+
+    # Add the final point
+    iterations.append(IterationInfo1D(
+        iteration=len(iterations) + 1,
+        x=x,
+        f_x=func(x),
+        grad=func.gradient(x),
+        step_size=0.0,
+    ))
+
+    method_names = {
+        "constant": "Constant step",
+        "golden_section": "Golden section",
+        "armijo": "Armijo rule",
+    }
+
+    return GradientDescentResult1D(
+        x_star=x,
+        f_star=func(x),
+        iterations=iterations,
+        converged=converged,
+        method=method_names.get(step_method, step_method),
+    )
+
+
+def gradient_descent_2d(
+    func: Function2D,
+    x0: np.ndarray,
+    step_method: StepMethod = "constant",
+    step_size: float = 0.01,
+    eps_x: float = 1e-5,
+    eps_f: float = 1e-6,
+    max_iters: int = 1000,
+    armijo_params: Optional[dict] = None,
+    golden_section_bounds: Optional[tuple] = None,
+) -> GradientDescentResult2D:
+    """
+    Gradient descent for a 2D function.
+
+    Args:
+        func: Function to minimize
+        x0: Starting point [x1, x2]
+        step_method: Step selection method ("constant", "golden_section", "armijo")
+        step_size: Step size for the constant method
+        eps_x: Tolerance for x convergence
+        eps_f: Tolerance for f convergence
+        max_iters: Maximum number of iterations
+        armijo_params: Parameters for the Armijo rule
+        golden_section_bounds: Search bounds for golden section
+
+    Returns:
+        GradientDescentResult2D with trajectory and final result
+    """
+    x = np.array(x0, dtype=float)
+    iterations: List[IterationInfo2D] = []
+    converged = False
+
+    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}
+
+    for k in range(max_iters):
+        f_x = func(x)
+        grad = func.gradient(x)
+        grad_norm = np.linalg.norm(grad)
+
+        # Stop early if the gradient is numerically zero
+        if grad_norm < 1e-10:
+            converged = True
+            iterations.append(IterationInfo2D(
+                iteration=k + 1,
+                x=x.copy(),
+                f_x=f_x,
+                grad=grad.copy(),
+                step_size=0.0,
+            ))
+            break
+
+        # Select step size
+        if step_method == "constant":
+            alpha = step_size
+        elif step_method == "golden_section":
+            bounds = golden_section_bounds or (0.0, 1.0)
+            phi = lambda a: func(x - a * grad)
+            alpha = golden_section_search(phi, bounds[0], bounds[1])
+        elif step_method == "armijo":
+            alpha = armijo_step(
+                func, x, grad,
+                d_init=armijo_params.get("d_init", 1.0),
+                epsilon=armijo_params.get("epsilon", 0.1),
+                theta=armijo_params.get("theta", 0.5),
+            )
+        else:
+            raise ValueError(f"Unknown step method: {step_method}")
+
+        iterations.append(IterationInfo2D(
+            iteration=k + 1,
+            x=x.copy(),
+            f_x=f_x,
+            grad=grad.copy(),
+            step_size=alpha,
+        ))
+
+        # Update x
+        x_new = x - alpha * grad
+        f_new = func(x_new)
+
+        # Check convergence
+        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
+            x = x_new
+            converged = True
+            break
+
+        x = x_new
+
+    # Add the final point
+    iterations.append(IterationInfo2D(
+        iteration=len(iterations) + 1,
+        x=x.copy(),
+        f_x=func(x),
+        grad=func.gradient(x),
+        step_size=0.0,
+    ))
+
+    method_names = {
+        "constant": "Constant step",
+        "golden_section": "Golden section",
+        "armijo": "Armijo rule",
+    }
+
+    return GradientDescentResult2D(
+        x_star=x,
+        f_star=func(x),
+        iterations=iterations,
+        converged=converged,
+        method=method_names.get(step_method, step_method),
+    )
+
+
+def heavy_ball_1d(
+    func: Function1D,
+    x0: float,
+    alpha: float = 0.1,
+    beta: float = 0.9,
+    eps_x: float = 0.05,
+    eps_f: float = 0.001,
+    max_iters: int = 100,
+) -> GradientDescentResult1D:
+    """
+    Heavy-ball method for a 1D function.
+
+    x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})
+
+    Args:
+        func: Function to minimize
+        x0: Starting point
+        alpha: Step size (learning rate)
+        beta: Momentum parameter (0 <= beta < 1)
+        eps_x: Tolerance for x convergence
+        eps_f: Tolerance for f convergence
+        max_iters: Maximum number of iterations
+
+    Returns:
+        GradientDescentResult1D with trajectory and final result
+    """
+    x = x0
+    x_prev = x0  # No momentum on the first iteration
+    iterations: List[IterationInfo1D] = []
+    converged = False
+
+    for k in range(max_iters):
+        f_x = func(x)
+        grad = func.gradient(x)
+
+        # Heavy-ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
+        momentum = beta * (x - x_prev) if k > 0 else 0.0
+
+        iterations.append(IterationInfo1D(
+            iteration=k + 1,
+            x=x,
+            f_x=f_x,
+            grad=grad,
+            step_size=alpha,
+        ))
+
+        # Update x
+        x_new = x - alpha * grad + momentum
+        f_new = func(x_new)
+
+        # Check convergence
+        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
+            x_prev = x
+            x = x_new
+            converged = True
+            break
+
+        x_prev = x
+        x = x_new
+
+    # Add the final point
+    iterations.append(IterationInfo1D(
+        iteration=len(iterations) + 1,
+        x=x,
+        f_x=func(x),
+        grad=func.gradient(x),
+        step_size=0.0,
+    ))
+
+    return GradientDescentResult1D(
+        x_star=x,
+        f_star=func(x),
+        iterations=iterations,
+        converged=converged,
+        method=f"Heavy ball (α={alpha}, β={beta})",
+    )
+
+
+def heavy_ball_2d(
+    func: Function2D,
+    x0: np.ndarray,
+    alpha: float = 0.01,
+    beta: float = 0.9,
+    eps_x: float = 1e-5,
+    eps_f: float = 1e-6,
+    max_iters: int = 1000,
+) -> GradientDescentResult2D:
+    """
+    Heavy-ball method for a 2D function.
+
+    x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})
+
+    Args:
+        func: Function to minimize
+        x0: Starting point [x1, x2]
+        alpha: Step size (learning rate)
+        beta: Momentum parameter (0 <= beta < 1)
+        eps_x: Tolerance for x convergence
+        eps_f: Tolerance for f convergence
+        max_iters: Maximum number of iterations
+
+    Returns:
+        GradientDescentResult2D with trajectory and final result
+    """
+    x = np.array(x0, dtype=float)
+    x_prev = x.copy()  # No momentum on the first iteration
+    iterations: List[IterationInfo2D] = []
+    converged = False
+
+    for k in range(max_iters):
+        f_x = func(x)
+        grad = func.gradient(x)
+        grad_norm = np.linalg.norm(grad)
+
+        # Stop early if the gradient is numerically zero
+        if grad_norm < 1e-10:
+            converged = True
+            iterations.append(IterationInfo2D(
+                iteration=k + 1,
+                x=x.copy(),
+                f_x=f_x,
+                grad=grad.copy(),
+                step_size=0.0,
+            ))
+            break
+
+        # Heavy-ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
+        momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)
+
+        iterations.append(IterationInfo2D(
+            iteration=k + 1,
+            x=x.copy(),
+            f_x=f_x,
+            grad=grad.copy(),
+            step_size=alpha,
+        ))
+
+        # Update x
+        x_new = x - alpha * grad + momentum
+        f_new = func(x_new)
+
+        # Check convergence
+        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
+            x_prev = x.copy()
+            x = x_new
+            converged = True
+            break
+
+        x_prev = x.copy()
+        x = x_new
+
+    # Add the final point
+    iterations.append(IterationInfo2D(
+        iteration=len(iterations) + 1,
+        x=x.copy(),
+        f_x=func(x),
+        grad=func.gradient(x),
+        step_size=0.0,
+    ))
+
+    return GradientDescentResult2D(
+        x_star=x,
+        f_star=func(x),
+        iterations=iterations,
+        converged=converged,
+        method=f"Heavy ball (α={alpha}, β={beta})",
+    )
+
diff --git a/task2/common/line_search.py b/task2/common/line_search.py
new file mode 100644
index 0000000..3789fbe
--- /dev/null
+++ b/task2/common/line_search.py
@@ -0,0 +1,139 @@
+"""Line search methods for step size selection."""
size selection.""" + +import math +from typing import Callable, Tuple +import numpy as np + + +def golden_section_search( + phi: Callable[[float], float], + a: float, + b: float, + tol: float = 1e-5, + max_iters: int = 100, +) -> float: + """ + Golden section search for 1D optimization. + + Finds argmin phi(alpha) on [a, b]. + + Args: + phi: Function to minimize (typically f(x - alpha * grad)) + a: Left bound of search interval + b: Right bound of search interval + tol: Tolerance for stopping + max_iters: Maximum number of iterations + + Returns: + Optimal step size alpha + """ + # Golden ratio constants + gr = (1 + math.sqrt(5)) / 2 + r = 1 / gr # ~0.618 + c = 1 - r # ~0.382 + + y = a + c * (b - a) + z = a + r * (b - a) + fy = phi(y) + fz = phi(z) + + for _ in range(max_iters): + if b - a < tol: + break + + if fy <= fz: + b = z + z = y + fz = fy + y = a + c * (b - a) + fy = phi(y) + else: + a = y + y = z + fy = fz + z = a + r * (b - a) + fz = phi(z) + + return (a + b) / 2 + + +def armijo_step( + f: Callable[[np.ndarray], float], + x: np.ndarray, + grad: np.ndarray, + d_init: float = 1.0, + epsilon: float = 0.1, + theta: float = 0.5, + max_iters: int = 100, +) -> float: + """ + Armijo rule for step size selection. + + Finds step d such that: + f(x - d * grad) <= f(x) - epsilon * d * ||grad||^2 + + Note: Using descent direction s = -grad, so inner product = -||grad||^2 + + Args: + f: Function to minimize + x: Current point + grad: Gradient at x + d_init: Initial step size + epsilon: Armijo parameter (0 < epsilon < 1) + theta: Step reduction factor (0 < theta < 1) + max_iters: Maximum number of reductions + + Returns: + Step size satisfying Armijo condition + """ + d = d_init + fx = f(x) + grad_norm_sq = np.dot(grad, grad) + + for _ in range(max_iters): + # Armijo condition: f(x - d*grad) <= f(x) - epsilon * d * ||grad||^2 + x_new = x - d * grad + if f(x_new) <= fx - epsilon * d * grad_norm_sq: + return d + d *= theta + + return d + + +def armijo_step_1d( + f: Callable[[float], float], + x: float, + grad: float, + d_init: float = 1.0, + epsilon: float = 0.1, + theta: float = 0.5, + max_iters: int = 100, +) -> float: + """ + Armijo rule for step size selection (1D version). + + Args: + f: Function to minimize + x: Current point + grad: Gradient (derivative) at x + d_init: Initial step size + epsilon: Armijo parameter (0 < epsilon < 1) + theta: Step reduction factor (0 < theta < 1) + max_iters: Maximum number of reductions + + Returns: + Step size satisfying Armijo condition + """ + d = d_init + fx = f(x) + grad_sq = grad * grad + + for _ in range(max_iters): + # Armijo condition: f(x - d*grad) <= f(x) - epsilon * d * grad^2 + x_new = x - d * grad + if f(x_new) <= fx - epsilon * d * grad_sq: + return d + d *= theta + + return d + diff --git a/task2/task.md b/task2/task.md new file mode 100644 index 0000000..4afee40 --- /dev/null +++ b/task2/task.md @@ -0,0 +1,55 @@ +## Задача + +$$ +f(x) = \frac{\sqrt{x^2 + 9}}{4} + \frac{5 - x}{5} +$$ + +при условии, что + +$$ +\bar{X} = [-3,\; 8]. +$$ + +Взять: +- $ N = 10 $, +- $ \varepsilon_x = 0{,}05 $, +- $ \varepsilon_f = 0{,}001 $. + + +Взять эту функцию. Сделать градиентный спуск, выбирая шаги 3 методами + +1. Константный шаг, задаваемый 1 раз перед стартом алгоритма +2. Численный метод - это на каждом шаге оптимизируем функцию $ f(x_k - a_k * grad(f(x_k)) $ золотым сечением например (одномерная оптимизация) +3. 
+
+## The Armijo Rule
+
+Let $f(\cdot)$ be differentiable on $\mathbb{R}^n$.
+Fix $\hat d > 0$ and $\varepsilon \in (0,1)$.
+Set $d = \hat d$.
+
+### Step 1
+Check the Armijo inequality:
+$$
+f(x_k + d \cdot s_k) \le f(x_k) + \varepsilon \cdot d \cdot \langle \nabla f(x_k), s_k \rangle.
+$$
+$(6.4)$
+
+### Step 2
+If inequality $(6.4)$ does not hold, set
+$$
+d := \theta \cdot d
+$$
+and return to Step 1.
+Otherwise, set $d_k := d$.
+
+### Conclusion
+The step $d_k$ is thus the first of the values $d$ obtained by successively
+shrinking the initial value $\hat d$ (by the factor $\theta$)
+for which the Armijo inequality $(6.4)$ holds:
+$$
+f(x_{k+1}) \le f(x_k) + \varepsilon \cdot d_k \cdot \langle \nabla f(x_k), s_k \rangle.
+$$
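+
+### Minimal sketch of the rule
+
+A compact, illustrative sketch of the procedure above for the descent direction $ s_k = -\nabla f(x_k) $, in which case $ \langle \nabla f(x_k), s_k \rangle = -\|\nabla f(x_k)\|^2 $. The function name and defaults here are assumptions for exposition; the repository's actual implementation is `armijo_step` in `task2/common/line_search.py`:
+
+```python
+import numpy as np
+
+
+def armijo(f, x, grad, d_hat=1.0, eps=0.1, theta=0.5, max_reductions=100):
+    """Return the first d = theta^m * d_hat that satisfies inequality (6.4)."""
+    d = d_hat
+    fx = f(x)
+    g_sq = float(np.dot(grad, grad))  # ||∇f(x)||² = -⟨∇f(x), s⟩
+    for _ in range(max_reductions):
+        if f(x - d * grad) <= fx - eps * d * g_sq:  # Armijo inequality (6.4)
+            break
+        d *= theta  # shrink: d := θ·d, then re-check
+    return d
+```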