2026-03-17 11:21:20 +03:00
parent 6f718668c8
commit bbb1b91e95
10 changed files with 627 additions and 0 deletions

task2/.gitignore vendored Normal file

@@ -0,0 +1,5 @@
bin/
results/*.out
results/*.err
results/*.csv
*.pyc

task2/README.md Normal file

@@ -0,0 +1,122 @@
# Task 2: MPI implementation of the wave algorithm
An MPI version of the wave algorithm (Lee algorithm) for finding the shortest path of a robot across a grid field. A CUDA version of the same algorithm is provided for comparison.
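Both versions repeat the same relaxation step until no cell changes: a free cell's distance becomes the minimum of its four neighbours' distances plus one, starting from 0 in the start cell and treating obstacle cells as impassable. A minimal serial sketch of one sweep (the helper `wave_sweep` below is illustrative and does not appear in the sources):
```c
#include <limits.h>

/* One serial relaxation sweep over an n x n grid (illustrative helper).
 * P[i*n+j] == -1 marks an obstacle; dist[] starts at UINT_MAX everywhere
 * except 0 at the start cell. Returns 1 if any cell improved; the caller
 * repeats sweeps until this returns 0. */
static int wave_sweep(const int *P, unsigned int *dist, int n) {
    int changed = 0;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            if (P[i * n + j] == -1) continue;   /* skip obstacles */
            unsigned int mn = dist[i * n + j];
            /* look at the four neighbours that exist and are already reachable */
            if (i > 0     && dist[(i - 1) * n + j] != UINT_MAX && dist[(i - 1) * n + j] + 1 < mn) mn = dist[(i - 1) * n + j] + 1;
            if (i < n - 1 && dist[(i + 1) * n + j] != UINT_MAX && dist[(i + 1) * n + j] + 1 < mn) mn = dist[(i + 1) * n + j] + 1;
            if (j > 0     && dist[i * n + j - 1] != UINT_MAX && dist[i * n + j - 1] + 1 < mn) mn = dist[i * n + j - 1] + 1;
            if (j < n - 1 && dist[i * n + j + 1] != UINT_MAX && dist[i * n + j + 1] + 1 < mn) mn = dist[i * n + j + 1] + 1;
            if (mn < dist[i * n + j]) { dist[i * n + j] = mn; changed = 1; }
        }
    }
    return changed;
}
```
`src/wave_mpi.c` parallelizes this sweep over blocks of rows and `src/wave_cuda.cu` over a grid-stride loop of CUDA threads; both stop once a full sweep changes nothing (with a 2*n iteration safety cap).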
## Structure
- `src/wave_mpi.c` — MPI implementation (row-wise decomposition with ghost rows; see the sketch after this list).
- `src/wave_cuda.cu` — CUDA implementation (global memory, based on last semester's program).
- `scripts/build_mpi.sh` — builds the MPI version.
- `scripts/build_cuda.sh` — builds the CUDA version.
- `scripts/run_mpi.slurm` — batch MPI run on the cluster.
- `scripts/run_cuda.slurm` — batch CUDA run on the cluster.
- `scripts/plot_task2_results.py` — builds the plot for the report.
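`src/wave_mpi.c` splits the grid into blocks of consecutive rows: the first `n % size` ranks own one extra row, and every rank except the topmost/bottommost keeps a ghost row above/below that mirrors the neighbouring rank's boundary row. The same arithmetic, pulled out into a hypothetical helper for illustration:
```c
/* Same block-row split as in src/wave_mpi.c (the helper itself is illustrative). */
void row_partition(int n, int size, int rank,
                   int *local_rows, int *start_row, int *ghost_top, int *ghost_bot) {
    int base_rows = n / size;
    int remainder = n % size;
    *local_rows = base_rows + (rank < remainder ? 1 : 0);                   /* rows owned by this rank */
    *start_row  = rank * base_rows + (rank < remainder ? rank : remainder); /* first owned global row */
    *ghost_top  = (rank > 0) ? 1 : 0;        /* mirror of rank-1's last owned row */
    *ghost_bot  = (rank < size - 1) ? 1 : 0; /* mirror of rank+1's first owned row */
}
```
For the benchmark sizes the split is even (e.g. n = 5000 on 4 nodes gives 1250 rows per rank); the ghost rows are refreshed with `MPI_Sendrecv` before every sweep.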
## What to do on the СКЦ cluster
### 1. Copy the folder to the cluster
```bash
scp -r task2 polytech:~/supercomputers/
```
### 2. Connect
```bash
ssh polytech
cd ~/supercomputers/task2
```
### 3. Check the available MPI modules
```bash
module avail mpi
```
If the `mpi/openmpi` module is not found, look through the list and put the correct name into `scripts/run_mpi.slurm` (the `module load mpi/openmpi` line).
### 4. Run the CUDA version (for comparison)
```bash
sbatch scripts/run_cuda.slurm
```
### 5. Run MPI on 1, 2, and 4 nodes
```bash
sbatch --nodes=1 scripts/run_mpi.slurm
sbatch --nodes=2 scripts/run_mpi.slurm
sbatch --nodes=4 scripts/run_mpi.slurm
```
### 6. Check job status
```bash
squeue -u tm3u21
sacct -j <JOBID> --format=JobID,JobName,Partition,State,Elapsed,NNodes,AllocTRES%40,NodeList,ExitCode
```
### 7. Look at the results
```bash
less results/task2-mpi-<JOBID>.out
cat results/task2-mpi-1n-<JOBID>.csv
cat results/task2-mpi-2n-<JOBID>.csv
cat results/task2-mpi-4n-<JOBID>.csv
cat results/task2-cuda-<JOBID>.csv
```
### 8. Build the plot
On the local machine (`matplotlib` is required):
```bash
python3 scripts/plot_task2_results.py \
--mpi1 results/task2-mpi-1n-XXXXX.csv \
--mpi2 results/task2-mpi-2n-XXXXX.csv \
--mpi4 results/task2-mpi-4n-XXXXX.csv \
--cuda results/task2-cuda-XXXXX.csv \
-o ../report/img/task2-time-comparison.png
```
## What to collect for the report
### Screenshot 1: `task2-mpi-run.png` — MPI program output
After the MPI jobs finish, open the output of one of them:
```bash
less results/task2-mpi-<JOBID>.out
```
Take a screenshot of the `===== benchmark =====` block; it contains every grid size with its timing.
### Screenshot 2: `task2-cuda-run.png` — CUDA program output
```bash
less results/task2-cuda-<JOBID>.out
```
Take a screenshot of the `===== benchmark =====` block.
### Screenshot 3: `task2-sacct.png` — Slurm job details
Collect all JOBIDs (3 MPI + 1 CUDA) and run:
```bash
sacct -j <JOB_MPI1>,<JOB_MPI2>,<JOB_MPI4>,<JOB_CUDA> \
--format=JobID,JobName,Partition,State,Elapsed,NNodes,AllocTRES%40,NodeList,ExitCode
```
### Screenshot 4: `task2-time-comparison.png` — the plot
Generated by the `plot_task2_results.py` script (see step 8 above).
### Where to put the screenshots
All images go into `report/img/`:
- `report/img/task2-mpi-run.png`
- `report/img/task2-cuda-run.png`
- `report/img/task2-sacct.png`
- `report/img/task2-time-comparison.png`

task2/results/.gitkeep Normal file

task2/scripts/build_cuda.sh Executable file

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")/.."
CUDA_ARCH="${CUDA_ARCH:-sm_35}"
mkdir -p bin
nvcc -ccbin g++ -O3 -arch="$CUDA_ARCH" -o bin/wave_cuda src/wave_cuda.cu
echo "Built bin/wave_cuda (arch=$CUDA_ARCH)"

task2/scripts/build_mpi.sh Executable file

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")/.."
mkdir -p bin
mpicc -O3 -std=c99 -o bin/wave_mpi src/wave_mpi.c
echo "Built bin/wave_mpi"

task2/scripts/plot_task2_results.py

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
Строит график зависимости времени вычисления от размера полигона
для MPI (1, 2, 4 узла) и CUDA.
Использование:
python3 plot_task2_results.py \
--mpi1 results/task2-mpi-1n-XXXXX.csv \
--mpi2 results/task2-mpi-2n-XXXXX.csv \
--mpi4 results/task2-mpi-4n-XXXXX.csv \
--cuda results/task2-cuda-XXXXX.csv \
-o report/img/task2-time-comparison.png
"""
import argparse
import csv
from pathlib import Path

import matplotlib.pyplot as plt


def read_csv(path: str) -> tuple[list[int], list[float]]:
    """Both the MPI and CUDA CSVs share the n and time_ms columns."""
    sizes, times = [], []
    with open(path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            sizes.append(int(row["n"]))
            times.append(float(row["time_ms"]))
    return sizes, times


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--mpi1", required=True, help="CSV for MPI 1 node")
    parser.add_argument("--mpi2", required=True, help="CSV for MPI 2 nodes")
    parser.add_argument("--mpi4", required=True, help="CSV for MPI 4 nodes")
    parser.add_argument("--cuda", required=True, help="CSV for CUDA")
    parser.add_argument("-o", "--output", default="task2-time-comparison.png")
    args = parser.parse_args()

    fig, ax = plt.subplots(figsize=(10, 6))
    for label, path in [
        ("MPI 1 node", args.mpi1),
        ("MPI 2 nodes", args.mpi2),
        ("MPI 4 nodes", args.mpi4),
        ("CUDA", args.cuda),
    ]:
        sizes, times = read_csv(path)
        ax.plot(sizes, times, marker="o", label=label)

    ax.set_xlabel("Grid size n")
    ax.set_ylabel("Time, ms")
    ax.set_title("Computation time vs. grid size")
    ax.legend()
    ax.grid(True, alpha=0.3)

    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(args.output, dpi=150, bbox_inches="tight")
    print(f"Saved: {args.output}")


if __name__ == "__main__":
    main()

task2/scripts/run_cuda.slurm

@@ -0,0 +1,49 @@
#!/usr/bin/env bash
#SBATCH --job-name=task2-cuda
#SBATCH --partition=tornado-k40
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --time=00:20:00
#SBATCH --output=results/%x-%j.out
#SBATCH --error=results/%x-%j.err
set -euo pipefail
cd "${SLURM_SUBMIT_DIR}"
module purge
module load compiler/gcc/11
module load nvidia/cuda/11.6u2
mkdir -p results bin
./scripts/build_cuda.sh
echo "===== account info ====="
whoami; hostname; date
echo
echo "===== slurm info ====="
echo "SLURM_JOB_ID=${SLURM_JOB_ID:-unknown}"
echo "SLURM_JOB_PARTITION=${SLURM_JOB_PARTITION:-unknown}"
echo "SLURM_NODELIST=${SLURM_NODELIST:-unknown}"
scontrol show job "${SLURM_JOB_ID}" || true
echo
echo "===== node config ====="
lscpu | head -20
nvidia-smi -L || true
nvidia-smi || true
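# wave_cuda appends one row per run; this header matches its fprintf format
# (n,impl,time_ms,path_len,iterations) in src/wave_cuda.cu.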
CSV="results/task2-cuda-${SLURM_JOB_ID}.csv"
echo "n,impl,time_ms,path_len,iterations" > "$CSV"
echo
echo "===== benchmark ====="
for N in 500 1000 2000 3000 5000; do
    echo "--- n=$N ---"
    ./bin/wave_cuda "$N" 256 256 "$CSV"
done
echo
echo "===== done ====="

task2/scripts/run_mpi.slurm

@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#SBATCH --job-name=task2-mpi
#SBATCH --partition=tornado
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=56
#SBATCH --time=00:20:00
#SBATCH --output=results/%x-%j.out
#SBATCH --error=results/%x-%j.err
set -euo pipefail
cd "${SLURM_SUBMIT_DIR}"
module purge
module load compiler/gcc/11
module load mpi/openmpi
mkdir -p results bin
./scripts/build_mpi.sh
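# One MPI rank per node (--ntasks-per-node=1 above), so the rank count equals the number of allocated nodes.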
RANKS=${SLURM_JOB_NUM_NODES}
echo "===== account info ====="
whoami; hostname; date
echo
echo "===== slurm info ====="
echo "SLURM_JOB_ID=${SLURM_JOB_ID:-unknown}"
echo "SLURM_JOB_PARTITION=${SLURM_JOB_PARTITION:-unknown}"
echo "SLURM_JOB_NUM_NODES=${SLURM_JOB_NUM_NODES:-unknown}"
echo "SLURM_NODELIST=${SLURM_NODELIST:-unknown}"
echo "RANKS=${RANKS}"
scontrol show job "${SLURM_JOB_ID}" || true
echo
echo "===== node config ====="
lscpu | head -20
if [ -n "${SLURMD_NODENAME:-}" ]; then
scontrol show node "${SLURMD_NODENAME}" || true
fi
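# wave_mpi appends one row per grid size; this header matches its fprintf format
# (n,procs,time_ms,path_len,iterations) in src/wave_mpi.c.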
CSV="results/task2-mpi-${RANKS}n-${SLURM_JOB_ID}.csv"
echo "n,procs,time_ms,path_len,iterations" > "$CSV"
echo
echo "===== benchmark (${RANKS} nodes / ${RANKS} ranks) ====="
for N in 500 1000 2000 3000 5000; do
    echo "--- n=$N ---"
    mpirun -np "${RANKS}" --map-by ppr:1:node --bind-to none ./bin/wave_mpi "$N" "$CSV"
done
echo
echo "===== done ====="

task2/src/wave_cuda.cu Normal file

@@ -0,0 +1,126 @@
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#define INF UINT_MAX
#define OBSTACLE_PROB 10
#define DEFAULT_BLOCKS 256
#define DEFAULT_THREADS 256
static void generate_polygon(int *P, int n) {
    srand(42);
    for (int i = 0; i < n * n; i++)
        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;
    P[sx * n + sy] = 0;
    P[fx * n + fy] = 0;
}
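/* One relaxation sweep: each thread walks cells in a grid-stride loop and lowers a
   free cell's distance to min(neighbour distances) + 1; *changed is set whenever any
   cell improves, so the host knows another sweep is needed. */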
__global__ void wave_step(int *P, unsigned int *dist, int n, bool *changed) {
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    while (tid < n * n) {
        int i = tid / n;
        int j = tid % n;
        if (P[tid] != -1) {
            unsigned int cur = dist[tid];
            unsigned int mn = cur;
            if (i > 0 && dist[(i-1)*n + j] != INF) mn = min(mn, dist[(i-1)*n + j] + 1);
            if (i < n - 1 && dist[(i+1)*n + j] != INF) mn = min(mn, dist[(i+1)*n + j] + 1);
            if (j > 0 && dist[i*n + j - 1] != INF) mn = min(mn, dist[i*n + j - 1] + 1);
            if (j < n - 1 && dist[i*n + j + 1] != INF) mn = min(mn, dist[i*n + j + 1] + 1);
            if (mn < cur) {
                dist[tid] = mn;
                *changed = true;
            }
        }
        tid += blockDim.x * gridDim.x;
    }
}
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <matrix_size> [blocks] [threads] [csv_file]\n", argv[0]);
        return 1;
    }
    int n = atoi(argv[1]);
    int blocks = (argc >= 3) ? atoi(argv[2]) : DEFAULT_BLOCKS;
    int threads = (argc >= 4) ? atoi(argv[3]) : DEFAULT_THREADS;
    const char *csv_path = (argc >= 5) ? argv[4] : NULL;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;
    int *P = (int *)malloc(n * n * sizeof(int));
    generate_polygon(P, n);
    unsigned int *dist_h = (unsigned int *)malloc(n * n * sizeof(unsigned int));
    for (int i = 0; i < n * n; i++) dist_h[i] = INF;
    dist_h[sx * n + sy] = 0;
    int *d_P;
    unsigned int *d_dist;
    bool *d_changed;
    cudaMalloc(&d_P, n * n * sizeof(int));
    cudaMalloc(&d_dist, n * n * sizeof(unsigned int));
    cudaMalloc(&d_changed, sizeof(bool));
    cudaMemcpy(d_P, P, n * n * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_dist, dist_h, n * n * sizeof(unsigned int), cudaMemcpyHostToDevice);
    cudaEvent_t t0, t1;
    cudaEventCreate(&t0);
    cudaEventCreate(&t1);
    cudaEventRecord(t0);
    int iterations = 0;
    bool changed;
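    /* Host-driven fixed point: clear the device flag, run one sweep, copy the flag
       back; stop when no cell changed or after the 2*n safety cap. */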
    do {
        changed = false;
        cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
        wave_step<<<blocks, threads>>>(d_P, d_dist, n, d_changed);
        cudaDeviceSynchronize();
        cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
        iterations++;
    } while (changed && iterations < 2 * n);
    cudaEventRecord(t1);
    cudaEventSynchronize(t1);
    float elapsed_ms = 0;
    cudaEventElapsedTime(&elapsed_ms, t0, t1);
    cudaMemcpy(dist_h, d_dist, n * n * sizeof(unsigned int), cudaMemcpyDeviceToHost);
    unsigned int path_len = dist_h[fx * n + fy];
    if (path_len == INF)
        printf("n=%d Path not found! time=%.2f ms iters=%d blocks=%d threads=%d\n",
               n, elapsed_ms, iterations, blocks, threads);
    else
        printf("n=%d path_len=%u time=%.2f ms iters=%d blocks=%d threads=%d\n",
               n, path_len, elapsed_ms, iterations, blocks, threads);
    if (csv_path) {
        FILE *fp = fopen(csv_path, "a");
        if (fp) {
            fprintf(fp, "%d,cuda,%.4f,%u,%d\n", n, elapsed_ms, path_len, iterations);
            fclose(fp);
        }
    }
    free(P);
    free(dist_h);
    cudaFree(d_P);
    cudaFree(d_dist);
    cudaFree(d_changed);
    cudaEventDestroy(t0);
    cudaEventDestroy(t1);
    return 0;
}

task2/src/wave_mpi.c Normal file

@@ -0,0 +1,185 @@
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <mpi.h>
#define INF UINT_MAX
#define OBSTACLE_PROB 10
static void generate_polygon(int *P, int n) {
    srand(42);
    for (int i = 0; i < n * n; i++)
        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;
    P[sx * n + sy] = 0;
    P[fx * n + fy] = 0;
}
int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (argc < 2) {
        if (rank == 0)
            fprintf(stderr, "Usage: mpirun -np <P> %s <matrix_size> [csv_file]\n", argv[0]);
        MPI_Finalize();
        return 1;
    }
    int n = atoi(argv[1]);
    const char *csv_path = (argc >= 3) ? argv[2] : NULL;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;
    int *P = (int *)malloc(n * n * sizeof(int));
    unsigned int *dist = NULL;
    if (rank == 0) {
        generate_polygon(P, n);
        dist = (unsigned int *)malloc(n * n * sizeof(unsigned int));
        for (int i = 0; i < n * n; i++)
            dist[i] = INF;
        dist[sx * n + sy] = 0;
    }
    MPI_Bcast(P, n * n, MPI_INT, 0, MPI_COMM_WORLD);
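    /* Block-row decomposition: each rank owns local_rows consecutive rows (the
       remainder rows go to the lowest ranks) plus up to two ghost rows that mirror
       the neighbouring ranks' boundary rows. */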
    int base_rows = n / size;
    int remainder = n % size;
    int local_rows = base_rows + (rank < remainder ? 1 : 0);
    int start_row = rank * base_rows + (rank < remainder ? rank : remainder);
    int ghost_top = (rank > 0) ? 1 : 0;
    int ghost_bot = (rank < size - 1) ? 1 : 0;
    int total_local = (ghost_top + local_rows + ghost_bot) * n;
    unsigned int *local_dist = (unsigned int *)malloc(total_local * sizeof(unsigned int));
    int *local_P = (int *)malloc(total_local * sizeof(int));
    for (int i = 0; i < total_local; i++) {
        local_dist[i] = INF;
        local_P[i] = -1;
    }
    int *sendcounts = NULL, *displs = NULL;
    if (rank == 0) {
        sendcounts = (int *)malloc(size * sizeof(int));
        displs = (int *)malloc(size * sizeof(int));
        int off = 0;
        for (int r = 0; r < size; r++) {
            int rr = base_rows + (r < remainder ? 1 : 0);
            sendcounts[r] = rr * n;
            displs[r] = off;
            off += rr * n;
        }
    }
    MPI_Scatterv(
        (rank == 0) ? dist : NULL, sendcounts, displs, MPI_UNSIGNED,
        local_dist + ghost_top * n, local_rows * n, MPI_UNSIGNED,
        0, MPI_COMM_WORLD);
    for (int i = 0; i < local_rows; i++)
        memcpy(local_P + (ghost_top + i) * n, P + (start_row + i) * n, n * sizeof(int));
    if (ghost_top) {
        memcpy(local_P, P + (start_row - 1) * n, n * sizeof(int));
    }
    if (ghost_bot) {
        memcpy(local_P + (ghost_top + local_rows) * n,
               P + (start_row + local_rows) * n, n * sizeof(int));
    }
    MPI_Barrier(MPI_COMM_WORLD);
    double t_start = MPI_Wtime();
    int prev_rank = (rank > 0) ? rank - 1 : MPI_PROC_NULL;
    int next_rank = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;
    int iteration = 0;
    int global_changed;
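    /* Each iteration: swap boundary rows with the neighbouring ranks, relax every
       owned cell against its four neighbours, then agree via Allreduce on whether
       anything changed anywhere. */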
    do {
        /* exchange ghost rows */
        MPI_Sendrecv(
            local_dist + ghost_top * n, n, MPI_UNSIGNED, prev_rank, 0,
            local_dist + (ghost_top + local_rows) * n, n, MPI_UNSIGNED, next_rank, 0,
            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(
            local_dist + (ghost_top + local_rows - 1) * n, n, MPI_UNSIGNED, next_rank, 1,
            local_dist, n, MPI_UNSIGNED, prev_rank, 1,
            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        int local_changed = 0;
        for (int li = ghost_top; li < ghost_top + local_rows; li++) {
            for (int j = 0; j < n; j++) {
                int idx = li * n + j;
                if (local_P[idx] == -1) continue;
                unsigned int cur = local_dist[idx];
                unsigned int mn = cur;
                if (li > 0 && local_dist[(li - 1) * n + j] != INF)
                    mn = (local_dist[(li - 1) * n + j] + 1 < mn) ? local_dist[(li - 1) * n + j] + 1 : mn;
                if (li < ghost_top + local_rows + ghost_bot - 1 && local_dist[(li + 1) * n + j] != INF)
                    mn = (local_dist[(li + 1) * n + j] + 1 < mn) ? local_dist[(li + 1) * n + j] + 1 : mn;
                if (j > 0 && local_dist[li * n + j - 1] != INF)
                    mn = (local_dist[li * n + j - 1] + 1 < mn) ? local_dist[li * n + j - 1] + 1 : mn;
                if (j < n - 1 && local_dist[li * n + j + 1] != INF)
                    mn = (local_dist[li * n + j + 1] + 1 < mn) ? local_dist[li * n + j + 1] + 1 : mn;
                if (mn < cur) {
                    local_dist[idx] = mn;
                    local_changed = 1;
                }
            }
        }
        MPI_Allreduce(&local_changed, &global_changed, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD);
        iteration++;
    } while (global_changed && iteration < 2 * n);
    double t_end = MPI_Wtime();
    double elapsed_ms = (t_end - t_start) * 1000.0;
    MPI_Gatherv(
        local_dist + ghost_top * n, local_rows * n, MPI_UNSIGNED,
        (rank == 0) ? dist : NULL, sendcounts, displs, MPI_UNSIGNED,
        0, MPI_COMM_WORLD);
    if (rank == 0) {
        unsigned int path_len = dist[fx * n + fy];
        if (path_len == INF)
            printf("n=%d Path not found! time=%.2f ms iters=%d procs=%d\n",
                   n, elapsed_ms, iteration, size);
        else
            printf("n=%d path_len=%u time=%.2f ms iters=%d procs=%d\n",
                   n, path_len, elapsed_ms, iteration, size);
        if (csv_path) {
            FILE *fp = fopen(csv_path, "a");
            if (fp) {
                fprintf(fp, "%d,%d,%.4f,%u,%d\n",
                        n, size, elapsed_ms, path_len, iteration);
                fclose(fp);
            }
        }
        free(sendcounts);
        free(displs);
        free(dist);
    }
    free(P);
    free(local_dist);
    free(local_P);
    MPI_Finalize();
    return 0;
}