From bbb1b91e953660cdff13d3d67adb4886d6820a33 Mon Sep 17 00:00:00 2001
From: Arity-T
Date: Tue, 17 Mar 2026 11:21:20 +0300
Subject: [PATCH] task2

---
 task2/.gitignore                    |   5 +
 task2/README.md                     | 122 ++++++++++++++++++
 task2/results/.gitkeep              |   0
 task2/scripts/build_cuda.sh         |   7 ++
 task2/scripts/build_mpi.sh          |   6 +
 task2/scripts/plot_task2_results.py |  73 +++++++++++
 task2/scripts/run_cuda.slurm        |  49 ++++++++
 task2/scripts/run_mpi.slurm         |  54 ++++++++
 task2/src/wave_cuda.cu              | 126 +++++++++++++++++++
 task2/src/wave_mpi.c                | 185 ++++++++++++++++++++++++++++
 10 files changed, 627 insertions(+)
 create mode 100644 task2/.gitignore
 create mode 100644 task2/README.md
 create mode 100644 task2/results/.gitkeep
 create mode 100755 task2/scripts/build_cuda.sh
 create mode 100755 task2/scripts/build_mpi.sh
 create mode 100755 task2/scripts/plot_task2_results.py
 create mode 100644 task2/scripts/run_cuda.slurm
 create mode 100644 task2/scripts/run_mpi.slurm
 create mode 100644 task2/src/wave_cuda.cu
 create mode 100644 task2/src/wave_mpi.c

diff --git a/task2/.gitignore b/task2/.gitignore
new file mode 100644
index 0000000..33517b1
--- /dev/null
+++ b/task2/.gitignore
@@ -0,0 +1,5 @@
+bin/
+results/*.out
+results/*.err
+results/*.csv
+*.pyc
diff --git a/task2/README.md b/task2/README.md
new file mode 100644
index 0000000..4e00f5d
--- /dev/null
+++ b/task2/README.md
@@ -0,0 +1,122 @@
+# Task 2: MPI implementation of the wave algorithm
+
+An MPI version of the wave algorithm (Lee algorithm) for finding the shortest path of a robot across a polygon (an n×n grid with obstacles). A CUDA version of the same algorithm is provided for comparison.
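+
+Both versions repeat the same relaxation sweep until no cell improves. For reference, here is a minimal serial sketch of that sweep (illustration only, not part of the build; it assumes the conventions of `src/wave_mpi.c`: `P[i*n+j] == -1` marks an obstacle, `dist[]` starts at `UINT_MAX` everywhere except the source cell):
+
+```c
+/* One relaxation sweep: each free cell takes min(neighbour) + 1.
+   Returns 1 if any cell improved; the caller sweeps until it returns 0. */
+int relax_once(const int *P, unsigned int *dist, int n) {
+    int changed = 0;
+    for (int i = 0; i < n; i++)
+        for (int j = 0; j < n; j++) {
+            int idx = i * n + j;
+            if (P[idx] == -1) continue;  /* obstacle */
+            unsigned int mn = dist[idx];
+            if (i > 0     && dist[idx - n] != UINT_MAX && dist[idx - n] + 1 < mn) mn = dist[idx - n] + 1;
+            if (i < n - 1 && dist[idx + n] != UINT_MAX && dist[idx + n] + 1 < mn) mn = dist[idx + n] + 1;
+            if (j > 0     && dist[idx - 1] != UINT_MAX && dist[idx - 1] + 1 < mn) mn = dist[idx - 1] + 1;
+            if (j < n - 1 && dist[idx + 1] != UINT_MAX && dist[idx + 1] + 1 < mn) mn = dist[idx + 1] + 1;
+            if (mn < dist[idx]) { dist[idx] = mn; changed = 1; }
+        }
+    return changed;
+}
+```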
+
+## Structure
+
+- `src/wave_mpi.c` — the MPI implementation (row-wise decomposition with ghost rows).
+- `src/wave_cuda.cu` — the CUDA implementation (global memory, based on last semester's program).
+- `scripts/build_mpi.sh` — builds the MPI version.
+- `scripts/build_cuda.sh` — builds the CUDA version.
+- `scripts/run_mpi.slurm` — batch MPI run on the cluster.
+- `scripts/run_cuda.slurm` — batch CUDA run on the cluster.
+- `scripts/plot_task2_results.py` — plots the graph for the report.
+
+## What to do on the SCC
+
+### 1. Copy the directory to the cluster
+
+```bash
+scp -r task2 polytech:~/supercomputers/
+```
+
+### 2. Connect
+
+```bash
+ssh polytech
+cd ~/supercomputers/task2
+```
+
+### 3. Check the available MPI modules
+
+```bash
+module avail mpi
+```
+
+If the `mpi/openmpi` module is not found, pick the right name from the list and substitute it in `scripts/run_mpi.slurm` (the `module load mpi/openmpi` line).
+
+### 4. Run the CUDA version (for comparison)
+
+```bash
+sbatch scripts/run_cuda.slurm
+```
+
+### 5. Run MPI on 1, 2 and 4 nodes
+
+```bash
+sbatch --nodes=1 scripts/run_mpi.slurm
+sbatch --nodes=2 scripts/run_mpi.slurm
+sbatch --nodes=4 scripts/run_mpi.slurm
+```
+
+### 6. Check job status
+
+```bash
+squeue -u tm3u21
+sacct -j <jobid> --format=JobID,JobName,Partition,State,Elapsed,NNodes,AllocTRES%40,NodeList,ExitCode
+```
+
+### 7. Inspect the results
+
+```bash
+less results/task2-mpi-<jobid>.out
+cat results/task2-mpi-1n-<jobid>.csv
+cat results/task2-mpi-2n-<jobid>.csv
+cat results/task2-mpi-4n-<jobid>.csv
+cat results/task2-cuda-<jobid>.csv
+```
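+
+Each CSV starts with the header written by the corresponding Slurm script and gains one data row per polygon size; the MPI and CUDA files differ only in the second column:
+
+```
+n,procs,time_ms,path_len,iterations   (MPI)
+n,impl,time_ms,path_len,iterations    (CUDA)
+```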
+
+### 8. Plot the graph
+
+On your local machine (`matplotlib` required):
+
+```bash
+python3 scripts/plot_task2_results.py \
+    --mpi1 results/task2-mpi-1n-XXXXX.csv \
+    --mpi2 results/task2-mpi-2n-XXXXX.csv \
+    --mpi4 results/task2-mpi-4n-XXXXX.csv \
+    --cuda results/task2-cuda-XXXXX.csv \
+    -o ../report/img/task2-time-comparison.png
+```
+
+## What to collect for the report
+
+### Screenshot 1: `task2-mpi-run.png` — MPI program output
+
+Once the MPI jobs finish, open the output of one of them:
+
+```bash
+less results/task2-mpi-<jobid>.out
+```
+
+Capture the `===== benchmark =====` block — it lists every size with its timing.
+
+### Screenshot 2: `task2-cuda-run.png` — CUDA program output
+
+```bash
+less results/task2-cuda-<jobid>.out
+```
+
+Capture the `===== benchmark =====` block.
+
+### Screenshot 3: `task2-sacct.png` — Slurm job details
+
+Collect all the job IDs (3 MPI + 1 CUDA) and run:
+
+```bash
+sacct -j <id1>,<id2>,<id3>,<id4> \
+    --format=JobID,JobName,Partition,State,Elapsed,NNodes,AllocTRES%40,NodeList,ExitCode
+```
+
+### Screenshot 4: `task2-time-comparison.png` — the graph
+
+Generated by `plot_task2_results.py` (see step 8 above).
+
+### Where to put the screenshots
+
+All images go into `report/img/`:
+
+- `report/img/task2-mpi-run.png`
+- `report/img/task2-cuda-run.png`
+- `report/img/task2-sacct.png`
+- `report/img/task2-time-comparison.png`
diff --git a/task2/results/.gitkeep b/task2/results/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/task2/scripts/build_cuda.sh b/task2/scripts/build_cuda.sh
new file mode 100755
index 0000000..88e7050
--- /dev/null
+++ b/task2/scripts/build_cuda.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")/.."
+CUDA_ARCH="${CUDA_ARCH:-sm_35}"
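+# sm_35 matches the Tesla K40 GPUs of the tornado-k40 partition used by
+# run_cuda.slurm; for a different GPU, override it at build time, e.g.
+#   CUDA_ARCH=sm_60 ./scripts/build_cuda.sh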
+mkdir -p bin
+nvcc -ccbin g++ -O3 -arch="$CUDA_ARCH" -o bin/wave_cuda src/wave_cuda.cu
+echo "Built bin/wave_cuda (arch=$CUDA_ARCH)"
diff --git a/task2/scripts/build_mpi.sh b/task2/scripts/build_mpi.sh
new file mode 100755
index 0000000..0f81b2d
--- /dev/null
+++ b/task2/scripts/build_mpi.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")/.."
+mkdir -p bin
+mpicc -O3 -std=c99 -o bin/wave_mpi src/wave_mpi.c
+echo "Built bin/wave_mpi"
diff --git a/task2/scripts/plot_task2_results.py b/task2/scripts/plot_task2_results.py
new file mode 100755
index 0000000..a4c553a
--- /dev/null
+++ b/task2/scripts/plot_task2_results.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""
+Plots computation time versus polygon size
+for MPI (1, 2, 4 nodes) and CUDA.
+
+Usage:
+    python3 plot_task2_results.py \
+        --mpi1 results/task2-mpi-1n-XXXXX.csv \
+        --mpi2 results/task2-mpi-2n-XXXXX.csv \
+        --mpi4 results/task2-mpi-4n-XXXXX.csv \
+        --cuda results/task2-cuda-XXXXX.csv \
+        -o report/img/task2-time-comparison.png
+"""
+import argparse
+import csv
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+
+
+def read_csv(path: str) -> tuple[list[int], list[float]]:
+    """Read (n, time_ms) pairs; the MPI and CUDA files share these columns."""
+    sizes, times = [], []
+    with open(path) as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            sizes.append(int(row["n"]))
+            times.append(float(row["time_ms"]))
+    return sizes, times
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--mpi1", required=True, help="CSV for MPI 1 node")
+    parser.add_argument("--mpi2", required=True, help="CSV for MPI 2 nodes")
+    parser.add_argument("--mpi4", required=True, help="CSV for MPI 4 nodes")
+    parser.add_argument("--cuda", required=True, help="CSV for CUDA")
+    parser.add_argument("-o", "--output", default="task2-time-comparison.png")
+    args = parser.parse_args()
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+
+    for label, path in [
+        ("MPI 1 node", args.mpi1),
+        ("MPI 2 nodes", args.mpi2),
+        ("MPI 4 nodes", args.mpi4),
+        ("CUDA", args.cuda),
+    ]:
+        sizes, times = read_csv(path)
+        ax.plot(sizes, times, marker="o", label=label)
+
+    ax.set_xlabel("Polygon size n")
+    ax.set_ylabel("Time, ms")
+    ax.set_title("Computation time versus polygon size")
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+
+    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(args.output, dpi=150, bbox_inches="tight")
+    print(f"Saved: {args.output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/task2/scripts/run_cuda.slurm b/task2/scripts/run_cuda.slurm
new file mode 100644
index 0000000..6d79c11
--- /dev/null
+++ b/task2/scripts/run_cuda.slurm
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#SBATCH --job-name=task2-cuda
+#SBATCH --partition=tornado-k40
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --time=00:20:00
+#SBATCH --output=results/%x-%j.out
+#SBATCH --error=results/%x-%j.err
+
+set -euo pipefail
+
+cd "${SLURM_SUBMIT_DIR}"
+
+module purge
+module load compiler/gcc/11
+module load nvidia/cuda/11.6u2
+
+mkdir -p results bin
+
+./scripts/build_cuda.sh
+
+echo "===== account info ====="
+whoami; hostname; date
+
+echo
+echo "===== slurm info ====="
+echo "SLURM_JOB_ID=${SLURM_JOB_ID:-unknown}"
+echo "SLURM_JOB_PARTITION=${SLURM_JOB_PARTITION:-unknown}"
+echo "SLURM_NODELIST=${SLURM_NODELIST:-unknown}"
+scontrol show job "${SLURM_JOB_ID}" || true
+
+echo
+echo "===== node config ====="
+lscpu | head -20
+nvidia-smi -L || true
+nvidia-smi || true
+
+CSV="results/task2-cuda-${SLURM_JOB_ID}.csv"
+echo "n,impl,time_ms,path_len,iterations" > "$CSV"
+
+echo
+echo "===== benchmark ====="
+for N in 500 1000 2000 3000 5000; do
+    echo "--- n=$N ---"
+    ./bin/wave_cuda "$N" 256 256 "$CSV"
+done
+
+echo
+echo "===== done ====="
diff --git a/task2/scripts/run_mpi.slurm b/task2/scripts/run_mpi.slurm
new file mode 100644
index 0000000..1766d2e
--- /dev/null
+++ b/task2/scripts/run_mpi.slurm
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#SBATCH --job-name=task2-mpi
+#SBATCH --partition=tornado
+# The node count is supplied at submit time via sbatch --nodes=N (see README).
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=56
+#SBATCH --time=00:20:00
+#SBATCH --output=results/%x-%j.out
+#SBATCH --error=results/%x-%j.err
+
+set -euo pipefail
+
+cd "${SLURM_SUBMIT_DIR}"
+
+module purge
+module load compiler/gcc/11
+module load mpi/openmpi
+
+mkdir -p results bin
+
+./scripts/build_mpi.sh
+
+RANKS=${SLURM_JOB_NUM_NODES}
+
+echo "===== account info ====="
+whoami; hostname; date
+
+echo
+echo "===== slurm info ====="
+echo "SLURM_JOB_ID=${SLURM_JOB_ID:-unknown}"
+echo "SLURM_JOB_PARTITION=${SLURM_JOB_PARTITION:-unknown}"
+echo "SLURM_JOB_NUM_NODES=${SLURM_JOB_NUM_NODES:-unknown}"
+echo "SLURM_NODELIST=${SLURM_NODELIST:-unknown}"
+echo "RANKS=${RANKS}"
+scontrol show job "${SLURM_JOB_ID}" || true
+
+echo
+echo "===== node config ====="
+lscpu | head -20
+if [ -n "${SLURMD_NODENAME:-}" ]; then
+    scontrol show node "${SLURMD_NODENAME}" || true
+fi
+
+CSV="results/task2-mpi-${RANKS}n-${SLURM_JOB_ID}.csv"
+echo "n,procs,time_ms,path_len,iterations" > "$CSV"
+
+echo
+echo "===== benchmark (${RANKS} nodes / ${RANKS} ranks) ====="
+for N in 500 1000 2000 3000 5000; do
+    echo "--- n=$N ---"
+    mpirun -np "${RANKS}" --map-by ppr:1:node --bind-to none ./bin/wave_mpi "$N" "$CSV"
+done
+
+echo
+echo "===== done ====="
diff --git a/task2/src/wave_cuda.cu b/task2/src/wave_cuda.cu
new file mode 100644
index 0000000..e41d4c4
--- /dev/null
+++ b/task2/src/wave_cuda.cu
@@ -0,0 +1,126 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <cuda_runtime.h>
+
+#define INF UINT_MAX
+#define OBSTACLE_PROB 10
+#define DEFAULT_BLOCKS 256
+#define DEFAULT_THREADS 256
+
+static void generate_polygon(int *P, int n) {
+    srand(42);
+    for (int i = 0; i < n * n; i++)
+        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
+    int sx = 2, sy = 2;
+    int fx = n - 3, fy = n - 3;
+    P[sx * n + sy] = 0;
+    P[fx * n + fy] = 0;
+}
+
+__global__ void wave_step(int *P, unsigned int *dist, int n, bool *changed) {
+    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+
+    /* Grid-stride loop: each thread relaxes every (blockDim*gridDim)-th cell. */
+    while (tid < n * n) {
+        int i = tid / n;
+        int j = tid % n;
+
+        if (P[tid] != -1) {
+            unsigned int cur = dist[tid];
+            unsigned int mn = cur;
+
+            if (i > 0 && dist[(i-1)*n + j] != INF) mn = min(mn, dist[(i-1)*n + j] + 1);
+            if (i < n - 1 && dist[(i+1)*n + j] != INF) mn = min(mn, dist[(i+1)*n + j] + 1);
+            if (j > 0 && dist[i*n + j - 1] != INF) mn = min(mn, dist[i*n + j - 1] + 1);
+            if (j < n - 1 && dist[i*n + j + 1] != INF) mn = min(mn, dist[i*n + j + 1] + 1);
+
+            if (mn < cur) {
+                dist[tid] = mn;
+                *changed = true;
+            }
+        }
+        tid += blockDim.x * gridDim.x;
+    }
+}
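+
+/* Note: the runtime calls below are left unchecked for brevity. A checking
+   helper one might wrap them in (sketch only, not used here):
+
+   #define CUDA_CHECK(call)                                            \
+       do {                                                            \
+           cudaError_t err_ = (call);                                  \
+           if (err_ != cudaSuccess) {                                  \
+               fprintf(stderr, "CUDA error %s at %s:%d\n",             \
+                       cudaGetErrorString(err_), __FILE__, __LINE__);  \
+               exit(1);                                                \
+           }                                                           \
+       } while (0)
+*/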
+
+int main(int argc, char *argv[]) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <n> [blocks] [threads] [csv_file]\n", argv[0]);
+        return 1;
+    }
+
+    int n = atoi(argv[1]);
+    int blocks = (argc >= 3) ? atoi(argv[2]) : DEFAULT_BLOCKS;
+    int threads = (argc >= 4) ? atoi(argv[3]) : DEFAULT_THREADS;
+    const char *csv_path = (argc >= 5) ? argv[4] : NULL;
+
+    int sx = 2, sy = 2;
+    int fx = n - 3, fy = n - 3;
+
+    int *P = (int *)malloc(n * n * sizeof(int));
+    generate_polygon(P, n);
+
+    unsigned int *dist_h = (unsigned int *)malloc(n * n * sizeof(unsigned int));
+    for (int i = 0; i < n * n; i++) dist_h[i] = INF;
+    dist_h[sx * n + sy] = 0;
+
+    int *d_P;
+    unsigned int *d_dist;
+    bool *d_changed;
+    cudaMalloc(&d_P, n * n * sizeof(int));
+    cudaMalloc(&d_dist, n * n * sizeof(unsigned int));
+    cudaMalloc(&d_changed, sizeof(bool));
+
+    cudaMemcpy(d_P, P, n * n * sizeof(int), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_dist, dist_h, n * n * sizeof(unsigned int), cudaMemcpyHostToDevice);
+
+    cudaEvent_t t0, t1;
+    cudaEventCreate(&t0);
+    cudaEventCreate(&t1);
+    cudaEventRecord(t0);
+
+    int iterations = 0;
+    bool changed;
+    do {
+        changed = false;
+        cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
+        wave_step<<<blocks, threads>>>(d_P, d_dist, n, d_changed);
+        cudaDeviceSynchronize();
+        cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
+        iterations++;
+    } while (changed && iterations < 2 * n);  /* 2n is a safety cap; the changed flag normally terminates first */
+
+    cudaEventRecord(t1);
+    cudaEventSynchronize(t1);
+    float elapsed_ms = 0;
+    cudaEventElapsedTime(&elapsed_ms, t0, t1);
+
+    cudaMemcpy(dist_h, d_dist, n * n * sizeof(unsigned int), cudaMemcpyDeviceToHost);
+
+    unsigned int path_len = dist_h[fx * n + fy];
+    if (path_len == INF)
+        printf("n=%d Path not found! time=%.2f ms iters=%d blocks=%d threads=%d\n",
+               n, elapsed_ms, iterations, blocks, threads);
+    else
+        printf("n=%d path_len=%u time=%.2f ms iters=%d blocks=%d threads=%d\n",
+               n, path_len, elapsed_ms, iterations, blocks, threads);
+
+    if (csv_path) {
+        FILE *fp = fopen(csv_path, "a");
+        if (fp) {
+            fprintf(fp, "%d,cuda,%.4f,%u,%d\n", n, elapsed_ms, path_len, iterations);
+            fclose(fp);
+        }
+    }
+
+    free(P);
+    free(dist_h);
+    cudaFree(d_P);
+    cudaFree(d_dist);
+    cudaFree(d_changed);
+    cudaEventDestroy(t0);
+    cudaEventDestroy(t1);
+
+    return 0;
+}
diff --git a/task2/src/wave_mpi.c b/task2/src/wave_mpi.c
new file mode 100644
index 0000000..d78de30
--- /dev/null
+++ b/task2/src/wave_mpi.c
@@ -0,0 +1,185 @@
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#define INF UINT_MAX
+#define OBSTACLE_PROB 10
+
+static void generate_polygon(int *P, int n) {
+    srand(42);
+    for (int i = 0; i < n * n; i++)
+        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
+    int sx = 2, sy = 2;
+    int fx = n - 3, fy = n - 3;
+    P[sx * n + sy] = 0;
+    P[fx * n + fy] = 0;
+}
+
+int main(int argc, char *argv[]) {
+    MPI_Init(&argc, &argv);
+
+    int rank, size;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+    if (argc < 2) {
+        if (rank == 0)
+            fprintf(stderr, "Usage: mpirun -np <procs> %s <n> [csv_file]\n", argv[0]);
+        MPI_Finalize();
+        return 1;
+    }
+
+    int n = atoi(argv[1]);
+    const char *csv_path = (argc >= 3) ? argv[2] : NULL;
+    int sx = 2, sy = 2;
+    int fx = n - 3, fy = n - 3;
+
+    int *P = (int *)malloc(n * n * sizeof(int));
+    unsigned int *dist = NULL;
+
+    if (rank == 0) {
+        generate_polygon(P, n);
+        dist = (unsigned int *)malloc(n * n * sizeof(unsigned int));
+        for (int i = 0; i < n * n; i++)
+            dist[i] = INF;
+        dist[sx * n + sy] = 0;
+    }
+
+    MPI_Bcast(P, n * n, MPI_INT, 0, MPI_COMM_WORLD);
+
+    /* Block row distribution: the first (n % size) ranks get one extra row,
+       e.g. n=10, size=4 gives 3, 3, 2, 2 rows. */
+    int base_rows = n / size;
+    int remainder = n % size;
+    int local_rows = base_rows + (rank < remainder ? 1 : 0);
+    int start_row = rank * base_rows + (rank < remainder ? rank : remainder);
+
+    int ghost_top = (rank > 0) ? 1 : 0;
+    int ghost_bot = (rank < size - 1) ? 1 : 0;
+    int total_local = (ghost_top + local_rows + ghost_bot) * n;
+
+    unsigned int *local_dist = (unsigned int *)malloc(total_local * sizeof(unsigned int));
+    int *local_P = (int *)malloc(total_local * sizeof(int));
+
+    for (int i = 0; i < total_local; i++) {
+        local_dist[i] = INF;
+        local_P[i] = -1;
+    }
+
+    int *sendcounts = NULL, *displs = NULL;
+    if (rank == 0) {
+        sendcounts = (int *)malloc(size * sizeof(int));
+        displs = (int *)malloc(size * sizeof(int));
+        int off = 0;
+        for (int r = 0; r < size; r++) {
+            int rr = base_rows + (r < remainder ? 1 : 0);
+            sendcounts[r] = rr * n;
+            displs[r] = off;
+            off += rr * n;
+        }
+    }
+
+    MPI_Scatterv(
+        (rank == 0) ? dist : NULL, sendcounts, displs, MPI_UNSIGNED,
+        local_dist + ghost_top * n, local_rows * n, MPI_UNSIGNED,
+        0, MPI_COMM_WORLD);
+
+    for (int i = 0; i < local_rows; i++)
+        memcpy(local_P + (ghost_top + i) * n, P + (start_row + i) * n, n * sizeof(int));
+
+    if (ghost_top) {
+        memcpy(local_P, P + (start_row - 1) * n, n * sizeof(int));
+    }
+    if (ghost_bot) {
+        memcpy(local_P + (ghost_top + local_rows) * n,
+               P + (start_row + local_rows) * n, n * sizeof(int));
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    double t_start = MPI_Wtime();
+
+    int prev_rank = (rank > 0) ? rank - 1 : MPI_PROC_NULL;
+    int next_rank = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;
+
+    int iteration = 0;
+    int global_changed;
+    do {
+        /* exchange ghost rows */
+        MPI_Sendrecv(
+            local_dist + ghost_top * n, n, MPI_UNSIGNED, prev_rank, 0,
+            local_dist + (ghost_top + local_rows) * n, n, MPI_UNSIGNED, next_rank, 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+        MPI_Sendrecv(
+            local_dist + (ghost_top + local_rows - 1) * n, n, MPI_UNSIGNED, next_rank, 1,
+            local_dist, n, MPI_UNSIGNED, prev_rank, 1,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
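+
+        /* Boundary ranks have prev_rank/next_rank set to MPI_PROC_NULL, which
+           turns the corresponding send or receive into a no-op, so both
+           MPI_Sendrecv calls are safe and deadlock-free on every rank. */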
+
+        int local_changed = 0;
+
+        for (int li = ghost_top; li < ghost_top + local_rows; li++) {
+            for (int j = 0; j < n; j++) {
+                int idx = li * n + j;
+                if (local_P[idx] == -1) continue;
+
+                unsigned int cur = local_dist[idx];
+                unsigned int mn = cur;
+
+                if (li > 0 && local_dist[(li - 1) * n + j] != INF)
+                    mn = (local_dist[(li - 1) * n + j] + 1 < mn) ? local_dist[(li - 1) * n + j] + 1 : mn;
+                if (li < ghost_top + local_rows + ghost_bot - 1 && local_dist[(li + 1) * n + j] != INF)
+                    mn = (local_dist[(li + 1) * n + j] + 1 < mn) ? local_dist[(li + 1) * n + j] + 1 : mn;
+                if (j > 0 && local_dist[li * n + j - 1] != INF)
+                    mn = (local_dist[li * n + j - 1] + 1 < mn) ? local_dist[li * n + j - 1] + 1 : mn;
+                if (j < n - 1 && local_dist[li * n + j + 1] != INF)
+                    mn = (local_dist[li * n + j + 1] + 1 < mn) ? local_dist[li * n + j + 1] + 1 : mn;
+
+                if (mn < cur) {
+                    local_dist[idx] = mn;
+                    local_changed = 1;
+                }
+            }
+        }
+
+        MPI_Allreduce(&local_changed, &global_changed, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD);
+        iteration++;
+    } while (global_changed && iteration < 2 * n);
+
+    double t_end = MPI_Wtime();
+    double elapsed_ms = (t_end - t_start) * 1000.0;
+
+    MPI_Gatherv(
+        local_dist + ghost_top * n, local_rows * n, MPI_UNSIGNED,
+        (rank == 0) ? dist : NULL, sendcounts, displs, MPI_UNSIGNED,
+        0, MPI_COMM_WORLD);
+
+    if (rank == 0) {
+        unsigned int path_len = dist[fx * n + fy];
+        if (path_len == INF)
+            printf("n=%d Path not found! time=%.2f ms iters=%d procs=%d\n",
+                   n, elapsed_ms, iteration, size);
+        else
+            printf("n=%d path_len=%u time=%.2f ms iters=%d procs=%d\n",
+                   n, path_len, elapsed_ms, iteration, size);
+
+        if (csv_path) {
+            FILE *fp = fopen(csv_path, "a");
+            if (fp) {
+                fprintf(fp, "%d,%d,%.4f,%u,%d\n",
+                        n, size, elapsed_ms, path_len, iteration);
+                fclose(fp);
+            }
+        }
+
+        free(sendcounts);
+        free(displs);
+        free(dist);
+    }
+
+    free(P);
+    free(local_dist);
+    free(local_P);
+
+    MPI_Finalize();
+    return 0;
+}