task2
task2/.gitignore (vendored, new file, 5 lines)
@@ -0,0 +1,5 @@
bin/
results/*.out
results/*.err
results/*.csv
*.pyc

task2/README.md (new file, 122 lines)
@@ -0,0 +1,122 @@
# Task 2: MPI implementation of the wave algorithm

An MPI version of the wave algorithm (Lee's algorithm) for finding the shortest path of a robot across a field. A CUDA version of the same algorithm is included for comparison.

## Layout

- `src/wave_mpi.c`: the MPI implementation (row decomposition, ghost rows).
- `src/wave_cuda.cu`: the CUDA implementation (global memory, based on last semester's program).
- `scripts/build_mpi.sh`: builds the MPI version.
- `scripts/build_cuda.sh`: builds the CUDA version.
- `scripts/run_mpi.slurm`: batch MPI run on the cluster.
- `scripts/run_cuda.slurm`: batch CUDA run on the cluster.
- `scripts/plot_task2_results.py`: builds the plot for the report.

Both Slurm scripts append one CSV row per field size; the schemas are shown below.
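The MPI runner's header is `n,procs,time_ms,path_len,iterations` and the CUDA runner's is `n,impl,time_ms,path_len,iterations` (both written by the `.slurm` scripts). An illustrative MPI row, with made-up values:

```
n,procs,time_ms,path_len,iterations
1000,2,412.5310,1990,1993
```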

## What to do on the SCC (supercomputer center)

### 1. Copy the folder to the cluster

```bash
scp -r task2 polytech:~/supercomputers/
```

### 2. Connect

```bash
ssh polytech
cd ~/supercomputers/task2
```

### 3. Check the available MPI modules

```bash
module avail mpi
```

If the `mpi/openmpi` module is not found, check the list and put the correct name into `scripts/run_mpi.slurm` (the `module load mpi/openmpi` line).
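If `module avail mpi` shows nothing useful, the full module list can be filtered instead. On many systems `module` prints its listing to stderr, hence the redirect:

```bash
module avail 2>&1 | grep -i mpi
```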

### 4. Run the CUDA version (for comparison)

```bash
sbatch scripts/run_cuda.slurm
```

### 5. Run MPI on 1, 2, and 4 nodes

```bash
sbatch --nodes=1 scripts/run_mpi.slurm
sbatch --nodes=2 scripts/run_mpi.slurm
sbatch --nodes=4 scripts/run_mpi.slurm
```
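The same three submissions as a loop, if preferred:

```bash
for n in 1 2 4; do sbatch --nodes=$n scripts/run_mpi.slurm; done
```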

### 6. Check the job status

```bash
squeue -u tm3u21
sacct -j <JOBID> --format=JobID,JobName,Partition,State,Elapsed,NNodes,AllocTRES%40,NodeList,ExitCode
```
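While a job is running, its output can also be followed live; the file name follows the `#SBATCH --output=results/%x-%j.out` pattern from the scripts:

```bash
tail -f results/task2-mpi-<JOBID>.out
```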

### 7. Look at the results

```bash
less results/task2-mpi-<JOBID>.out
cat results/task2-mpi-1n-<JOBID>.csv
cat results/task2-mpi-2n-<JOBID>.csv
cat results/task2-mpi-4n-<JOBID>.csv
cat results/task2-cuda-<JOBID>.csv
```
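For a quick aligned view of any of the CSV files in the terminal:

```bash
column -s, -t results/task2-mpi-1n-<JOBID>.csv
```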

### 8. Build the plot

On the local machine (requires `matplotlib`):

```bash
python3 scripts/plot_task2_results.py \
  --mpi1 results/task2-mpi-1n-XXXXX.csv \
  --mpi2 results/task2-mpi-2n-XXXXX.csv \
  --mpi4 results/task2-mpi-4n-XXXXX.csv \
  --cuda results/task2-cuda-XXXXX.csv \
  -o ../report/img/task2-time-comparison.png
```

## What to collect for the report

### Screenshot 1: `task2-mpi-run.png` (MPI program output)

Once the MPI jobs finish, open the output of one of them:

```bash
less results/task2-mpi-<JOBID>.out
```

Capture the `===== benchmark =====` block: it lists every size with its timing.

### Screenshot 2: `task2-cuda-run.png` (CUDA program output)

```bash
less results/task2-cuda-<JOBID>.out
```

Capture the `===== benchmark =====` block.

### Screenshot 3: `task2-sacct.png` (Slurm accounting)

Collect all the JOBIDs (3 MPI + 1 CUDA) and run:

```bash
sacct -j <JOB_MPI1>,<JOB_MPI2>,<JOB_MPI4>,<JOB_CUDA> \
  --format=JobID,JobName,Partition,State,Elapsed,NNodes,AllocTRES%40,NodeList,ExitCode
```

### Screenshot 4: `task2-time-comparison.png` (the plot)

Generated by `plot_task2_results.py` (see step 8 above).

### Where to put the screenshots

All images go into `report/img/`:

- `report/img/task2-mpi-run.png`
- `report/img/task2-cuda-run.png`
- `report/img/task2-sacct.png`
- `report/img/task2-time-comparison.png`

task2/results/.gitkeep (new file, empty)

task2/scripts/build_cuda.sh (new executable file, 7 lines)
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")/.."
CUDA_ARCH="${CUDA_ARCH:-sm_35}"
mkdir -p bin
nvcc -ccbin g++ -O3 -arch="$CUDA_ARCH" -o bin/wave_cuda src/wave_cuda.cu
echo "Built bin/wave_cuda (arch=$CUDA_ARCH)"
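The default `sm_35` matches the Tesla K40 on the `tornado-k40` partition; the script accepts an override through the `CUDA_ARCH` environment variable, e.g. for a newer GPU (the `sm_60` value here is only an illustration):

```bash
CUDA_ARCH=sm_60 ./scripts/build_cuda.sh
```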

task2/scripts/build_mpi.sh (new executable file, 6 lines)
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")/.."
mkdir -p bin
mpicc -O3 -std=c99 -o bin/wave_mpi src/wave_mpi.c
echo "Built bin/wave_mpi"

task2/scripts/plot_task2_results.py (new executable file, 73 lines)
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
Plots the computation time versus field size for MPI (1, 2, 4 nodes) and CUDA.

Usage:
    python3 plot_task2_results.py \
        --mpi1 results/task2-mpi-1n-XXXXX.csv \
        --mpi2 results/task2-mpi-2n-XXXXX.csv \
        --mpi4 results/task2-mpi-4n-XXXXX.csv \
        --cuda results/task2-cuda-XXXXX.csv \
        -o report/img/task2-time-comparison.png
"""
import argparse
import csv
from pathlib import Path

import matplotlib.pyplot as plt


def read_csv(path: str) -> tuple[list[int], list[float]]:
    """Read (size, time) pairs; the MPI and CUDA CSV schemas share the n and time_ms columns."""
    sizes, times = [], []
    with open(path) as f:
        for row in csv.DictReader(f):
            sizes.append(int(row["n"]))
            times.append(float(row["time_ms"]))
    return sizes, times


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--mpi1", required=True, help="CSV for MPI 1 node")
    parser.add_argument("--mpi2", required=True, help="CSV for MPI 2 nodes")
    parser.add_argument("--mpi4", required=True, help="CSV for MPI 4 nodes")
    parser.add_argument("--cuda", required=True, help="CSV for CUDA")
    parser.add_argument("-o", "--output", default="task2-time-comparison.png")
    args = parser.parse_args()

    fig, ax = plt.subplots(figsize=(10, 6))

    for label, path in [
        ("MPI 1 node", args.mpi1),
        ("MPI 2 nodes", args.mpi2),
        ("MPI 4 nodes", args.mpi4),
        ("CUDA", args.cuda),
    ]:
        sizes, times = read_csv(path)
        ax.plot(sizes, times, marker="o", label=label)

    ax.set_xlabel("Field size n")
    ax.set_ylabel("Time, ms")
    ax.set_title("Computation time versus field size")
    ax.legend()
    ax.grid(True, alpha=0.3)

    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(args.output, dpi=150, bbox_inches="tight")
    print(f"Saved: {args.output}")


if __name__ == "__main__":
    main()

task2/scripts/run_cuda.slurm (new file, 49 lines)
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
#SBATCH --job-name=task2-cuda
#SBATCH --partition=tornado-k40
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --time=00:20:00
#SBATCH --output=results/%x-%j.out
#SBATCH --error=results/%x-%j.err

set -euo pipefail

cd "${SLURM_SUBMIT_DIR}"

module purge
module load compiler/gcc/11
module load nvidia/cuda/11.6u2

mkdir -p results bin

./scripts/build_cuda.sh

echo "===== account info ====="
whoami; hostname; date

echo
echo "===== slurm info ====="
echo "SLURM_JOB_ID=${SLURM_JOB_ID:-unknown}"
echo "SLURM_JOB_PARTITION=${SLURM_JOB_PARTITION:-unknown}"
echo "SLURM_NODELIST=${SLURM_NODELIST:-unknown}"
scontrol show job "${SLURM_JOB_ID}" || true

echo
echo "===== node config ====="
lscpu | head -20
nvidia-smi -L || true
nvidia-smi || true

CSV="results/task2-cuda-${SLURM_JOB_ID}.csv"
echo "n,impl,time_ms,path_len,iterations" > "$CSV"

echo
echo "===== benchmark ====="
for N in 500 1000 2000 3000 5000; do
    echo "--- n=$N ---"
    ./bin/wave_cuda "$N" 256 256 "$CSV"
done

echo
echo "===== done ====="

task2/scripts/run_mpi.slurm (new file, 54 lines)
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#SBATCH --job-name=task2-mpi
#SBATCH --partition=tornado
# --nodes is intentionally not set here; pass it at submit time: sbatch --nodes=N
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=56
#SBATCH --time=00:20:00
#SBATCH --output=results/%x-%j.out
#SBATCH --error=results/%x-%j.err

set -euo pipefail

cd "${SLURM_SUBMIT_DIR}"

module purge
module load compiler/gcc/11
module load mpi/openmpi

mkdir -p results bin

./scripts/build_mpi.sh

RANKS=${SLURM_JOB_NUM_NODES}

echo "===== account info ====="
whoami; hostname; date

echo
echo "===== slurm info ====="
echo "SLURM_JOB_ID=${SLURM_JOB_ID:-unknown}"
echo "SLURM_JOB_PARTITION=${SLURM_JOB_PARTITION:-unknown}"
echo "SLURM_JOB_NUM_NODES=${SLURM_JOB_NUM_NODES:-unknown}"
echo "SLURM_NODELIST=${SLURM_NODELIST:-unknown}"
echo "RANKS=${RANKS}"
scontrol show job "${SLURM_JOB_ID}" || true

echo
echo "===== node config ====="
lscpu | head -20
if [ -n "${SLURMD_NODENAME:-}" ]; then
    scontrol show node "${SLURMD_NODENAME}" || true
fi

CSV="results/task2-mpi-${RANKS}n-${SLURM_JOB_ID}.csv"
echo "n,procs,time_ms,path_len,iterations" > "$CSV"

echo
echo "===== benchmark (${RANKS} nodes / ${RANKS} ranks) ====="
for N in 500 1000 2000 3000 5000; do
    echo "--- n=$N ---"
    mpirun -np "${RANKS}" --map-by ppr:1:node --bind-to none ./bin/wave_mpi "$N" "$CSV"
done

echo
echo "===== done ====="

task2/src/wave_cuda.cu (new file, 126 lines)
@@ -0,0 +1,126 @@
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#define INF UINT_MAX
#define OBSTACLE_PROB 10
#define DEFAULT_BLOCKS 256
#define DEFAULT_THREADS 256

/* Fill the n x n field: roughly OBSTACLE_PROB percent of the cells become
 * obstacles (-1), the rest stay free (0). The fixed seed keeps the field
 * identical across runs and across the MPI and CUDA versions. */
static void generate_polygon(int *P, int n) {
    srand(42);
    for (int i = 0; i < n * n; i++)
        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;
    P[sx * n + sy] = 0;  /* make sure start and finish are free */
    P[fx * n + fy] = 0;
}

/* One relaxation sweep: every free cell takes min(neighbor distance + 1).
 * The grid-stride loop lets a fixed launch configuration cover all n*n cells. */
__global__ void wave_step(int *P, unsigned int *dist, int n, bool *changed) {
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    while (tid < n * n) {
        int i = tid / n;
        int j = tid % n;

        if (P[tid] != -1) {
            unsigned int cur = dist[tid];
            unsigned int mn = cur;

            if (i > 0 && dist[(i-1)*n + j] != INF) mn = min(mn, dist[(i-1)*n + j] + 1);
            if (i < n - 1 && dist[(i+1)*n + j] != INF) mn = min(mn, dist[(i+1)*n + j] + 1);
            if (j > 0 && dist[i*n + j - 1] != INF) mn = min(mn, dist[i*n + j - 1] + 1);
            if (j < n - 1 && dist[i*n + j + 1] != INF) mn = min(mn, dist[i*n + j + 1] + 1);

            if (mn < cur) {
                dist[tid] = mn;
                *changed = true;
            }
        }
        tid += blockDim.x * gridDim.x;
    }
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <matrix_size> [blocks] [threads] [csv_file]\n", argv[0]);
        return 1;
    }

    int n = atoi(argv[1]);
    int blocks = (argc >= 3) ? atoi(argv[2]) : DEFAULT_BLOCKS;
    int threads = (argc >= 4) ? atoi(argv[3]) : DEFAULT_THREADS;
    const char *csv_path = (argc >= 5) ? argv[4] : NULL;

    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;

    int *P = (int *)malloc(n * n * sizeof(int));
    generate_polygon(P, n);

    unsigned int *dist_h = (unsigned int *)malloc(n * n * sizeof(unsigned int));
    for (int i = 0; i < n * n; i++) dist_h[i] = INF;
    dist_h[sx * n + sy] = 0;  /* the wave starts from the start cell */

    int *d_P;
    unsigned int *d_dist;
    bool *d_changed;
    cudaMalloc(&d_P, n * n * sizeof(int));
    cudaMalloc(&d_dist, n * n * sizeof(unsigned int));
    cudaMalloc(&d_changed, sizeof(bool));

    cudaMemcpy(d_P, P, n * n * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_dist, dist_h, n * n * sizeof(unsigned int), cudaMemcpyHostToDevice);

    cudaEvent_t t0, t1;
    cudaEventCreate(&t0);
    cudaEventCreate(&t1);
    cudaEventRecord(t0);

    /* Repeat sweeps until one of them changes nothing; the flag round-trips
     * through host memory every iteration. 2*n is a safety cap on the sweeps. */
    int iterations = 0;
    bool changed;
    do {
        changed = false;
        cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
        wave_step<<<blocks, threads>>>(d_P, d_dist, n, d_changed);
        cudaDeviceSynchronize();
        cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
        iterations++;
    } while (changed && iterations < 2 * n);

    cudaEventRecord(t1);
    cudaEventSynchronize(t1);
    float elapsed_ms = 0;
    cudaEventElapsedTime(&elapsed_ms, t0, t1);

    cudaMemcpy(dist_h, d_dist, n * n * sizeof(unsigned int), cudaMemcpyDeviceToHost);

    unsigned int path_len = dist_h[fx * n + fy];
    if (path_len == INF)
        printf("n=%d Path not found! time=%.2f ms iters=%d blocks=%d threads=%d\n",
               n, elapsed_ms, iterations, blocks, threads);
    else
        printf("n=%d path_len=%u time=%.2f ms iters=%d blocks=%d threads=%d\n",
               n, path_len, elapsed_ms, iterations, blocks, threads);

    if (csv_path) {
        FILE *fp = fopen(csv_path, "a");
        if (fp) {
            fprintf(fp, "%d,cuda,%.4f,%u,%d\n", n, elapsed_ms, path_len, iterations);
            fclose(fp);
        }
    }

    free(P);
    free(dist_h);
    cudaFree(d_P);
    cudaFree(d_dist);
    cudaFree(d_changed);
    cudaEventDestroy(t0);
    cudaEventDestroy(t1);

    return 0;
}
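The CUDA API calls above are unchecked. A minimal error-checking wrapper, shown only as a sketch (the `CUDA_CHECK` name is not part of the commit), could look like this:

```c
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

/* Hypothetical helper: abort with a readable message when a CUDA call fails. */
#define CUDA_CHECK(call)                                              \
    do {                                                              \
        cudaError_t err_ = (call);                                    \
        if (err_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error %s at %s:%d\n",               \
                    cudaGetErrorString(err_), __FILE__, __LINE__);    \
            exit(1);                                                  \
        }                                                             \
    } while (0)

/* Usage: CUDA_CHECK(cudaMalloc(&d_P, n * n * sizeof(int))); */
```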

task2/src/wave_mpi.c (new file, 185 lines)
@@ -0,0 +1,185 @@
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <mpi.h>

#define INF UINT_MAX
#define OBSTACLE_PROB 10

/* Same field generator as the CUDA version: fixed seed, ~10% obstacles. */
static void generate_polygon(int *P, int n) {
    srand(42);
    for (int i = 0; i < n * n; i++)
        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;
    P[sx * n + sy] = 0;
    P[fx * n + fy] = 0;
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (argc < 2) {
        if (rank == 0)
            fprintf(stderr, "Usage: mpirun -np <P> %s <matrix_size> [csv_file]\n", argv[0]);
        MPI_Finalize();
        return 1;
    }

    int n = atoi(argv[1]);
    const char *csv_path = (argc >= 3) ? argv[2] : NULL;
    int sx = 2, sy = 2;
    int fx = n - 3, fy = n - 3;

    int *P = (int *)malloc(n * n * sizeof(int));
    unsigned int *dist = NULL;

    if (rank == 0) {
        generate_polygon(P, n);
        dist = (unsigned int *)malloc(n * n * sizeof(unsigned int));
        for (int i = 0; i < n * n; i++)
            dist[i] = INF;
        dist[sx * n + sy] = 0;
    }

    MPI_Bcast(P, n * n, MPI_INT, 0, MPI_COMM_WORLD);

    /* Block row decomposition: the first (n % size) ranks get one extra row. */
    int base_rows = n / size;
    int remainder = n % size;
    int local_rows = base_rows + (rank < remainder ? 1 : 0);
    int start_row = rank * base_rows + (rank < remainder ? rank : remainder);

    /* One ghost row above and below, except at the top and bottom of the field. */
    int ghost_top = (rank > 0) ? 1 : 0;
    int ghost_bot = (rank < size - 1) ? 1 : 0;
    int total_local = (ghost_top + local_rows + ghost_bot) * n;

    unsigned int *local_dist = (unsigned int *)malloc(total_local * sizeof(unsigned int));
    int *local_P = (int *)malloc(total_local * sizeof(int));

    for (int i = 0; i < total_local; i++) {
        local_dist[i] = INF;
        local_P[i] = -1;
    }

    int *sendcounts = NULL, *displs = NULL;
    if (rank == 0) {
        sendcounts = (int *)malloc(size * sizeof(int));
        displs = (int *)malloc(size * sizeof(int));
        int off = 0;
        for (int r = 0; r < size; r++) {
            int rr = base_rows + (r < remainder ? 1 : 0);
            sendcounts[r] = rr * n;
            displs[r] = off;
            off += rr * n;
        }
    }

    MPI_Scatterv(
        (rank == 0) ? dist : NULL, sendcounts, displs, MPI_UNSIGNED,
        local_dist + ghost_top * n, local_rows * n, MPI_UNSIGNED,
        0, MPI_COMM_WORLD);

    /* Copy the local obstacle rows, including the neighbors' boundary rows. */
    for (int i = 0; i < local_rows; i++)
        memcpy(local_P + (ghost_top + i) * n, P + (start_row + i) * n, n * sizeof(int));

    if (ghost_top) {
        memcpy(local_P, P + (start_row - 1) * n, n * sizeof(int));
    }
    if (ghost_bot) {
        memcpy(local_P + (ghost_top + local_rows) * n,
               P + (start_row + local_rows) * n, n * sizeof(int));
    }

    MPI_Barrier(MPI_COMM_WORLD);
    double t_start = MPI_Wtime();

    /* MPI_PROC_NULL at the field edges turns the ghost exchange into a no-op. */
    int prev_rank = (rank > 0) ? rank - 1 : MPI_PROC_NULL;
    int next_rank = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;

    int iteration = 0;
    int global_changed;
    do {
        /* Exchange ghost rows: the first interior row goes up, the last goes down. */
        MPI_Sendrecv(
            local_dist + ghost_top * n, n, MPI_UNSIGNED, prev_rank, 0,
            local_dist + (ghost_top + local_rows) * n, n, MPI_UNSIGNED, next_rank, 0,
            MPI_COMM_WORLD, MPI_STATUS_IGNORE);

        MPI_Sendrecv(
            local_dist + (ghost_top + local_rows - 1) * n, n, MPI_UNSIGNED, next_rank, 1,
            local_dist, n, MPI_UNSIGNED, prev_rank, 1,
            MPI_COMM_WORLD, MPI_STATUS_IGNORE);

        int local_changed = 0;

        /* Relaxation sweep over the interior rows; ghost rows are read-only here. */
        for (int li = ghost_top; li < ghost_top + local_rows; li++) {
            for (int j = 0; j < n; j++) {
                int idx = li * n + j;
                if (local_P[idx] == -1) continue;

                unsigned int cur = local_dist[idx];
                unsigned int mn = cur;

                if (li > 0 && local_dist[(li - 1) * n + j] != INF)
                    mn = (local_dist[(li - 1) * n + j] + 1 < mn) ? local_dist[(li - 1) * n + j] + 1 : mn;
                if (li < ghost_top + local_rows + ghost_bot - 1 && local_dist[(li + 1) * n + j] != INF)
                    mn = (local_dist[(li + 1) * n + j] + 1 < mn) ? local_dist[(li + 1) * n + j] + 1 : mn;
                if (j > 0 && local_dist[li * n + j - 1] != INF)
                    mn = (local_dist[li * n + j - 1] + 1 < mn) ? local_dist[li * n + j - 1] + 1 : mn;
                if (j < n - 1 && local_dist[li * n + j + 1] != INF)
                    mn = (local_dist[li * n + j + 1] + 1 < mn) ? local_dist[li * n + j + 1] + 1 : mn;

                if (mn < cur) {
                    local_dist[idx] = mn;
                    local_changed = 1;
                }
            }
        }

        /* Converged once no rank changed anything; 2*n is a safety cap on sweeps. */
        MPI_Allreduce(&local_changed, &global_changed, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD);
        iteration++;
    } while (global_changed && iteration < 2 * n);

    double t_end = MPI_Wtime();
    double elapsed_ms = (t_end - t_start) * 1000.0;

    MPI_Gatherv(
        local_dist + ghost_top * n, local_rows * n, MPI_UNSIGNED,
        (rank == 0) ? dist : NULL, sendcounts, displs, MPI_UNSIGNED,
        0, MPI_COMM_WORLD);

    if (rank == 0) {
        unsigned int path_len = dist[fx * n + fy];
        if (path_len == INF)
            printf("n=%d Path not found! time=%.2f ms iters=%d procs=%d\n",
                   n, elapsed_ms, iteration, size);
        else
            printf("n=%d path_len=%u time=%.2f ms iters=%d procs=%d\n",
                   n, path_len, elapsed_ms, iteration, size);

        if (csv_path) {
            FILE *fp = fopen(csv_path, "a");
            if (fp) {
                fprintf(fp, "%d,%d,%.4f,%u,%d\n",
                        n, size, elapsed_ms, path_len, iteration);
                fclose(fp);
            }
        }

        free(sendcounts);
        free(displs);
        free(dist);
    }

    free(P);
    free(local_dist);
    free(local_P);

    MPI_Finalize();
    return 0;
}
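As a sanity check of the block distribution used above, here is a standalone sketch (not part of the commit) that prints each rank's row range for a hypothetical n = 10 split over 3 ranks:

```c
#include <stdio.h>

/* Mirrors the decomposition in wave_mpi.c: the first (n % size) ranks get an extra row. */
int main(void) {
    int n = 10, size = 3;
    int base_rows = n / size;   /* 3 */
    int remainder = n % size;   /* 1 */
    for (int rank = 0; rank < size; rank++) {
        int local_rows = base_rows + (rank < remainder ? 1 : 0);
        int start_row = rank * base_rows + (rank < remainder ? rank : remainder);
        printf("rank %d: rows [%d, %d)\n", rank, start_row, start_row + local_rows);
    }
    /* Prints rows [0, 4), [4, 7), [7, 10). */
    return 0;
}
```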