// Files
// supercomputers/kernel.cu
//
// 160 lines
// 5.6 KiB
// Plaintext
// Raw Blame History
//
// This file contains ambiguous Unicode characters
//
// This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
// Launch configuration used for the wave_step kernel: number of blocks in the
// grid and number of threads per block.
#define BLOCKS_COUNT 16
#define THREADS_COUNT 16
// Side length of the square map (the map has MATRIX_SIZE * MATRIX_SIZE cells).
#define MATRIX_SIZE 32
// Chance, in percent, that a generated cell is an obstacle.
#define OBSTACLE_PROB 10
// Coordinates of the start cell.
#define START_X 2
#define START_Y 2
// Coordinates of the finish cell, derived from the map size.
#define FINISH_X (MATRIX_SIZE - 3)
#define FINISH_Y (MATRIX_SIZE - 3)
#define INF UINT_MAX // "Not reached" distance marker: use the unsigned maximum
// Fills the n x n map P (stored row by row, cell (x, y) at index x + y * n)
// with random obstacles.
//
// Each cell becomes -1 (obstacle) with probability OBSTACLE_PROB percent and
// 0 (free) otherwise. The start and finish cells are then forced to be free.
//
//   P - host buffer with room for n * n ints; fully overwritten.
//   n - side length of the map.
void generate_polygon(int* P, int n) {
    // A fixed seed keeps the generated map identical from run to run.
    srand(42);

    const int cells = n * n;
    for (int i = 0; i < cells; i++) {
        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
    }

    // Address the special cells with the real row length n (not MATRIX_SIZE),
    // and skip them if they do not fit into a map of this size, so a caller
    // passing a different n never touches the wrong cell or writes out of
    // bounds.
    const int sx = START_X, sy = START_Y;
    const int fx = FINISH_X, fy = FINISH_Y;
    if (sx >= 0 && sx < n && sy >= 0 && sy < n) {
        P[sx + sy * n] = 0;  // Guarantee that the start cell is free
    }
    if (fx >= 0 && fx < n && fy >= 0 && fy < n) {
        P[fx + fy * n] = 0;  // Guarantee that the finish cell is free
    }
}
// Prints the n x n map to stdout, one text row per map row, using a fixed
// width of four characters per cell:
//   obstacle (P == -1)      -> a solid block
//   start / finish cell     -> "S" / "F"
//   unreached (dist == INF) -> "."
//   anything else           -> the distance value
//
//   P    - map cells, n * n ints, row by row.
//   dist - distances for the same cells, n * n values.
//   n    - side length of the map.
void print_distance_map(int* P, unsigned int* dist, int n) {
    for (int row = 0; row < n; row++) {
        for (int col = 0; col < n; col++) {
            const int idx = row * n + col;
            // Cells are stored as x + y * n, so the row index is the Y
            // coordinate and the column index is the X coordinate.
            const bool is_start = (row == START_Y && col == START_X);
            const bool is_finish = (row == FINISH_Y && col == FINISH_X);

            if (P[idx] == -1)
                printf("████");              // Obstacle
            else if (is_start)
                printf("%-4s", "S");        // Start
            else if (is_finish)
                printf("%-4s", "F");        // Finish
            else if (dist[idx] == INF)
                printf("%-4s", ".");        // Unreachable area
            else
                printf("%-4u", dist[idx]);  // Distance value
        }
        printf("\n");
    }
}
// One relaxation sweep of the wave algorithm over an n x n map.
//
// For every free cell (P != -1) the kernel takes the smallest
// "neighbour distance + 1" over the four edge-adjacent cells that have
// already been reached (dist != INF) and stores it if it improves the cell's
// current distance. Obstacle cells are never written.
//
//   P       - map cells, n * n ints, row by row; -1 marks an obstacle.
//   dist    - distances, n * n values, updated in place; INF means "not
//             reached yet".
//   n       - side length of the map.
//   changed - set to true if at least one distance was lowered. It is never
//             cleared here, so the caller must reset it before each launch.
//
// Works with any 1D launch configuration: each thread starts at its global
// index and advances by the total number of launched threads.
//
// Note: neighbours may be updated by other threads while this sweep runs, so
// a cell can observe either the old or the new neighbour value. Stored
// distances only ever decrease, and the result is final once a sweep reports
// no change.
__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) {
    const int total = n * n;
    // Total number of threads in the launch; hoisted so the loop does not mix
    // signed and unsigned arithmetic on every step.
    const int stride = blockDim.x * gridDim.x;

    for (int cell = threadIdx.x + blockIdx.x * blockDim.x; cell < total; cell += stride) {
        if (P[cell] == -1) continue;  // Obstacles keep their INF distance

        const int i = cell / n;  // row
        const int j = cell % n;  // column

        const unsigned int current_dist = dist[cell];
        unsigned int min_dist = current_dist;

        // Each neighbour is loaded exactly once, so the INF test and the +1
        // always use the same value; the INF test also prevents INF + 1 from
        // wrapping around to 0.
        if (i > 0) {
            const unsigned int up = dist[cell - n];
            if (up != INF) min_dist = min(min_dist, up + 1);
        }
        if (i < n - 1) {
            const unsigned int down = dist[cell + n];
            if (down != INF) min_dist = min(min_dist, down + 1);
        }
        if (j > 0) {
            const unsigned int left = dist[cell - 1];
            if (left != INF) min_dist = min(min_dist, left + 1);
        }
        if (j < n - 1) {
            const unsigned int right = dist[cell + 1];
            if (right != INF) min_dist = min(min_dist, right + 1);
        }

        if (min_dist < current_dist) {
            dist[cell] = min_dist;
            *changed = true;
        }
    }
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
static void check_cuda(cudaError_t err, const char* what) {
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Generates a random map, runs wave_step on the GPU until the distance field
// stops changing, then prints the path length to the finish cell, the elapsed
// GPU time and (for small maps) the full distance map.
// Returns 0 on success; exits with EXIT_FAILURE on allocation or CUDA errors.
int main() {
    const int n = MATRIX_SIZE;
    const size_t cells = (size_t)n * n;
    const int start_idx = START_X + START_Y * n;
    const int finish_idx = FINISH_X + FINISH_Y * n;

    // Host-side map and distance field
    int* P = (int*)malloc(cells * sizeof(int));
    unsigned int* dist = (unsigned int*)malloc(cells * sizeof(unsigned int));
    if (P == NULL || dist == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(P);
        free(dist);
        return EXIT_FAILURE;
    }
    generate_polygon(P, n);

    // Every cell starts as "not reached" except the start cell
    for (size_t i = 0; i < cells; i++) dist[i] = INF;
    dist[start_idx] = 0;

    // Device-side buffers
    int* d_P = NULL;
    unsigned int* d_dist = NULL;
    bool* d_changed = NULL;
    check_cuda(cudaMalloc(&d_P, cells * sizeof(int)), "cudaMalloc(d_P)");
    check_cuda(cudaMalloc(&d_dist, cells * sizeof(unsigned int)), "cudaMalloc(d_dist)");
    check_cuda(cudaMalloc(&d_changed, sizeof(bool)), "cudaMalloc(d_changed)");

    // Upload the map and the initial distances
    check_cuda(cudaMemcpy(d_P, P, cells * sizeof(int), cudaMemcpyHostToDevice),
               "cudaMemcpy(d_P)");
    check_cuda(cudaMemcpy(d_dist, dist, cells * sizeof(unsigned int), cudaMemcpyHostToDevice),
               "cudaMemcpy(d_dist)");

    // Timing
    cudaEvent_t start, stop;
    check_cuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    check_cuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
    check_cuda(cudaEventRecord(start), "cudaEventRecord(start)");

    // Main wave loop. A shortest path never visits a cell twice, so it has
    // fewer than n * n steps, and every sweep is guaranteed to extend the
    // wave front by at least one step. n * n sweeps are therefore always
    // enough, whereas a smaller cap such as 2 * n can stop before the wave
    // has walked around long obstacle walls.
    const int max_iterations = n * n;
    int iterations = 0;
    bool changed;
    do {
        changed = false;
        check_cuda(cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice),
                   "cudaMemcpy(d_changed, host to device)");
        wave_step<<<BLOCKS_COUNT, THREADS_COUNT>>>(d_P, d_dist, n, d_changed);
        check_cuda(cudaGetLastError(), "wave_step launch");
        check_cuda(cudaDeviceSynchronize(), "wave_step execution");  // Wait for the kernel
        check_cuda(cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost),
                   "cudaMemcpy(d_changed, device to host)");
        iterations++;
    } while (changed && iterations < max_iterations);  // Guard against an endless loop

    if (changed) {
        fprintf(stderr, "Warning: iteration limit (%d) reached before convergence\n",
                max_iterations);
    }

    // Stop the timer
    check_cuda(cudaEventRecord(stop), "cudaEventRecord(stop)");
    check_cuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    float milliseconds = 0;
    check_cuda(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");

    // Fetch and check the result
    check_cuda(cudaMemcpy(dist, d_dist, cells * sizeof(unsigned int), cudaMemcpyDeviceToHost),
               "cudaMemcpy(dist)");
    if (dist[finish_idx] == INF) {
        printf("Path not found!\n");
    } else {
        printf("Success! Path length: %u\n", dist[finish_idx]);
    }

    // Report
    printf("Time: %.2f ms\n", milliseconds);
    printf("Matrix: %dx%d | BlocksXThreads: %dx%d | Obstacles: %d%%\n\n",
           n, n, BLOCKS_COUNT, THREADS_COUNT, OBSTACLE_PROB);
    if (MATRIX_SIZE <= 100)
        print_distance_map(P, dist, MATRIX_SIZE);

    // Release resources
    free(P);
    free(dist);
    check_cuda(cudaFree(d_P), "cudaFree(d_P)");
    check_cuda(cudaFree(d_dist), "cudaFree(d_dist)");
    check_cuda(cudaFree(d_changed), "cudaFree(d_changed)");
    check_cuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    check_cuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    return 0;
}