From ec24546829fced82860145c51e8e0b494999d87b Mon Sep 17 00:00:00 2001 From: Arity-T Date: Sun, 30 Mar 2025 14:15:53 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D1=80=D0=BE=D0=B8=D0=B7=D0=B2=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=BD=D0=BE=D0=B5=20=D1=87=D0=B8=D1=81=D0=BB=D0=BE?= =?UTF-8?q?=20=D0=B1=D0=BB=D0=BE=D0=BA=D0=BE=D0=B2=20=D0=B8=20=D0=BF=D0=BE?= =?UTF-8?q?=D1=82=D0=BE=D0=BA=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel.cu | 86 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/kernel.cu b/kernel.cu index 764b5b9..5f4809d 100644 --- a/kernel.cu +++ b/kernel.cu @@ -4,7 +4,8 @@ #include #include -#define BLOCK_SIZE 16 +#define BLOCKS_COUNT 16 +#define THREADS_COUNT 16 #define MATRIX_SIZE 32 #define OBSTACLE_PROB 10 #define START_X 2 @@ -14,35 +15,6 @@ #define INF UINT_MAX // Используем беззнаковый максимум -__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) { - int i = blockIdx.y * blockDim.y + threadIdx.y; - int j = blockIdx.x * blockDim.x + threadIdx.x; - int idx = i * n + j; - - if (i >= n || j >= n || P[idx] == -1) return; - - unsigned int current_dist = dist[idx]; - unsigned int min_dist = current_dist; - - // Проверка соседей с защитой от переполнения - if (i > 0 && dist[(i-1)*n + j] != INF) - min_dist = min(min_dist, dist[(i-1)*n + j] + 1); - - if (i < n-1 && dist[(i+1)*n + j] != INF) - min_dist = min(min_dist, dist[(i+1)*n + j] + 1); - - if (j > 0 && dist[i*n + (j-1)] != INF) - min_dist = min(min_dist, dist[i*n + (j-1)] + 1); - - if (j < n-1 && dist[i*n + (j+1)] != INF) - min_dist = min(min_dist, dist[i*n + (j+1)] + 1); - - if (min_dist < current_dist) { - atomicMin(&dist[idx], min_dist); - *changed = true; - } -} - void generate_polygon(int* P, int n) { srand(42); for (int i = 0; i < n*n; i++) { @@ -71,9 +43,47 @@ void print_distance_map(int* P, unsigned int* dist, int n) { } } +__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + // printf("Hello from CUDA kernel! I'm thread #%d\n", tid); + + while (tid < n * n) { + int i = tid / n; + int j = tid % n; + // printf("TID = %d (real %d); i = %d; j = %d\n", tid, threadIdx.x + blockIdx.x * blockDim.x, i, j); + + if (i >= n || j >= n) return; + if (P[tid] != -1) { + unsigned int current_dist = dist[tid]; + unsigned int min_dist = current_dist; + + // Проверка соседей с защитой от переполнения + if (i > 0 && dist[(i-1)*n + j] != INF) + min_dist = min(min_dist, dist[(i-1)*n + j] + 1); + + if (i < n-1 && dist[(i+1)*n + j] != INF) + min_dist = min(min_dist, dist[(i+1)*n + j] + 1); + + if (j > 0 && dist[i*n + (j-1)] != INF) + min_dist = min(min_dist, dist[i*n + (j-1)] + 1); + + if (j < n-1 && dist[i*n + (j+1)] != INF) + min_dist = min(min_dist, dist[i*n + (j+1)] + 1); + + if (min_dist < current_dist) { + dist[tid] = min_dist; + *changed = true; + } + } + + // Каждый поток обрабатывает каждую blockDim.x * gridDim.x клетку + // printf("Increment will be: %d\n", blockDim.x * gridDim.x); + tid += blockDim.x * gridDim.x; + } +} + int main() { const int n = MATRIX_SIZE; - const int block_size = BLOCK_SIZE; // Инициализация полигона int* P = (int*)malloc(n * n * sizeof(int)); @@ -96,10 +106,6 @@ int main() { cudaMemcpy(d_P, P, n*n*sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(d_dist, dist, n*n*sizeof(unsigned int), cudaMemcpyHostToDevice); - // Настройка запуска ядра - dim3 grid((n + block_size - 1)/block_size, (n + block_size - 1)/block_size); - dim3 block(block_size, block_size); - // Замер времени cudaEvent_t start, stop; cudaEventCreate(&start); @@ -110,9 +116,10 @@ int main() { int iterations = 0; bool changed; do { + // printf("Wave step #%d\n", iterations); changed = false; cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice); - wave_step<<>>(d_P, d_dist, n, d_changed); + wave_step<<>>(d_P, d_dist, n, d_changed); cudaDeviceSynchronize(); // Синхронизация после ядра cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost); iterations++; @@ -132,11 +139,12 @@ int main() { printf("Success! Path length: %u\n", dist[FINISH_X + FINISH_Y * MATRIX_SIZE]); } + // Вывод результатов printf("Time: %.2f ms\n", milliseconds); - printf("Matrix: %dx%d | Blocks: %dx%d | Obstacles: %d%%\n\n", - n, n, block_size, block_size, OBSTACLE_PROB); + printf("Matrix: %dx%d | BlocksXThreads: %dx%d | Obstacles: %d%%\n\n", + n, n, BLOCKS_COUNT, THREADS_COUNT, OBSTACLE_PROB); - if (MATRIX_SIZE < 100) + if (MATRIX_SIZE <= 100) print_distance_map(P, dist, MATRIX_SIZE); // Освобождение памяти