first version of kernel.cu

2025-03-30 12:32:19 +03:00
parent c5a6f9b59f
commit c6be31c093
3 changed files with 130 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.out
+*.err
--- a/README.md
+++ b/README.md
@@ -10,6 +10,9 @@ sbatch run.script

 ```bash
 scontrol show jobid <jobid>
+
+# или так, чтобы не выводить лишнюю информацию
+scontrol show jobid <jobid> | grep JobState
 ```

 Отменить или завершить задачу досрочно.
--- a/kernel.cu
+++ b/kernel.cu
@@ -0,0 +1,125 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+// Threads per block along each axis; main() launches BLOCK_SIZE x BLOCK_SIZE blocks.
+#define BLOCK_SIZE 16
+// Side length of the square grid; main() allocates MATRIX_SIZE * MATRIX_SIZE cells.
+#define MATRIX_SIZE 32
+// Percent chance (0-100) that generate_polygon() marks a cell as an obstacle.
+#define OBSTACLE_PROB 10
+// Distance value of a cell the wave has not reached yet.
+#define INF UINT_MAX // Use the unsigned maximum
+
+// Lower `best` to dist[k] + 1 when the neighbour at flat index k has already
+// been reached. Neighbours still at INF are skipped so the +1 cannot wrap.
+__device__ __forceinline__ unsigned int relax_neighbor(const unsigned int* dist,
+                                                       int k, unsigned int best) {
+    if (dist[k] != INF)
+        best = min(best, dist[k] + 1);
+    return best;
+}
+
+// One relaxation sweep of the wave algorithm over an n x n grid.
+//
+// Launch layout: 2D grid of 2D blocks with one thread per cell
+// (x -> column, y -> row); threads that fall outside the grid exit at once.
+//
+//   P       - row-major cell map; -1 marks an obstacle
+//   dist    - row-major distances, INF for cells not reached yet; updated in place
+//   n       - side length of the grid
+//   changed - set to true when any cell's distance decreases; this kernel
+//             never clears it, so the caller must reset it before each launch
+__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) {
+    const int row = blockIdx.y * blockDim.y + threadIdx.y;
+    const int col = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Threads in the overhang of the launch grid have no cell to process.
+    if (row >= n || col >= n) return;
+
+    const int cell = row * n + col;
+
+    // Obstacle cells never receive a distance.
+    if (P[cell] == -1) return;
+
+    const unsigned int old_dist = dist[cell];
+    unsigned int best = old_dist;
+
+    // Look at the four edge-adjacent neighbours that lie inside the grid.
+    if (row > 0)     best = relax_neighbor(dist, cell - n, best);  // up
+    if (row < n - 1) best = relax_neighbor(dist, cell + n, best);  // down
+    if (col > 0)     best = relax_neighbor(dist, cell - 1, best);  // left
+    if (col < n - 1) best = relax_neighbor(dist, cell + 1, best);  // right
+
+    if (best < old_dist) {
+        atomicMin(&dist[cell], best);
+        *changed = true;
+    }
+}
+
+// Fill the n x n row-major grid P with a reproducible random obstacle layout.
+//
+// Each cell becomes -1 (obstacle) with a probability of OBSTACLE_PROB percent
+// and 0 (free) otherwise. The first and last cells are always forced free so
+// that the start and the finish are usable.
+//
+//   P - output buffer with room for n * n ints
+//   n - side length of the grid; the call is a no-op when n <= 0 or P is NULL
+void generate_polygon(int* P, int n) {
+    // Nothing to fill; this also keeps the P[0] / P[cells - 1] writes in bounds.
+    if (P == NULL || n <= 0) return;
+
+    srand(42);  // fixed seed: the same layout on every run
+
+    // size_t keeps the cell count from overflowing int for large n.
+    const size_t cells = (size_t)n * (size_t)n;
+    for (size_t i = 0; i < cells; i++) {
+        P[i] = (rand() % 100 < OBSTACLE_PROB) ? -1 : 0;
+    }
+    P[0] = 0;          // guarantee the start cell is free
+    P[cells - 1] = 0;  // guarantee the finish cell is free
+}
+
+// Abort with a diagnostic when a CUDA runtime call does not return cudaSuccess.
+#define CUDA_CHECK(call)                                                    \
+    do {                                                                    \
+        cudaError_t err_ = (call);                                          \
+        if (err_ != cudaSuccess) {                                          \
+            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
+                    __LINE__, cudaGetErrorString(err_));                    \
+            exit(EXIT_FAILURE);                                             \
+        }                                                                   \
+    } while (0)
+
+// Build a random MATRIX_SIZE x MATRIX_SIZE grid, run wave_step sweeps on the
+// GPU until no distance changes, then report the distance from the first cell
+// to the last cell together with the elapsed GPU time.
+//
+// Returns 0 on success; exits with EXIT_FAILURE on an allocation or CUDA error.
+int main() {
+    const int n = MATRIX_SIZE;
+    const int block_size = BLOCK_SIZE;
+    const size_t cells = (size_t)n * (size_t)n;
+    const size_t map_bytes = cells * sizeof(int);
+    const size_t dist_bytes = cells * sizeof(unsigned int);
+
+    // Host buffers: obstacle map and distance field
+    int* P = (int*)malloc(map_bytes);
+    unsigned int* dist = (unsigned int*)malloc(dist_bytes);
+    if (P == NULL || dist == NULL) {
+        fprintf(stderr, "Host allocation failed\n");
+        free(P);
+        free(dist);
+        return EXIT_FAILURE;
+    }
+
+    // Grid initialisation
+    generate_polygon(P, n);
+
+    // Distance initialisation: everything unreached except the start cell
+    for (size_t i = 0; i < cells; i++) dist[i] = INF;
+    dist[0] = 0;
+
+    // Device allocations
+    int* d_P = NULL;
+    unsigned int* d_dist = NULL;
+    bool* d_changed = NULL;
+    CUDA_CHECK(cudaMalloc(&d_P, map_bytes));
+    CUDA_CHECK(cudaMalloc(&d_dist, dist_bytes));
+    CUDA_CHECK(cudaMalloc(&d_changed, sizeof(bool)));
+
+    // Upload the inputs
+    CUDA_CHECK(cudaMemcpy(d_P, P, map_bytes, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(d_dist, dist, dist_bytes, cudaMemcpyHostToDevice));
+
+    // Launch configuration: one thread per cell, grid rounded up
+    dim3 grid((n + block_size - 1)/block_size, (n + block_size - 1)/block_size);
+    dim3 block(block_size, block_size);
+
+    // Timing
+    cudaEvent_t start, stop;
+    CUDA_CHECK(cudaEventCreate(&start));
+    CUDA_CHECK(cudaEventCreate(&stop));
+    CUDA_CHECK(cudaEventRecord(start));
+
+    // A shortest path that winds around obstacles can be up to n*n - 1 steps
+    // long, and one sweep is only guaranteed to advance the wave front by one
+    // step. n*n sweeps therefore always suffice: at most n*n - 1 to settle
+    // every cell plus one more to observe that nothing changed.
+    const int max_iterations = n * n;
+
+    // Main wave loop
+    int iterations = 0;
+    bool changed;
+    do {
+        changed = false;
+        CUDA_CHECK(cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice));
+        wave_step<<<grid, block>>>(d_P, d_dist, n, d_changed);
+        CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
+        CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran
+        CUDA_CHECK(cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost));
+        iterations++;
+    } while (changed && iterations < max_iterations);
+
+    if (changed) {
+        fprintf(stderr, "Warning: iteration limit (%d) reached before convergence\n",
+                max_iterations);
+    }
+
+    // Stop the timer
+    CUDA_CHECK(cudaEventRecord(stop));
+    CUDA_CHECK(cudaEventSynchronize(stop));
+    float milliseconds = 0;
+    CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop));
+
+    // Fetch and check the result
+    CUDA_CHECK(cudaMemcpy(dist, d_dist, dist_bytes, cudaMemcpyDeviceToHost));
+    if (dist[cells - 1] == INF) {
+        printf("Path not found!\n");
+    } else {
+        printf("Success! Path length: %u\n", dist[cells - 1]);
+    }
+
+    printf("Time: %.2f ms\n", milliseconds);
+    printf("Matrix: %dx%d | Blocks: %dx%d | Obstacles: %d%%\n", 
+           n, n, block_size, block_size, OBSTACLE_PROB);
+
+    // Release resources
+    free(P);
+    free(dist);
+    CUDA_CHECK(cudaFree(d_P));
+    CUDA_CHECK(cudaFree(d_dist));
+    CUDA_CHECK(cudaFree(d_changed));
+    CUDA_CHECK(cudaEventDestroy(start));
+    CUDA_CHECK(cudaEventDestroy(stop));
+
+    return 0;
+}