Отчёт

Неиспользуемый мусор
USE_BLOCK_KERNEL флажок
2026-01-14 15:39:28 +03:00 · 2025-12-23 14:58:59 +00:00 · 2025-12-23 14:58:45 +00:00 · 2025-12-17 13:47:29 +00:00
5 changed files with 3655 additions and 23 deletions
--- a/report/.gitignore
+++ b/report/.gitignore
@@ -0,0 +1,4 @@
 *
 !.gitignore
 !report.tex
--- a/report/report.tex
+++ b/report/report.tex
--- a/run.slurm
+++ b/run.slurm
@@ -20,5 +20,8 @@ export AGGREGATION_INTERVAL=60
 # Использовать ли CUDA для агрегации (0 = нет, 1 = да)
 export USE_CUDA=1
 # Использовать ли блочное ядро (быстрее для больших интервалов, 0 = нет, 1 = да)
 export USE_BLOCK_KERNEL=0
 cd /mnt/shared/supercomputers/build
 mpirun -np $SLURM_NTASKS ./bitcoin_app
--- a/src/gpu_plugin.cu
+++ b/src/gpu_plugin.cu
@@ -3,6 +3,7 @@
 #include <cstdint>
 #include <cfloat>
 #include <cstdio>
 #include <cstdlib>
 #include <ctime>
 #include <string>
 #include <sstream>
@@ -12,16 +13,6 @@
 // Структуры данных
 // ============================================================================
 // Structure-of-Arrays (SoA) layout for the input data on the GPU:
 // each tick field is held in its own array rather than in an array of structs.
 // NOTE(review): the pointers are presumably device allocations of equal
 // length — confirm against the code that fills this struct.
 struct GpuTicksSoA {
    double* timestamp;  // array of tick timestamps (units not visible here — TODO confirm)
    double* open;       // array of `open` values
    double* high;       // array of `high` values
    double* low;        // array of `low` values
    double* close;      // array of `close` values
    int n;              // presumably the element count of each array — TODO confirm
 };
 // Результат агрегации одного периода
 struct GpuPeriodStats {
    int64_t period;
@@ -199,6 +190,7 @@ __global__ void aggregate_periods_simple_kernel(
    out_stats[period_idx] = stats;
 }
 // ============================================================================
 // Проверка доступности GPU
 // ============================================================================
@@ -207,9 +199,6 @@ extern "C" int gpu_is_available() {
    int n = 0;
    cudaError_t err = cudaGetDeviceCount(&n);
    if (err != cudaSuccess) return 0;
    if (n > 0) {
        cudaFree(0);  // Форсируем инициализацию контекста
    }
    return (n > 0) ? 1 : 0;
 }
@@ -352,14 +341,32 @@ extern "C" int gpu_aggregate_periods(
    GpuPeriodStats* d_out_stats = nullptr;
    CUDA_CHECK(cudaMalloc(&d_out_stats, num_periods * sizeof(GpuPeriodStats)));
-    // Используем простой kernel (один поток на период)
+    // Выбор ядра через переменную окружения USE_BLOCK_KERNEL
-    // т.к. обычно тиков в периоде немного
+    const char* env_block_kernel = std::getenv("USE_BLOCK_KERNEL");
-    int agg_blocks = (num_periods + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    if (env_block_kernel == nullptr) {
        printf("Error: Environment variable USE_BLOCK_KERNEL is not set\n");
        return -1;
    }
    bool use_block_kernel = std::atoi(env_block_kernel) != 0;
    if (use_block_kernel) {
        // Блочное ядро: один блок на период, потоки параллельно обрабатывают тики
        // Лучше для больших интервалов с множеством тиков в каждом периоде
        aggregate_periods_kernel<<<num_periods, BLOCK_SIZE>>>(
            d_open, d_high, d_low, d_close,
            d_unique_periods, d_offsets, d_counts,
            num_periods, d_out_stats);
    } else {
        // Простое ядро: один поток на период
        // Лучше для множества периодов с малым количеством тиков в каждом
        int agg_blocks = (num_periods + BLOCK_SIZE - 1) / BLOCK_SIZE;
        aggregate_periods_simple_kernel<<<agg_blocks, BLOCK_SIZE>>>(
            d_open, d_high, d_low, d_close,
            d_unique_periods, d_offsets, d_counts,
            num_periods, d_out_stats);
    }
    aggregate_periods_simple_kernel<<<agg_blocks, BLOCK_SIZE>>>(
        d_open, d_high, d_low, d_close,
        d_unique_periods, d_offsets, d_counts,
        num_periods, d_out_stats);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());
@@ -401,12 +408,12 @@ extern "C" int gpu_aggregate_periods(
    double total_ms = get_time_ms() - total_start;
    // Формируем весь вывод одной строкой
-    output << "  GPU aggregation (" << num_ticks << " ticks, interval=" << interval << " sec):\n";
+    output << "  GPU aggregation (" << num_ticks << " ticks, interval=" << interval << " sec, kernel=" << (use_block_kernel ? "block" : "simple") << "):\n";
    output << "    1. Malloc + H->D copy:  " << std::fixed << std::setprecision(3) << std::setw(7) << step1_ms << " ms\n";
    output << "    2. Compute period_ids:  " << std::setw(7) << step2_ms << " ms\n";
    output << "    3. RLE (CUB):           " << std::setw(7) << step3_ms << " ms (" << num_periods << " periods)\n";
    output << "    4. Exclusive scan:      " << std::setw(7) << step4_ms << " ms\n";
-    output << "    5. Aggregation kernel:  " << std::setw(7) << step5_ms << " ms\n";
+    output << "    5. Aggregation kernel:  " << std::setw(7) << step5_ms << " ms (" << (use_block_kernel ? "block" : "simple") << ")\n";
    output << "    6. D->H copy:           " << std::setw(7) << step6_ms << " ms\n";
    output << "    7. Free GPU memory:     " << std::setw(7) << step7_ms << " ms\n";
    output << "    GPU TOTAL:              " << std::setw(7) << total_ms << " ms\n";
--- a/src/utils.hpp
+++ b/src/utils.hpp
@@ -2,7 +2,6 @@
 #include "record.hpp"
 #include "period_stats.hpp"
 #include <map>
 #include <vector>
 #include <string>
 #include <cstdlib>
Author	SHA1	Message	Date
Arity-T	3141b9839b	Отчёт	2026-01-14 15:39:28 +03:00
Arity-T	0727e1ed03	Неиспользуемый мусор	2025-12-23 14:58:59 +00:00
Arity-T	3425a81750	USE_BLOCK_KERNEL флажок	2025-12-23 14:58:45 +00:00
Arity-T	9e81174a6d	Тестовые ядра	2025-12-17 13:47:29 +00:00