173 lines
5.4 KiB
Plaintext
173 lines
5.4 KiB
Plaintext
#include <cuda_runtime.h>
|
||
#include <cstdint>
|
||
#include <cfloat>
|
||
|
||
// Data structures (must match the C++ code)
|
||
// One input record as consumed by aggregate_kernel.
// Plain aggregate of six doubles; the field order is part of the binary
// layout shared with the host side, so it must not be reordered.
struct GpuRecord {
    double timestamp;  // ordering key: smallest/largest value selects open/close
    double open;       // copied to GpuDayStats::open for the earliest record
    double high;       // contributes to the per-day maximum
    double low;        // contributes to the per-day minimum
    double close;      // copied to GpuDayStats::close for the latest record
    double volume;     // not read by the kernel in this file
};
|
||
|
||
// Per-day aggregation result written by aggregate_kernel (one element per day).
// The field order is part of the binary layout shared with the host side,
// so it must not be reordered.
struct GpuDayStats {
    long long day;    // day identifier, copied from the day_indices input
    double low;       // minimum of GpuRecord::low over the day's records
    double high;      // maximum of GpuRecord::high over the day's records
    double open;      // GpuRecord::open of the record with the smallest timestamp
    double close;     // GpuRecord::close of the record with the largest timestamp
    double avg;       // midpoint (low + high) / 2, not a mean over records
    double first_ts;  // smallest timestamp seen for the day
    double last_ts;   // largest timestamp seen for the day
};
|
||
|
||
// Reports whether the CUDA runtime can see at least one device.
//
// Returns 1 when cudaGetDeviceCount succeeds and reports a positive count,
// 0 otherwise (including when the query itself fails).
extern "C" int gpu_is_available() {
    int device_count = 0;
    const cudaError_t err = cudaGetDeviceCount(&device_count);
    if (err != cudaSuccess) {
        return 0;
    }
    return (device_count > 0) ? 1 : 0;
}
|
||
|
||
// Aggregation kernel (each thread processes one day)
|
||
// Computes one GpuDayStats per day; thread with global index d handles day d.
//
// Launch layout: 1-D grid of 1-D blocks with at least num_days threads in
// total; surplus threads exit immediately. No shared memory is used.
//
// Parameters:
//   records      input records, num_records elements
//   num_records  length of records; used to clamp each day's range
//   day_offsets  per day: index in records of the day's first record
//   day_counts   per day: number of consecutive records belonging to the day
//   day_indices  per day: identifier copied verbatim to GpuDayStats::day
//   num_days     number of days (length of the three per-day arrays and of
//                out_stats)
//   out_stats    output, one element per day
//
// For a day whose (clamped) range is empty the result keeps the sentinels:
// low = first_ts = DBL_MAX, high = last_ts = -DBL_MAX, open = close = 0.
__global__ void aggregate_kernel(
    const GpuRecord* records,
    int num_records,
    const int* day_offsets,   // start of each day in the records array
    const int* day_counts,    // number of records in each day
    const long long* day_indices,  // day identifiers
    int num_days,
    GpuDayStats* out_stats)
{
    // Global thread index == day index.
    int d = blockIdx.x * blockDim.x + threadIdx.x;

    if (d >= num_days) return;

    // Half-open record range [begin, end) for this day. Computed in 64-bit so
    // offset + count cannot overflow, then clamped to [0, num_records) so a
    // malformed offset/count pair can never read outside the records buffer.
    long long begin = day_offsets[d];
    long long end = begin + day_counts[d];
    if (begin < 0) begin = 0;
    if (end > num_records) end = num_records;

    GpuDayStats stats;
    stats.day = day_indices[d];
    stats.low = DBL_MAX;
    stats.high = -DBL_MAX;
    stats.first_ts = DBL_MAX;
    stats.last_ts = -DBL_MAX;
    stats.open = 0;
    stats.close = 0;

    for (long long i = begin; i < end; ++i) {
        const GpuRecord& r = records[i];

        // Running min / max.
        if (r.low < stats.low) stats.low = r.low;
        if (r.high > stats.high) stats.high = r.high;

        // First / last by timestamp. Strict comparisons: on equal timestamps
        // the record encountered first in the array wins in both cases.
        if (r.timestamp < stats.first_ts) {
            stats.first_ts = r.timestamp;
            stats.open = r.open;
        }
        if (r.timestamp > stats.last_ts) {
            stats.last_ts = r.timestamp;
            stats.close = r.close;
        }
    }

    // Midpoint of the day's range (not a mean over the records).
    stats.avg = (stats.low + stats.high) / 2.0;
    out_stats[d] = stats;
}
|
||
|
||
// Aggregation function, called from C++
|
||
// Host entry point: copies the inputs to the device, runs aggregate_kernel
// (one thread per day) and copies the per-day results back.
//
// Parameters (all pointers are host pointers):
//   h_records      input records, num_records elements
//   num_records    number of records
//   h_day_offsets  per day: index of the day's first record in h_records
//   h_day_counts   per day: number of records in the day
//   h_day_indices  per day: day identifier
//   num_days       number of days
//   h_out_stats    output buffer, num_days elements
//
// Returns 0 on success (num_days == 0 is a successful no-op), otherwise:
//   -1 .. -5   cudaMalloc failed (records, offsets, counts, indices, output)
//   -6         cudaDeviceSynchronize failed (kernel execution error)
//   -7         kernel launch failed
//   -8         invalid argument (negative size or null pointer)
//   -10 .. -13 host-to-device copy failed (records, offsets, counts, indices)
//   -14        device-to-host copy of the results failed
// Every device buffer allocated by this call is released on all return paths.
extern "C" int gpu_aggregate_days(
    const GpuRecord* h_records,
    int num_records,
    const int* h_day_offsets,
    const int* h_day_counts,
    const long long* h_day_indices,
    int num_days,
    GpuDayStats* h_out_stats)
{
    // Reject negative sizes before they are converted to size_t (where they
    // would turn into enormous allocation requests).
    if (num_records < 0 || num_days < 0) return -8;

    // Nothing to aggregate; a zero-block launch would be an invalid
    // configuration, so treat this as a successful no-op.
    if (num_days == 0) return 0;

    if (h_day_offsets == nullptr || h_day_counts == nullptr ||
        h_day_indices == nullptr || h_out_stats == nullptr ||
        (num_records > 0 && h_records == nullptr)) {
        return -8;
    }

    const size_t records_bytes = static_cast<size_t>(num_records) * sizeof(GpuRecord);
    const size_t offsets_bytes = static_cast<size_t>(num_days) * sizeof(int);
    const size_t counts_bytes  = static_cast<size_t>(num_days) * sizeof(int);
    const size_t indices_bytes = static_cast<size_t>(num_days) * sizeof(long long);
    const size_t stats_bytes   = static_cast<size_t>(num_days) * sizeof(GpuDayStats);

    // Device buffers. They start as nullptr so the cleanup below can free all
    // of them unconditionally (cudaFree(nullptr) is a no-op).
    GpuRecord* d_records = nullptr;
    int* d_day_offsets = nullptr;
    int* d_day_counts = nullptr;
    long long* d_day_indices = nullptr;
    GpuDayStats* d_out_stats = nullptr;

    // Single cleanup path shared by success and every failure.
    auto finish = [&](int code) -> int {
        cudaFree(d_records);
        cudaFree(d_day_offsets);
        cudaFree(d_day_counts);
        cudaFree(d_day_indices);
        cudaFree(d_out_stats);
        return code;
    };

    // Allocate device memory.
    if (cudaMalloc(&d_records, records_bytes) != cudaSuccess) return finish(-1);
    if (cudaMalloc(&d_day_offsets, offsets_bytes) != cudaSuccess) return finish(-2);
    if (cudaMalloc(&d_day_counts, counts_bytes) != cudaSuccess) return finish(-3);
    if (cudaMalloc(&d_day_indices, indices_bytes) != cudaSuccess) return finish(-4);
    if (cudaMalloc(&d_out_stats, stats_bytes) != cudaSuccess) return finish(-5);

    // Copy inputs to the device.
    if (cudaMemcpy(d_records, h_records, records_bytes,
                   cudaMemcpyHostToDevice) != cudaSuccess) return finish(-10);
    if (cudaMemcpy(d_day_offsets, h_day_offsets, offsets_bytes,
                   cudaMemcpyHostToDevice) != cudaSuccess) return finish(-11);
    if (cudaMemcpy(d_day_counts, h_day_counts, counts_bytes,
                   cudaMemcpyHostToDevice) != cudaSuccess) return finish(-12);
    if (cudaMemcpy(d_day_indices, h_day_indices, indices_bytes,
                   cudaMemcpyHostToDevice) != cudaSuccess) return finish(-13);

    // Launch: one thread per day. num_days > 0 here, and this form of the
    // ceiling division cannot overflow int.
    const int THREADS_PER_BLOCK = 256;
    const int num_blocks = (num_days - 1) / THREADS_PER_BLOCK + 1;

    aggregate_kernel<<<num_blocks, THREADS_PER_BLOCK>>>(
        d_records, num_records,
        d_day_offsets, d_day_counts, d_day_indices,
        num_days, d_out_stats
    );

    // Launch-configuration errors are reported here.
    if (cudaGetLastError() != cudaSuccess) return finish(-7);

    // Errors raised while the kernel executes are reported here.
    if (cudaDeviceSynchronize() != cudaSuccess) return finish(-6);

    // Copy the results back; a failed copy must not be reported as success.
    if (cudaMemcpy(h_out_stats, d_out_stats, stats_bytes,
                   cudaMemcpyDeviceToHost) != cudaSuccess) return finish(-14);

    return finish(0);
}
|