OpenMP within a single machine
Makefile (2 lines changed)

@@ -1,5 +1,5 @@
 CXX = mpic++
-CXXFLAGS = -std=c++17 -O2 -Wall -Wextra -Wno-cast-function-type
+CXXFLAGS = -std=c++17 -O2 -Wall -Wextra -Wno-cast-function-type -fopenmp
 
 NVCC = nvcc
 NVCCFLAGS = -O3 -std=c++17 -arch=sm_86 -Xcompiler -fPIC
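The only build change is adding -fopenmp to the MPI compiler wrapper's flags; without it, GCC/Clang typically ignore the omp pragmas and the omp_* runtime calls do not link. A minimal standalone sketch (not project code, file name is illustrative) of what the flag enables:

    // check_omp.cpp, compile with: mpic++ -std=c++17 -fopenmp check_omp.cpp
    #include <omp.h>
    #include <cstdio>

    int main() {
        #pragma omp parallel
        {
            // Each thread of the team reports its id once
            std::printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
        }
        return 0;
    }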
SLURM job script

@@ -2,9 +2,11 @@
 #SBATCH --job-name=btc
 #SBATCH --nodes=4
 #SBATCH --ntasks=4
+#SBATCH --cpus-per-task=2
 #SBATCH --output=out.txt
 
-# mpirun -np $SLURM_NTASKS ./build/bitcoin_app
+# Number of CPU threads per node (must match cpus-per-task)
+export NUM_CPU_THREADS=2
 
 cd /mnt/shared/supercomputers/build
 mpirun -np $SLURM_NTASKS ./bitcoin_app
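The two additions work as a pair: --cpus-per-task=2 reserves two cores per MPI rank from SLURM, and NUM_CPU_THREADS=2 tells the application how many OpenMP worker threads to start on each rank. The value is read with std::getenv in the src/main.cpp changes below, so the two numbers should be kept in sync.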
src/main.cpp (122 lines changed)

@@ -1,7 +1,10 @@
 #include <mpi.h>
+#include <omp.h>
 #include <iostream>
 #include <vector>
 #include <map>
+#include <iomanip>
+#include <cstdlib>
 
 #include "csv_loader.hpp"
 #include "utils.hpp"
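The new headers back the changes that follow: <omp.h> for the OpenMP runtime calls (omp_set_num_threads, omp_get_thread_num, omp_get_wtime), <cstdlib> for std::getenv and std::atoi, and <iomanip> for std::setprecision in the timing output.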
@@ -27,6 +30,26 @@ std::vector<Record> select_records_for_rank(
     return out;
 }
 
+// Split the records into N parts (by day)
+std::vector<std::vector<Record>> split_records(const std::vector<Record>& records, int n_parts) {
+    // Group by day
+    std::map<DayIndex, std::vector<Record>> by_day;
+    for (const auto& r : records) {
+        DayIndex day = static_cast<DayIndex>(r.timestamp) / 86400;
+        by_day[day].push_back(r);
+    }
+
+    // Distribute the days across the parts
+    std::vector<std::vector<Record>> parts(n_parts);
+    int i = 0;
+    for (auto& [day, recs] : by_day) {
+        parts[i % n_parts].insert(parts[i % n_parts].end(), recs.begin(), recs.end());
+        i++;
+    }
+
+    return parts;
+}
+
 int main(int argc, char** argv) {
     MPI_Init(&argc, &argv);
 
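For reference, a self-contained sketch of the round-robin distribution split_records performs. Record and DayIndex below are simplified stand-ins (the real definitions live in the project's headers); the only assumption is that a record carries a Unix timestamp, which the diff's use of r.timestamp implies:

    // Illustrative only: Record and DayIndex are simplified stand-ins, not the project's types.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <vector>

    using DayIndex = std::int64_t;
    struct Record { double timestamp; };

    std::vector<std::vector<Record>> split_records(const std::vector<Record>& records, int n_parts) {
        std::map<DayIndex, std::vector<Record>> by_day;          // group records by day
        for (const auto& r : records)
            by_day[static_cast<DayIndex>(r.timestamp) / 86400].push_back(r);

        std::vector<std::vector<Record>> parts(n_parts);         // deal whole days out round-robin
        int i = 0;
        for (auto& [day, recs] : by_day) {
            parts[i % n_parts].insert(parts[i % n_parts].end(), recs.begin(), recs.end());
            i++;
        }
        return parts;
    }

    int main() {
        std::vector<Record> recs;                                // two records on each of five days
        for (int d = 0; d < 5; ++d)
            for (int k = 0; k < 2; ++k)
                recs.push_back({d * 86400.0 + k});

        auto parts = split_records(recs, 3);                     // days 0..4 land in parts 0,1,2,0,1
        for (std::size_t p = 0; p < parts.size(); ++p)
            std::printf("part %zu: %zu records\n", p, parts[p].size());
        return 0;
    }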
@@ -34,6 +57,15 @@ int main(int argc, char** argv) {
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
+    // Read the CPU thread count from the environment variable (default 2)
+    int num_cpu_threads = 2;
+    const char* env_threads = std::getenv("NUM_CPU_THREADS");
+    if (env_threads) {
+        num_cpu_threads = std::atoi(env_threads);
+        if (num_cpu_threads < 1) num_cpu_threads = 1;
+    }
+    omp_set_num_threads(num_cpu_threads + 1); // +1 for the GPU thread, if there is one
+
     // ====== LOAD GPU FUNCTIONS ======
     auto gpu_is_available = load_gpu_is_available();
     auto gpu_aggregate = load_gpu_aggregate_days();
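omp_set_num_threads(num_cpu_threads + 1) sizes the team one larger than the CPU worker count so that, on GPU nodes, thread 0 can drive the GPU while the remaining threads aggregate on the CPU (see the hunks below); on CPU-only nodes the extra thread falls outside the tid < num_cpu_threads check and stays idle. A standalone sketch of the same setup with the team size printed as a sanity check (illustrative, not project code):

    // Standalone sketch mirroring the thread-count setup in main.cpp.
    #include <omp.h>
    #include <cstdio>
    #include <cstdlib>

    int main() {
        int num_cpu_threads = 2;                               // default, as in the diff
        if (const char* env = std::getenv("NUM_CPU_THREADS")) {
            num_cpu_threads = std::atoi(env);
            if (num_cpu_threads < 1) num_cpu_threads = 1;
        }
        omp_set_num_threads(num_cpu_threads + 1);              // one extra slot for a GPU-driving thread

        #pragma omp parallel
        {
            #pragma omp single
            std::printf("team size = %d\n", omp_get_num_threads()); // usually num_cpu_threads + 1
        }
        return 0;
    }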
@@ -41,9 +73,9 @@ int main(int argc, char** argv) {
     bool have_gpu = false;
     if (gpu_is_available && gpu_is_available()) {
         have_gpu = true;
-        std::cout << "Rank " << rank << ": GPU available" << std::endl;
+        std::cout << "Rank " << rank << ": GPU available + " << num_cpu_threads << " CPU threads" << std::endl;
     } else {
-        std::cout << "Rank " << rank << ": GPU not available, using CPU" << std::endl;
+        std::cout << "Rank " << rank << ": " << num_cpu_threads << " CPU threads only" << std::endl;
     }
 
     std::vector<Record> local_records;
@@ -87,27 +119,85 @@ int main(int argc, char** argv) {
     std::cout << "Rank " << rank << " received "
               << local_records.size() << " records" << std::endl;
 
-    // ====== AGGREGATION ON EACH NODE (GPU or CPU) ======
+    // ====== AGGREGATION ON EACH NODE ======
     std::vector<DayStats> local_stats;
+    double time_start = omp_get_wtime();
+
+    // Worker times: [0] = GPU (if present), [1..n] = CPU threads
+    std::vector<double> worker_times(num_cpu_threads + 1, 0.0);
+
     if (have_gpu && gpu_aggregate) {
-        bool gpu_success = aggregate_days_gpu(local_records, local_stats, gpu_aggregate);
-        if (gpu_success) {
-            std::cout << "Rank " << rank << " aggregated "
-                      << local_stats.size() << " days (GPU)" << std::endl;
-        } else {
-            // Fall back to CPU on a GPU error
-            std::cout << "Rank " << rank << ": GPU aggregation failed, falling back to CPU" << std::endl;
-            local_stats = aggregate_days(local_records);
-            std::cout << "Rank " << rank << " aggregated "
-                      << local_stats.size() << " days (CPU)" << std::endl;
+        // GPU node: split into (1 + num_cpu_threads) parts
+        int n_workers = 1 + num_cpu_threads;
+        auto parts = split_records(local_records, n_workers);
+
+        std::vector<std::vector<DayStats>> results(n_workers);
+        std::vector<bool> success(n_workers, true);
+
+        #pragma omp parallel
+        {
+            int tid = omp_get_thread_num();
+            if (tid < n_workers) {
+                double t0 = omp_get_wtime();
+                if (tid == 0) {
+                    // GPU thread
+                    success[0] = aggregate_days_gpu(parts[0], results[0], gpu_aggregate);
+                } else {
+                    // CPU threads
+                    results[tid] = aggregate_days(parts[tid]);
+                }
+                worker_times[tid] = omp_get_wtime() - t0;
+            }
         }
+
+        // Merge the results
+        for (int i = 0; i < n_workers; i++) {
+            if (i == 0 && !success[0]) {
+                // GPU failed, process its part on the CPU
+                std::cout << "Rank " << rank << ": GPU failed, processing on CPU" << std::endl;
+                double t0 = omp_get_wtime();
+                results[0] = aggregate_days(parts[0]);
+                worker_times[0] = omp_get_wtime() - t0;
+            }
+            local_stats.insert(local_stats.end(), results[i].begin(), results[i].end());
+        }
+
     } else {
-        local_stats = aggregate_days(local_records);
-        std::cout << "Rank " << rank << " aggregated "
-                  << local_stats.size() << " days (CPU)" << std::endl;
+        // CPU-only node
+        auto parts = split_records(local_records, num_cpu_threads);
+        std::vector<std::vector<DayStats>> results(num_cpu_threads);
+
+        #pragma omp parallel
+        {
+            int tid = omp_get_thread_num();
+            if (tid < num_cpu_threads) {
+                double t0 = omp_get_wtime();
+                results[tid] = aggregate_days(parts[tid]);
+                worker_times[tid + 1] = omp_get_wtime() - t0; // +1 because [0] is reserved for the GPU
+            }
+        }
+
+        for (int i = 0; i < num_cpu_threads; i++) {
+            local_stats.insert(local_stats.end(), results[i].begin(), results[i].end());
+        }
     }
+
+    double time_total = omp_get_wtime() - time_start;
+
+    // Print the timing
+    std::cout << std::fixed << std::setprecision(3);
+    std::cout << "Rank " << rank << " aggregated " << local_stats.size() << " days in "
+              << time_total << "s (";
+    if (have_gpu) {
+        std::cout << "GPU: " << worker_times[0] << "s, ";
+    }
+    for (int i = 0; i < num_cpu_threads; i++) {
+        int idx = have_gpu ? (i + 1) : (i + 1);
+        std::cout << "CPU" << i << ": " << worker_times[idx] << "s";
+        if (i < num_cpu_threads - 1) std::cout << ", ";
+    }
+    std::cout << ")" << std::endl;
 
     // ====== GATHER AGGREGATED DATA ON RANK 0 ======
     std::vector<DayStats> all_stats;
 
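Two points worth noting about the hunk above. First, because split_records hands out whole days, each day's statistics are computed by exactly one worker, so the final local_stats.insert concatenation cannot produce duplicate DayStats entries for a day. Second, with std::setprecision(3) the per-rank timing line comes out in this form (numbers made up for illustration):

    Rank 0 aggregated 3650 days in 0.412s (GPU: 0.118s, CPU0: 0.205s, CPU1: 0.198s)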