Commit 20996c92 authored by Jeff Daily's avatar Jeff Daily
Browse files

partial revert of 61ec4f1a

Instead of replacing all #ifdef USE_CUDA, just add USE_CUDA define to ROCm build.
parent 1b3deb5f
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp" #include "cuda_score_updater.hpp"
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
namespace LightGBM { namespace LightGBM {
...@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) { ...@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp" #include "cuda_score_updater.hpp"
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
namespace LightGBM { namespace LightGBM {
...@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const ...@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu> #include <LightGBM/cuda/cuda_utils.hu>
...@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater { ...@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
...@@ -72,12 +72,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective ...@@ -72,12 +72,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
LGBM_config_::current_learner = use_cuda_learner; LGBM_config_::current_learner = use_cuda_learner;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0; const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id)); CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
// load forced_splits file // load forced_splits file
...@@ -118,15 +118,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective ...@@ -118,15 +118,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
} }
training_metrics_.shrink_to_fit(); training_metrics_.shrink_to_fit();
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_)); train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else { } else {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_)); train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
num_data_ = train_data_->num_data(); num_data_ = train_data_->num_data();
...@@ -188,11 +188,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data, ...@@ -188,11 +188,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
} }
// for a validation dataset, we need its score and metric // for a validation dataset, we need its score and metric
auto new_score_updater = auto new_score_updater =
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
config_->device_type == std::string("cuda") ? config_->device_type == std::string("cuda") ?
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_, std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) : objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_)); std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
// update score // update score
for (int i = 0; i < iter_; ++i) { for (int i = 0; i < iter_; ++i) {
...@@ -501,15 +501,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { ...@@ -501,15 +501,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt(); const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
// we need to predict out-of-bag scores of data for boosting // we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt > 0) { if (num_data_ - bag_data_cnt > 0) {
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id); train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
} else { } else {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id); train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
} else { } else {
...@@ -523,17 +523,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { ...@@ -523,17 +523,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
} }
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const { std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
#else #else
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const { std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
const bool evaluation_on_cuda = metric->IsCUDAMetric(); const bool evaluation_on_cuda = metric->IsCUDAMetric();
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) { if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
return metric->Eval(score, objective_function_); return metric->Eval(score, objective_function_);
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} else if (boosting_on_gpu_ && !evaluation_on_cuda) { } else if (boosting_on_gpu_ && !evaluation_on_cuda) {
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_); const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
if (total_size > host_score_.size()) { if (total_size > host_score_.size()) {
...@@ -549,7 +549,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor ...@@ -549,7 +549,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__); CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
return metric->Eval(cuda_score_.RawData(), objective_function_); return metric->Eval(cuda_score_.RawData(), objective_function_);
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
std::string GBDT::OutputMetric(int iter) { std::string GBDT::OutputMetric(int iter) {
...@@ -684,14 +684,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) { ...@@ -684,14 +684,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data(); num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_; *out_len = static_cast<int64_t>(num_data) * num_class_;
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
std::vector<double> host_raw_scores; std::vector<double> host_raw_scores;
if (boosting_on_gpu_) { if (boosting_on_gpu_) {
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0); host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__); CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
raw_scores = host_raw_scores.data(); raw_scores = host_raw_scores.data();
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
if (objective_function_ != nullptr) { if (objective_function_ != nullptr) {
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
...@@ -754,26 +754,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* ...@@ -754,26 +754,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
} }
training_metrics_.shrink_to_fit(); training_metrics_.shrink_to_fit();
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() && boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_); tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
if (train_data != train_data_) { if (train_data != train_data_) {
train_data_ = train_data; train_data_ = train_data;
data_sample_strategy_->UpdateTrainingData(train_data); data_sample_strategy_->UpdateTrainingData(train_data);
// not same training data, need reset score and others // not same training data, need reset score and others
// create score tracker // create score tracker
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_)); train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else { } else {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_)); train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
// update score // update score
for (int i = 0; i < iter_; ++i) { for (int i = 0; i < iter_; ++i) {
...@@ -851,7 +851,7 @@ void GBDT::ResetGradientBuffers() { ...@@ -851,7 +851,7 @@ void GBDT::ResetGradientBuffers() {
const bool is_use_subset = data_sample_strategy_->is_use_subset(); const bool is_use_subset = data_sample_strategy_->is_use_subset();
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt(); const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
if (objective_function_ != nullptr) { if (objective_function_ != nullptr) {
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda") && boosting_on_gpu_) { if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
if (cuda_gradients_.Size() < total_size) { if (cuda_gradients_.Size() < total_size) {
cuda_gradients_.Resize(total_size); cuda_gradients_.Resize(total_size);
...@@ -860,16 +860,16 @@ void GBDT::ResetGradientBuffers() { ...@@ -860,16 +860,16 @@ void GBDT::ResetGradientBuffers() {
gradients_pointer_ = cuda_gradients_.RawData(); gradients_pointer_ = cuda_gradients_.RawData();
hessians_pointer_ = cuda_hessians_.RawData(); hessians_pointer_ = cuda_hessians_.RawData();
} else { } else {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
if (gradients_.size() < total_size) { if (gradients_.size() < total_size) {
gradients_.resize(total_size); gradients_.resize(total_size);
hessians_.resize(total_size); hessians_.resize(total_size);
} }
gradients_pointer_ = gradients_.data(); gradients_pointer_ = gradients_.data();
hessians_pointer_ = hessians_.data(); hessians_pointer_ = hessians_.data();
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) { } else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
if (gradients_.size() < total_size) { if (gradients_.size() < total_size) {
gradients_.resize(total_size); gradients_.resize(total_size);
......
...@@ -560,7 +560,7 @@ class GBDT : public GBDTBase { ...@@ -560,7 +560,7 @@ class GBDT : public GBDTBase {
/*! \brief Mutex for exclusive models initialization */ /*! \brief Mutex for exclusive models initialization */
std::mutex instance_mutex_; std::mutex instance_mutex_;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
/*! \brief First order derivative of training data */ /*! \brief First order derivative of training data */
std::vector<score_t, CHAllocator<score_t>> gradients_; std::vector<score_t, CHAllocator<score_t>> gradients_;
/*! \brief Second order derivative of training data */ /*! \brief Second order derivative of training data */
...@@ -577,7 +577,7 @@ class GBDT : public GBDTBase { ...@@ -577,7 +577,7 @@ class GBDT : public GBDTBase {
score_t* hessians_pointer_; score_t* hessians_pointer_;
/*! \brief Whether boosting is done on GPU, used for device_type=cuda */ /*! \brief Whether boosting is done on GPU, used for device_type=cuda */
bool boosting_on_gpu_; bool boosting_on_gpu_;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
/*! \brief Gradient vector on GPU */ /*! \brief Gradient vector on GPU */
CUDAVector<score_t> cuda_gradients_; CUDAVector<score_t> cuda_gradients_;
/*! \brief Hessian vector on GPU */ /*! \brief Hessian vector on GPU */
...@@ -586,7 +586,7 @@ class GBDT : public GBDTBase { ...@@ -586,7 +586,7 @@ class GBDT : public GBDTBase {
mutable std::vector<double> host_score_; mutable std::vector<double> host_score_;
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */ /*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
mutable CUDAVector<double> cuda_score_; mutable CUDAVector<double> cuda_score_;
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
/*! \brief Number of training data */ /*! \brief Number of training data */
data_size_t num_data_; data_size_t num_data_;
......
...@@ -45,33 +45,33 @@ class GOSSStrategy : public SampleStrategy { ...@@ -45,33 +45,33 @@ class GOSSStrategy : public SampleStrategy {
bag_data_cnt_ = left_cnt; bag_data_cnt_ = left_cnt;
// set bagging data to tree learner // set bagging data to tree learner
if (!is_use_subset_) { if (!is_use_subset_) {
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else { } else {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} else { } else {
// get subset // get subset
tmp_subset_->ReSize(bag_data_cnt_); tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(), tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false); bag_data_cnt_, false);
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(), tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_); bag_data_cnt_);
} else { } else {
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(), tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_); bag_data_cnt_);
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
} }
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. * Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_algorithms.hpp> #include <LightGBM/cuda/cuda_algorithms.hpp>
#include <LightGBM/cuda/cuda_rocm_interop.h> #include <LightGBM/cuda/cuda_rocm_interop.h>
...@@ -445,4 +445,4 @@ void BitonicArgSortGlobal<data_size_t, int, true>(const data_size_t* values, int ...@@ -445,4 +445,4 @@ void BitonicArgSortGlobal<data_size_t, int, true>(const data_size_t* values, int
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_rocm_interop.h> #include <LightGBM/cuda/cuda_rocm_interop.h>
#include <LightGBM/cuda/cuda_utils.hu> #include <LightGBM/cuda/cuda_utils.hu>
...@@ -35,4 +35,4 @@ int GetCUDADevice(const char* file, int line) { ...@@ -35,4 +35,4 @@ int GetCUDADevice(const char* file, int line) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -874,7 +874,7 @@ namespace LightGBM { ...@@ -874,7 +874,7 @@ namespace LightGBM {
return nullptr; return nullptr;
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
template <> template <>
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type, const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
...@@ -1069,6 +1069,6 @@ namespace LightGBM { ...@@ -1069,6 +1069,6 @@ namespace LightGBM {
return to_return; return to_return;
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} // namespace LightGBM } // namespace LightGBM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp> #include <LightGBM/cuda/cuda_column_data.hpp>
...@@ -319,4 +319,4 @@ void CUDAColumnData::InitColumnMetaInfo() { ...@@ -319,4 +319,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp> #include <LightGBM/cuda/cuda_column_data.hpp>
...@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column) ...@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_metadata.hpp> #include <LightGBM/cuda/cuda_metadata.hpp>
...@@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) { ...@@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_row_data.hpp> #include <LightGBM/cuda/cuda_row_data.hpp>
...@@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const; ...@@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const;
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_tree.hpp> #include <LightGBM/cuda/cuda_tree.hpp>
...@@ -338,4 +338,4 @@ void CUDATree::AsConstantTree(double val, int count) { ...@@ -338,4 +338,4 @@ void CUDATree::AsConstantTree(double val, int count) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
*/ */
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_tree.hpp> #include <LightGBM/cuda/cuda_tree.hpp>
...@@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel( ...@@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel(
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
...@@ -451,14 +451,14 @@ void Dataset::FinishLoad() { ...@@ -451,14 +451,14 @@ void Dataset::FinishLoad() {
} }
metadata_.FinishLoad(); metadata_.FinishLoad();
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (device_type_ == std::string("cuda")) { if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData(); CreateCUDAColumnData();
metadata_.CreateCUDAMetadata(gpu_device_id_); metadata_.CreateCUDAMetadata(gpu_device_id_);
} else { } else {
cuda_column_data_.reset(nullptr); cuda_column_data_.reset(nullptr);
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
is_finish_load_ = true; is_finish_load_ = true;
} }
...@@ -886,7 +886,7 @@ void Dataset::CopySubrow(const Dataset* fullset, ...@@ -886,7 +886,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
device_type_ = fullset->device_type_; device_type_ = fullset->device_type_;
gpu_device_id_ = fullset->gpu_device_id_; gpu_device_id_ = fullset->gpu_device_id_;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (device_type_ == std::string("cuda")) { if (device_type_ == std::string("cuda")) {
if (cuda_column_data_ == nullptr) { if (cuda_column_data_ == nullptr) {
cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_)); cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_));
...@@ -894,7 +894,7 @@ void Dataset::CopySubrow(const Dataset* fullset, ...@@ -894,7 +894,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
} }
cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices); cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices);
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray &ca) { bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray &ca) {
...@@ -1735,13 +1735,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) { ...@@ -1735,13 +1735,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
raw_data_.push_back(other->raw_data_[i]); raw_data_.push_back(other->raw_data_[i]);
} }
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (device_type_ == std::string("cuda")) { if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData(); CreateCUDAColumnData();
} else { } else {
cuda_column_data_ = nullptr; cuda_column_data_ = nullptr;
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
const void* Dataset::GetColWiseData( const void* Dataset::GetColWiseData(
...@@ -1763,7 +1763,7 @@ const void* Dataset::GetColWiseData( ...@@ -1763,7 +1763,7 @@ const void* Dataset::GetColWiseData(
return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator); return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator);
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
void Dataset::CreateCUDAColumnData() { void Dataset::CreateCUDAColumnData() {
cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_)); cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_));
int num_columns = 0; int num_columns = 0;
...@@ -1898,6 +1898,6 @@ void Dataset::CreateCUDAColumnData() { ...@@ -1898,6 +1898,6 @@ void Dataset::CreateCUDAColumnData() {
feature_to_column); feature_to_column);
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} // namespace LightGBM } // namespace LightGBM
...@@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac ...@@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
dataset->device_type_ = config_.device_type; dataset->device_type_ = config_.device_type;
dataset->gpu_device_id_ = config_.gpu_device_id; dataset->gpu_device_id_ = config_.gpu_device_id;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (config_.device_type == std::string("cuda")) { if (config_.device_type == std::string("cuda")) {
dataset->CreateCUDAColumnData(); dataset->CreateCUDAColumnData();
dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_); dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_);
} else { } else {
dataset->cuda_column_data_ = nullptr; dataset->cuda_column_data_ = nullptr;
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
// check meta data // check meta data
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices); dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);
......
...@@ -607,7 +607,7 @@ class DenseBin : public Bin { ...@@ -607,7 +607,7 @@ class DenseBin : public Bin {
private: private:
data_size_t num_data_; data_size_t num_data_;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
std::vector<VAL_T, CHAllocator<VAL_T>> data_; std::vector<VAL_T, CHAllocator<VAL_T>> data_;
#else #else
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_; std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
......
...@@ -21,9 +21,9 @@ Metadata::Metadata() { ...@@ -21,9 +21,9 @@ Metadata::Metadata() {
position_load_from_file_ = false; position_load_from_file_ = false;
query_load_from_file_ = false; query_load_from_file_ = false;
init_score_load_from_file_ = false; init_score_load_from_file_ = false;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
cuda_metadata_ = nullptr; cuda_metadata_ = nullptr;
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
void Metadata::Init(const char* data_filename) { void Metadata::Init(const char* data_filename) {
...@@ -380,11 +380,11 @@ void Metadata::SetInitScoresFromIterator(It first, It last) { ...@@ -380,11 +380,11 @@ void Metadata::SetInitScoresFromIterator(It first, It last) {
} }
init_score_load_from_file_ = false; init_score_load_from_file_ = false;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetInitScore(init_score_.data(), init_score_.size()); cuda_metadata_->SetInitScore(init_score_.data(), init_score_.size());
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
void Metadata::SetInitScore(const double* init_score, data_size_t len) { void Metadata::SetInitScore(const double* init_score, data_size_t len) {
...@@ -434,11 +434,11 @@ void Metadata::SetLabelsFromIterator(It first, It last) { ...@@ -434,11 +434,11 @@ void Metadata::SetLabelsFromIterator(It first, It last) {
label_[i] = Common::AvoidInf(first[i]); label_[i] = Common::AvoidInf(first[i]);
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetLabel(label_.data(), label_.size()); cuda_metadata_->SetLabel(label_.data(), label_.size());
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
void Metadata::SetLabel(const label_t* label, data_size_t len) { void Metadata::SetLabel(const label_t* label, data_size_t len) {
...@@ -492,11 +492,11 @@ void Metadata::SetWeightsFromIterator(It first, It last) { ...@@ -492,11 +492,11 @@ void Metadata::SetWeightsFromIterator(It first, It last) {
CalculateQueryWeights(); CalculateQueryWeights();
weight_load_from_file_ = false; weight_load_from_file_ = false;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetWeights(weights_.data(), weights_.size()); cuda_metadata_->SetWeights(weights_.data(), weights_.size());
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
void Metadata::SetWeights(const label_t* weights, data_size_t len) { void Metadata::SetWeights(const label_t* weights, data_size_t len) {
...@@ -555,7 +555,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) { ...@@ -555,7 +555,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) {
CalculateQueryWeights(); CalculateQueryWeights();
query_load_from_file_ = false; query_load_from_file_ = false;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
if (query_weights_.size() > 0) { if (query_weights_.size() > 0) {
CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_)); CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_));
...@@ -564,7 +564,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) { ...@@ -564,7 +564,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) {
cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_); cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_);
} }
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
} }
void Metadata::SetQuery(const data_size_t* query, data_size_t len) { void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
...@@ -583,9 +583,9 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) { ...@@ -583,9 +583,9 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) {
num_positions_ = 0; num_positions_ = 0;
return; return;
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
Log::Fatal("Positions in learning to rank is not supported in CUDA version yet."); Log::Fatal("Positions in learning to rank is not supported in CUDA version yet.");
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
if (num_data_ != len) { if (num_data_ != len) {
Log::Fatal("Positions size (%i) doesn't match data size (%i)", len, num_data_); Log::Fatal("Positions size (%i) doesn't match data size (%i)", len, num_data_);
} }
...@@ -788,12 +788,12 @@ void Metadata::FinishLoad() { ...@@ -788,12 +788,12 @@ void Metadata::FinishLoad() {
CalculateQueryBoundaries(); CalculateQueryBoundaries();
} }
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
void Metadata::CreateCUDAMetadata(const int gpu_device_id) { void Metadata::CreateCUDAMetadata(const int gpu_device_id) {
cuda_metadata_.reset(new CUDAMetadata(gpu_device_id)); cuda_metadata_.reset(new CUDAMetadata(gpu_device_id));
cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_); cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_);
} }
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
void Metadata::LoadFromMemory(const void* memory) { void Metadata::LoadFromMemory(const void* memory) {
const char* mem_ptr = reinterpret_cast<const char*>(memory); const char* mem_ptr = reinterpret_cast<const char*>(memory);
......
...@@ -328,13 +328,13 @@ class MultiValDenseBin : public MultiValBin { ...@@ -328,13 +328,13 @@ class MultiValDenseBin : public MultiValBin {
MultiValDenseBin<VAL_T>* Clone() override; MultiValDenseBin<VAL_T>* Clone() override;
#if defined(USE_CUDA) || defined(USE_ROCM) #ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type, const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
bool* is_sparse, bool* is_sparse,
const void** out_data_ptr, const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const override; uint8_t* data_ptr_bit_type) const override;
#endif // USE_CUDA || USE_ROCM #endif // USE_CUDA
private: private:
data_size_t num_data_; data_size_t num_data_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment