Commit 61ec4f1a authored by Jeff Daily
Browse files

[ROCm] re-add support for ROCm builds

Previously #6086 added ROCm support but after numerous rebases it lost
critical changes. This PR restores the ROCm build.

There are many source file changes but most were automated using the
following:

```bash
for f in `grep -rl '#ifdef USE_CUDA'`
do
    sed -i 's@#ifdef USE_CUDA@#if defined(USE_CUDA) || defined(USE_ROCM)@g' $f
done

for f in `grep -rl '#endif  // USE_CUDA'`
do
    sed -i 's@#endif  // USE_CUDA@#endif  // USE_CUDA || USE_ROCM@g' $f
done
```
parent 336a77df
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp" #include "cuda_score_updater.hpp"
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
namespace LightGBM { namespace LightGBM {
...@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) { ...@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp" #include "cuda_score_updater.hpp"
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
namespace LightGBM { namespace LightGBM {
...@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const ...@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_utils.hu> #include <LightGBM/cuda/cuda_utils.hu>
...@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater { ...@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
...@@ -72,12 +72,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective ...@@ -72,12 +72,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
LGBM_config_::current_learner = use_cuda_learner; LGBM_config_::current_learner = use_cuda_learner;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0; const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id)); CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
// load forced_splits file // load forced_splits file
...@@ -118,15 +118,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective ...@@ -118,15 +118,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
} }
training_metrics_.shrink_to_fit(); training_metrics_.shrink_to_fit();
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_)); train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_)); train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
num_data_ = train_data_->num_data(); num_data_ = train_data_->num_data();
...@@ -188,11 +188,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data, ...@@ -188,11 +188,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
} }
// for a validation dataset, we need its score and metric // for a validation dataset, we need its score and metric
auto new_score_updater = auto new_score_updater =
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
config_->device_type == std::string("cuda") ? config_->device_type == std::string("cuda") ?
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_, std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) : objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_)); std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
// update score // update score
for (int i = 0; i < iter_; ++i) { for (int i = 0; i < iter_; ++i) {
...@@ -501,15 +501,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { ...@@ -501,15 +501,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt(); const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
// we need to predict out-of-bag scores of data for boosting // we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt > 0) { if (num_data_ - bag_data_cnt > 0) {
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id); train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id); train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
} else { } else {
...@@ -523,17 +523,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { ...@@ -523,17 +523,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
} }
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const { std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
#else #else
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const { std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
const bool evaluation_on_cuda = metric->IsCUDAMetric(); const bool evaluation_on_cuda = metric->IsCUDAMetric();
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) { if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
return metric->Eval(score, objective_function_); return metric->Eval(score, objective_function_);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} else if (boosting_on_gpu_ && !evaluation_on_cuda) { } else if (boosting_on_gpu_ && !evaluation_on_cuda) {
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_); const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
if (total_size > host_score_.size()) { if (total_size > host_score_.size()) {
...@@ -549,7 +549,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor ...@@ -549,7 +549,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__); CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
return metric->Eval(cuda_score_.RawData(), objective_function_); return metric->Eval(cuda_score_.RawData(), objective_function_);
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
std::string GBDT::OutputMetric(int iter) { std::string GBDT::OutputMetric(int iter) {
...@@ -684,14 +684,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) { ...@@ -684,14 +684,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data(); num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_; *out_len = static_cast<int64_t>(num_data) * num_class_;
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<double> host_raw_scores; std::vector<double> host_raw_scores;
if (boosting_on_gpu_) { if (boosting_on_gpu_) {
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0); host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__); CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
raw_scores = host_raw_scores.data(); raw_scores = host_raw_scores.data();
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
if (objective_function_ != nullptr) { if (objective_function_ != nullptr) {
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
...@@ -754,26 +754,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* ...@@ -754,26 +754,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
} }
training_metrics_.shrink_to_fit(); training_metrics_.shrink_to_fit();
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() && boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_); tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
if (train_data != train_data_) { if (train_data != train_data_) {
train_data_ = train_data; train_data_ = train_data;
data_sample_strategy_->UpdateTrainingData(train_data); data_sample_strategy_->UpdateTrainingData(train_data);
// not same training data, need reset score and others // not same training data, need reset score and others
// create score tracker // create score tracker
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_)); train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_)); train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
// update score // update score
for (int i = 0; i < iter_; ++i) { for (int i = 0; i < iter_; ++i) {
...@@ -851,7 +851,7 @@ void GBDT::ResetGradientBuffers() { ...@@ -851,7 +851,7 @@ void GBDT::ResetGradientBuffers() {
const bool is_use_subset = data_sample_strategy_->is_use_subset(); const bool is_use_subset = data_sample_strategy_->is_use_subset();
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt(); const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
if (objective_function_ != nullptr) { if (objective_function_ != nullptr) {
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda") && boosting_on_gpu_) { if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
if (cuda_gradients_.Size() < total_size) { if (cuda_gradients_.Size() < total_size) {
cuda_gradients_.Resize(total_size); cuda_gradients_.Resize(total_size);
...@@ -860,16 +860,16 @@ void GBDT::ResetGradientBuffers() { ...@@ -860,16 +860,16 @@ void GBDT::ResetGradientBuffers() {
gradients_pointer_ = cuda_gradients_.RawData(); gradients_pointer_ = cuda_gradients_.RawData();
hessians_pointer_ = cuda_hessians_.RawData(); hessians_pointer_ = cuda_hessians_.RawData();
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
if (gradients_.size() < total_size) { if (gradients_.size() < total_size) {
gradients_.resize(total_size); gradients_.resize(total_size);
hessians_.resize(total_size); hessians_.resize(total_size);
} }
gradients_pointer_ = gradients_.data(); gradients_pointer_ = gradients_.data();
hessians_pointer_ = hessians_.data(); hessians_pointer_ = hessians_.data();
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) { } else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
if (gradients_.size() < total_size) { if (gradients_.size() < total_size) {
gradients_.resize(total_size); gradients_.resize(total_size);
......
...@@ -560,7 +560,7 @@ class GBDT : public GBDTBase { ...@@ -560,7 +560,7 @@ class GBDT : public GBDTBase {
/*! \brief Mutex for exclusive models initialization */ /*! \brief Mutex for exclusive models initialization */
std::mutex instance_mutex_; std::mutex instance_mutex_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
/*! \brief First order derivative of training data */ /*! \brief First order derivative of training data */
std::vector<score_t, CHAllocator<score_t>> gradients_; std::vector<score_t, CHAllocator<score_t>> gradients_;
/*! \brief Second order derivative of training data */ /*! \brief Second order derivative of training data */
...@@ -577,7 +577,7 @@ class GBDT : public GBDTBase { ...@@ -577,7 +577,7 @@ class GBDT : public GBDTBase {
score_t* hessians_pointer_; score_t* hessians_pointer_;
/*! \brief Whether boosting is done on GPU, used for device_type=cuda */ /*! \brief Whether boosting is done on GPU, used for device_type=cuda */
bool boosting_on_gpu_; bool boosting_on_gpu_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
/*! \brief Gradient vector on GPU */ /*! \brief Gradient vector on GPU */
CUDAVector<score_t> cuda_gradients_; CUDAVector<score_t> cuda_gradients_;
/*! \brief Hessian vector on GPU */ /*! \brief Hessian vector on GPU */
...@@ -586,7 +586,7 @@ class GBDT : public GBDTBase { ...@@ -586,7 +586,7 @@ class GBDT : public GBDTBase {
mutable std::vector<double> host_score_; mutable std::vector<double> host_score_;
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */ /*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
mutable CUDAVector<double> cuda_score_; mutable CUDAVector<double> cuda_score_;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
/*! \brief Number of training data */ /*! \brief Number of training data */
data_size_t num_data_; data_size_t num_data_;
......
...@@ -45,33 +45,33 @@ class GOSSStrategy : public SampleStrategy { ...@@ -45,33 +45,33 @@ class GOSSStrategy : public SampleStrategy {
bag_data_cnt_ = left_cnt; bag_data_cnt_ = left_cnt;
// set bagging data to tree learner // set bagging data to tree learner
if (!is_use_subset_) { if (!is_use_subset_) {
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} else { } else {
// get subset // get subset
tmp_subset_->ReSize(bag_data_cnt_); tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(), tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false); bag_data_cnt_, false);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(), tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_); bag_data_cnt_);
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(), tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_); bag_data_cnt_);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
} }
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. * Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_algorithms.hpp> #include <LightGBM/cuda/cuda_algorithms.hpp>
#include <LightGBM/cuda/cuda_rocm_interop.h> #include <LightGBM/cuda/cuda_rocm_interop.h>
...@@ -445,4 +445,4 @@ void BitonicArgSortGlobal<data_size_t, int, true>(const data_size_t* values, int ...@@ -445,4 +445,4 @@ void BitonicArgSortGlobal<data_size_t, int, true>(const data_size_t* values, int
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -3,8 +3,9 @@ ...@@ -3,8 +3,9 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_rocm_interop.h>
#include <LightGBM/cuda/cuda_utils.hu> #include <LightGBM/cuda/cuda_utils.hu>
namespace LightGBM { namespace LightGBM {
...@@ -34,4 +35,4 @@ int GetCUDADevice(const char* file, int line) { ...@@ -34,4 +35,4 @@ int GetCUDADevice(const char* file, int line) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -874,7 +874,7 @@ namespace LightGBM { ...@@ -874,7 +874,7 @@ namespace LightGBM {
return nullptr; return nullptr;
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
template <> template <>
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type, const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
...@@ -1069,6 +1069,6 @@ namespace LightGBM { ...@@ -1069,6 +1069,6 @@ namespace LightGBM {
return to_return; return to_return;
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} // namespace LightGBM } // namespace LightGBM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_column_data.hpp> #include <LightGBM/cuda/cuda_column_data.hpp>
...@@ -319,4 +319,4 @@ void CUDAColumnData::InitColumnMetaInfo() { ...@@ -319,4 +319,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_column_data.hpp> #include <LightGBM/cuda/cuda_column_data.hpp>
...@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column) ...@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_metadata.hpp> #include <LightGBM/cuda/cuda_metadata.hpp>
...@@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) { ...@@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_row_data.hpp> #include <LightGBM/cuda/cuda_row_data.hpp>
...@@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const; ...@@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const;
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_tree.hpp> #include <LightGBM/cuda/cuda_tree.hpp>
...@@ -338,4 +338,4 @@ void CUDATree::AsConstantTree(double val, int count) { ...@@ -338,4 +338,4 @@ void CUDATree::AsConstantTree(double val, int count) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_tree.hpp> #include <LightGBM/cuda/cuda_tree.hpp>
...@@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel( ...@@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel(
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -451,14 +451,14 @@ void Dataset::FinishLoad() { ...@@ -451,14 +451,14 @@ void Dataset::FinishLoad() {
} }
metadata_.FinishLoad(); metadata_.FinishLoad();
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (device_type_ == std::string("cuda")) { if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData(); CreateCUDAColumnData();
metadata_.CreateCUDAMetadata(gpu_device_id_); metadata_.CreateCUDAMetadata(gpu_device_id_);
} else { } else {
cuda_column_data_.reset(nullptr); cuda_column_data_.reset(nullptr);
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
is_finish_load_ = true; is_finish_load_ = true;
} }
...@@ -886,7 +886,7 @@ void Dataset::CopySubrow(const Dataset* fullset, ...@@ -886,7 +886,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
device_type_ = fullset->device_type_; device_type_ = fullset->device_type_;
gpu_device_id_ = fullset->gpu_device_id_; gpu_device_id_ = fullset->gpu_device_id_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (device_type_ == std::string("cuda")) { if (device_type_ == std::string("cuda")) {
if (cuda_column_data_ == nullptr) { if (cuda_column_data_ == nullptr) {
cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_)); cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_));
...@@ -894,7 +894,7 @@ void Dataset::CopySubrow(const Dataset* fullset, ...@@ -894,7 +894,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
} }
cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices); cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices);
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray &ca) { bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray &ca) {
...@@ -1735,13 +1735,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) { ...@@ -1735,13 +1735,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
raw_data_.push_back(other->raw_data_[i]); raw_data_.push_back(other->raw_data_[i]);
} }
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (device_type_ == std::string("cuda")) { if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData(); CreateCUDAColumnData();
} else { } else {
cuda_column_data_ = nullptr; cuda_column_data_ = nullptr;
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
const void* Dataset::GetColWiseData( const void* Dataset::GetColWiseData(
...@@ -1763,7 +1763,7 @@ const void* Dataset::GetColWiseData( ...@@ -1763,7 +1763,7 @@ const void* Dataset::GetColWiseData(
return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator); return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator);
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
void Dataset::CreateCUDAColumnData() { void Dataset::CreateCUDAColumnData() {
cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_)); cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_));
int num_columns = 0; int num_columns = 0;
...@@ -1898,6 +1898,6 @@ void Dataset::CreateCUDAColumnData() { ...@@ -1898,6 +1898,6 @@ void Dataset::CreateCUDAColumnData() {
feature_to_column); feature_to_column);
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} // namespace LightGBM } // namespace LightGBM
...@@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac ...@@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
dataset->device_type_ = config_.device_type; dataset->device_type_ = config_.device_type;
dataset->gpu_device_id_ = config_.gpu_device_id; dataset->gpu_device_id_ = config_.gpu_device_id;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_.device_type == std::string("cuda")) { if (config_.device_type == std::string("cuda")) {
dataset->CreateCUDAColumnData(); dataset->CreateCUDAColumnData();
dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_); dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_);
} else { } else {
dataset->cuda_column_data_ = nullptr; dataset->cuda_column_data_ = nullptr;
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
// check meta data // check meta data
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices); dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);
......
...@@ -607,7 +607,7 @@ class DenseBin : public Bin { ...@@ -607,7 +607,7 @@ class DenseBin : public Bin {
private: private:
data_size_t num_data_; data_size_t num_data_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<VAL_T, CHAllocator<VAL_T>> data_; std::vector<VAL_T, CHAllocator<VAL_T>> data_;
#else #else
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_; std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
......
...@@ -21,9 +21,9 @@ Metadata::Metadata() { ...@@ -21,9 +21,9 @@ Metadata::Metadata() {
position_load_from_file_ = false; position_load_from_file_ = false;
query_load_from_file_ = false; query_load_from_file_ = false;
init_score_load_from_file_ = false; init_score_load_from_file_ = false;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
cuda_metadata_ = nullptr; cuda_metadata_ = nullptr;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
void Metadata::Init(const char* data_filename) { void Metadata::Init(const char* data_filename) {
...@@ -380,11 +380,11 @@ void Metadata::SetInitScoresFromIterator(It first, It last) { ...@@ -380,11 +380,11 @@ void Metadata::SetInitScoresFromIterator(It first, It last) {
} }
init_score_load_from_file_ = false; init_score_load_from_file_ = false;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetInitScore(init_score_.data(), init_score_.size()); cuda_metadata_->SetInitScore(init_score_.data(), init_score_.size());
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
void Metadata::SetInitScore(const double* init_score, data_size_t len) { void Metadata::SetInitScore(const double* init_score, data_size_t len) {
...@@ -434,11 +434,11 @@ void Metadata::SetLabelsFromIterator(It first, It last) { ...@@ -434,11 +434,11 @@ void Metadata::SetLabelsFromIterator(It first, It last) {
label_[i] = Common::AvoidInf(first[i]); label_[i] = Common::AvoidInf(first[i]);
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetLabel(label_.data(), label_.size()); cuda_metadata_->SetLabel(label_.data(), label_.size());
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
void Metadata::SetLabel(const label_t* label, data_size_t len) { void Metadata::SetLabel(const label_t* label, data_size_t len) {
...@@ -492,11 +492,11 @@ void Metadata::SetWeightsFromIterator(It first, It last) { ...@@ -492,11 +492,11 @@ void Metadata::SetWeightsFromIterator(It first, It last) {
CalculateQueryWeights(); CalculateQueryWeights();
weight_load_from_file_ = false; weight_load_from_file_ = false;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetWeights(weights_.data(), weights_.size()); cuda_metadata_->SetWeights(weights_.data(), weights_.size());
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
void Metadata::SetWeights(const label_t* weights, data_size_t len) { void Metadata::SetWeights(const label_t* weights, data_size_t len) {
...@@ -555,7 +555,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) { ...@@ -555,7 +555,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) {
CalculateQueryWeights(); CalculateQueryWeights();
query_load_from_file_ = false; query_load_from_file_ = false;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) { if (cuda_metadata_ != nullptr) {
if (query_weights_.size() > 0) { if (query_weights_.size() > 0) {
CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_)); CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_));
...@@ -564,7 +564,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) { ...@@ -564,7 +564,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) {
cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_); cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_);
} }
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
void Metadata::SetQuery(const data_size_t* query, data_size_t len) { void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
...@@ -583,9 +583,9 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) { ...@@ -583,9 +583,9 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) {
num_positions_ = 0; num_positions_ = 0;
return; return;
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
Log::Fatal("Positions in learning to rank is not supported in CUDA version yet."); Log::Fatal("Positions in learning to rank is not supported in CUDA version yet.");
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
if (num_data_ != len) { if (num_data_ != len) {
Log::Fatal("Positions size (%i) doesn't match data size (%i)", len, num_data_); Log::Fatal("Positions size (%i) doesn't match data size (%i)", len, num_data_);
} }
...@@ -788,12 +788,12 @@ void Metadata::FinishLoad() { ...@@ -788,12 +788,12 @@ void Metadata::FinishLoad() {
CalculateQueryBoundaries(); CalculateQueryBoundaries();
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
void Metadata::CreateCUDAMetadata(const int gpu_device_id) { void Metadata::CreateCUDAMetadata(const int gpu_device_id) {
cuda_metadata_.reset(new CUDAMetadata(gpu_device_id)); cuda_metadata_.reset(new CUDAMetadata(gpu_device_id));
cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_); cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_);
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
void Metadata::LoadFromMemory(const void* memory) { void Metadata::LoadFromMemory(const void* memory) {
const char* mem_ptr = reinterpret_cast<const char*>(memory); const char* mem_ptr = reinterpret_cast<const char*>(memory);
......
...@@ -328,13 +328,13 @@ class MultiValDenseBin : public MultiValBin { ...@@ -328,13 +328,13 @@ class MultiValDenseBin : public MultiValBin {
MultiValDenseBin<VAL_T>* Clone() override; MultiValDenseBin<VAL_T>* Clone() override;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
const void* GetRowWiseData(uint8_t* bit_type, const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
bool* is_sparse, bool* is_sparse,
const void** out_data_ptr, const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const override; uint8_t* data_ptr_bit_type) const override;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
private: private:
data_size_t num_data_; data_size_t num_data_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment