Commit 61ec4f1a authored by Jeff Daily's avatar Jeff Daily
Browse files

[ROCm] re-add support for ROCm builds

Previously, #6086 added ROCm support, but after numerous rebases it lost
critical changes. This PR restores the ROCm build.

There are many source file changes but most were automated using the
following:

```bash
# Rewrite CUDA-only preprocessor guards so they also cover ROCm builds.
# Pass 1: opening guards (#ifdef USE_CUDA -> #if defined(USE_CUDA) || defined(USE_ROCM)).
# NOTE: quoting "$f" protects against paths containing whitespace or glob
# characters; $(...) replaces legacy backticks. The file list itself is still
# split on whitespace, which is safe here because repository paths contain none.
for f in $(grep -rl '#ifdef USE_CUDA')
do
    sed -i 's@#ifdef USE_CUDA@#if defined(USE_CUDA) || defined(USE_ROCM)@g' "$f"
done

# Pass 2: matching closing-guard comments (#endif  // USE_CUDA -> ... || USE_ROCM)
# so the trailer comments stay consistent with the rewritten openers.
for f in $(grep -rl '#endif  // USE_CUDA')
do
    sed -i 's@#endif  // USE_CUDA@#endif  // USE_CUDA || USE_ROCM@g' "$f"
done
```
parent 336a77df
......@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp"
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
namespace LightGBM {
......@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp"
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
namespace LightGBM {
......@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -6,7 +6,7 @@
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_utils.hu>
......@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
......@@ -72,12 +72,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
if (config_->device_type == std::string("cuda")) {
LGBM_config_::current_learner = use_cuda_learner;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) {
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
// load forced_splits file
......@@ -118,15 +118,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
}
training_metrics_.shrink_to_fit();
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
num_data_ = train_data_->num_data();
......@@ -188,11 +188,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
}
// for a validation dataset, we need its score and metric
auto new_score_updater =
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
config_->device_type == std::string("cuda") ?
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
// update score
for (int i = 0; i < iter_; ++i) {
......@@ -501,15 +501,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
// we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt > 0) {
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) {
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
} else {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
} else {
......@@ -523,17 +523,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
}
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
#else
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
#endif // USE_CUDA
#ifdef USE_CUDA
#endif // USE_CUDA || USE_ROCM
#if defined(USE_CUDA) || defined(USE_ROCM)
const bool evaluation_on_cuda = metric->IsCUDAMetric();
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
return metric->Eval(score, objective_function_);
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
} else if (boosting_on_gpu_ && !evaluation_on_cuda) {
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
if (total_size > host_score_.size()) {
......@@ -549,7 +549,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
return metric->Eval(cuda_score_.RawData(), objective_function_);
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
std::string GBDT::OutputMetric(int iter) {
......@@ -684,14 +684,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_;
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<double> host_raw_scores;
if (boosting_on_gpu_) {
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
raw_scores = host_raw_scores.data();
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
if (objective_function_ != nullptr) {
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
......@@ -754,26 +754,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
}
training_metrics_.shrink_to_fit();
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
if (train_data != train_data_) {
train_data_ = train_data;
data_sample_strategy_->UpdateTrainingData(train_data);
// not same training data, need reset score and others
// create score tracker
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
// update score
for (int i = 0; i < iter_; ++i) {
......@@ -851,7 +851,7 @@ void GBDT::ResetGradientBuffers() {
const bool is_use_subset = data_sample_strategy_->is_use_subset();
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
if (objective_function_ != nullptr) {
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
if (cuda_gradients_.Size() < total_size) {
cuda_gradients_.Resize(total_size);
......@@ -860,16 +860,16 @@ void GBDT::ResetGradientBuffers() {
gradients_pointer_ = cuda_gradients_.RawData();
hessians_pointer_ = cuda_hessians_.RawData();
} else {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
if (gradients_.size() < total_size) {
gradients_.resize(total_size);
hessians_.resize(total_size);
}
gradients_pointer_ = gradients_.data();
hessians_pointer_ = hessians_.data();
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
if (gradients_.size() < total_size) {
gradients_.resize(total_size);
......
......@@ -560,7 +560,7 @@ class GBDT : public GBDTBase {
/*! \brief Mutex for exclusive models initialization */
std::mutex instance_mutex_;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
/*! \brief First order derivative of training data */
std::vector<score_t, CHAllocator<score_t>> gradients_;
/*! \brief Second order derivative of training data */
......@@ -577,7 +577,7 @@ class GBDT : public GBDTBase {
score_t* hessians_pointer_;
/*! \brief Whether boosting is done on GPU, used for device_type=cuda */
bool boosting_on_gpu_;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
/*! \brief Gradient vector on GPU */
CUDAVector<score_t> cuda_gradients_;
/*! \brief Hessian vector on GPU */
......@@ -586,7 +586,7 @@ class GBDT : public GBDTBase {
mutable std::vector<double> host_score_;
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
mutable CUDAVector<double> cuda_score_;
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
/*! \brief Number of training data */
data_size_t num_data_;
......
......@@ -45,33 +45,33 @@ class GOSSStrategy : public SampleStrategy {
bag_data_cnt_ = left_cnt;
// set bagging data to tree learner
if (!is_use_subset_) {
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
}
......
......@@ -4,7 +4,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_algorithms.hpp>
#include <LightGBM/cuda/cuda_rocm_interop.h>
......@@ -445,4 +445,4 @@ void BitonicArgSortGlobal<data_size_t, int, true>(const data_size_t* values, int
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -3,8 +3,9 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_rocm_interop.h>
#include <LightGBM/cuda/cuda_utils.hu>
namespace LightGBM {
......@@ -34,4 +35,4 @@ int GetCUDADevice(const char* file, int line) {
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -874,7 +874,7 @@ namespace LightGBM {
return nullptr;
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
template <>
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
......@@ -1069,6 +1069,6 @@ namespace LightGBM {
return to_return;
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
} // namespace LightGBM
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_column_data.hpp>
......@@ -319,4 +319,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -4,7 +4,7 @@
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_column_data.hpp>
......@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_metadata.hpp>
......@@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) {
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_row_data.hpp>
......@@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const;
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_tree.hpp>
......@@ -338,4 +338,4 @@ void CUDATree::AsConstantTree(double val, int count) {
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -4,7 +4,7 @@
*/
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_tree.hpp>
......@@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel(
} // namespace LightGBM
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
......@@ -451,14 +451,14 @@ void Dataset::FinishLoad() {
}
metadata_.FinishLoad();
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData();
metadata_.CreateCUDAMetadata(gpu_device_id_);
} else {
cuda_column_data_.reset(nullptr);
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
is_finish_load_ = true;
}
......@@ -886,7 +886,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
device_type_ = fullset->device_type_;
gpu_device_id_ = fullset->gpu_device_id_;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (device_type_ == std::string("cuda")) {
if (cuda_column_data_ == nullptr) {
cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_));
......@@ -894,7 +894,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
}
cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices);
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray &ca) {
......@@ -1735,13 +1735,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
raw_data_.push_back(other->raw_data_[i]);
}
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData();
} else {
cuda_column_data_ = nullptr;
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
const void* Dataset::GetColWiseData(
......@@ -1763,7 +1763,7 @@ const void* Dataset::GetColWiseData(
return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator);
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
void Dataset::CreateCUDAColumnData() {
cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_));
int num_columns = 0;
......@@ -1898,6 +1898,6 @@ void Dataset::CreateCUDAColumnData() {
feature_to_column);
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
} // namespace LightGBM
......@@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
dataset->device_type_ = config_.device_type;
dataset->gpu_device_id_ = config_.gpu_device_id;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (config_.device_type == std::string("cuda")) {
dataset->CreateCUDAColumnData();
dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_);
} else {
dataset->cuda_column_data_ = nullptr;
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
// check meta data
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);
......
......@@ -607,7 +607,7 @@ class DenseBin : public Bin {
private:
data_size_t num_data_;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<VAL_T, CHAllocator<VAL_T>> data_;
#else
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
......
......@@ -21,9 +21,9 @@ Metadata::Metadata() {
position_load_from_file_ = false;
query_load_from_file_ = false;
init_score_load_from_file_ = false;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
cuda_metadata_ = nullptr;
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
void Metadata::Init(const char* data_filename) {
......@@ -380,11 +380,11 @@ void Metadata::SetInitScoresFromIterator(It first, It last) {
}
init_score_load_from_file_ = false;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetInitScore(init_score_.data(), init_score_.size());
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
void Metadata::SetInitScore(const double* init_score, data_size_t len) {
......@@ -434,11 +434,11 @@ void Metadata::SetLabelsFromIterator(It first, It last) {
label_[i] = Common::AvoidInf(first[i]);
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetLabel(label_.data(), label_.size());
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
void Metadata::SetLabel(const label_t* label, data_size_t len) {
......@@ -492,11 +492,11 @@ void Metadata::SetWeightsFromIterator(It first, It last) {
CalculateQueryWeights();
weight_load_from_file_ = false;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetWeights(weights_.data(), weights_.size());
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
void Metadata::SetWeights(const label_t* weights, data_size_t len) {
......@@ -555,7 +555,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) {
CalculateQueryWeights();
query_load_from_file_ = false;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (cuda_metadata_ != nullptr) {
if (query_weights_.size() > 0) {
CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_));
......@@ -564,7 +564,7 @@ void Metadata::SetQueriesFromIterator(It first, It last) {
cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_);
}
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
}
void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
......@@ -583,9 +583,9 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) {
num_positions_ = 0;
return;
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
Log::Fatal("Positions in learning to rank is not supported in CUDA version yet.");
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
if (num_data_ != len) {
Log::Fatal("Positions size (%i) doesn't match data size (%i)", len, num_data_);
}
......@@ -788,12 +788,12 @@ void Metadata::FinishLoad() {
CalculateQueryBoundaries();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
void Metadata::CreateCUDAMetadata(const int gpu_device_id) {
cuda_metadata_.reset(new CUDAMetadata(gpu_device_id));
cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_);
}
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
void Metadata::LoadFromMemory(const void* memory) {
const char* mem_ptr = reinterpret_cast<const char*>(memory);
......
......@@ -328,13 +328,13 @@ class MultiValDenseBin : public MultiValBin {
MultiValDenseBin<VAL_T>* Clone() override;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const override;
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
private:
data_size_t num_data_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment