Commit 20996c92 authored by Jeff Daily
Browse files

partial revert of 61ec4f1a

Instead of replacing all #ifdef USE_CUDA, just add USE_CUDA define to ROCm build.
parent 1b3deb5f
......@@ -294,6 +294,9 @@ if(USE_ROCM)
endif()
message(STATUS "CMAKE_HIP_FLAGS: ${CMAKE_HIP_FLAGS}")
# Building for ROCm almost always means USE_CUDA.
# Exceptions to this will be guarded by USE_ROCM.
add_definitions(-DUSE_CUDA)
add_definitions(-DUSE_ROCM)
endif()
......
......@@ -600,13 +600,13 @@ class MultiValBin {
virtual MultiValBin* Clone() = 0;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
virtual const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const = 0;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
inline uint32_t BinMapper::ValueToBin(double value) const {
......
......@@ -7,9 +7,9 @@
#ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(USE_CUDA)
#ifndef USE_ROCM
#include <cuda.h>
#include <cuda_runtime.h>
#endif
......@@ -621,5 +621,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
......@@ -139,4 +139,4 @@ class CUDAColumnData {
#endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
......@@ -55,4 +55,4 @@ class CUDAMetadata {
#endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#define LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/metric.h>
......@@ -39,6 +39,6 @@ class CUDAMetricInterface: public HOST_METRIC {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_
......@@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/objective_function.h>
......@@ -81,6 +81,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
......@@ -5,9 +5,9 @@
#ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(USE_CUDA)
#ifndef USE_ROCM
#include <cuda.h>
#include <cuda_runtime.h>
#endif
......@@ -71,6 +71,6 @@ class CUDARandom {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
......@@ -3,7 +3,7 @@
*/
#pragma once
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(__HIP_PLATFORM_AMD__)
......@@ -62,4 +62,4 @@ static inline constexpr int WARP_SIZE_INTERNAL() {
#define WARPSIZE 32
#endif
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
......@@ -177,4 +177,4 @@ class CUDARowData {
} // namespace LightGBM
#endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -5,7 +5,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
......@@ -105,4 +105,4 @@ class CUDASplitInfo {
#endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
#define LIGHTGBM_CUDA_CUDA_TREE_HPP_
......@@ -170,4 +170,4 @@ class CUDATree : public Tree {
#endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -6,13 +6,13 @@
#ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
#define LIGHTGBM_CUDA_CUDA_UTILS_H_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(USE_CUDA)
#if defined(USE_ROCM)
#include <LightGBM/cuda/cuda_rocm_interop.h>
#else
#include <cuda.h>
#include <cuda_runtime.h>
#else
#include <LightGBM/cuda/cuda_rocm_interop.h>
#endif
#include <stdio.h>
......@@ -211,6 +211,6 @@ static __device__ T SafeLog(T x) {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_UTILS_H_
......@@ -8,13 +8,14 @@
#include <LightGBM/utils/common.h>
#if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#ifdef USE_CUDA
#if defined(USE_ROCM)
#include <LightGBM/cuda/cuda_utils.hu>
#else // USE_ROCM
#include <cuda.h>
#include <cuda_runtime.h>
#endif // USE_ROCM
#endif // USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#endif // USE_CUDA || USE_ROCM
#include <stdio.h>
enum LGBM_Device {
......@@ -47,7 +48,7 @@ struct CHAllocator {
T* ptr;
if (n == 0) return NULL;
n = SIZE_ALIGNED(n);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaError_t ret = cudaHostAlloc(reinterpret_cast<void**>(&ptr), n*sizeof(T), cudaHostAllocPortable);
if (ret != cudaSuccess) {
......@@ -66,7 +67,7 @@ struct CHAllocator {
void deallocate(T* p, std::size_t n) {
(void)n; // UNUSED
if (p == NULL) return;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaPointerAttributes attributes;
CUDASUCCESS_OR_FATAL(cudaPointerGetAttributes(&attributes, p));
......
......@@ -318,13 +318,13 @@ class Metadata {
/*! \brief Disable copy */
Metadata(const Metadata&) = delete;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); }
void CreateCUDAMetadata(const int gpu_device_id);
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
/*! \brief Load wights from file */
......@@ -391,9 +391,9 @@ class Metadata {
bool position_load_from_file_;
bool query_load_from_file_;
bool init_score_load_from_file_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
std::unique_ptr<CUDAMetadata> cuda_metadata_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
......@@ -997,13 +997,13 @@ class Dataset {
return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index);
}
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const CUDAColumnData* cuda_column_data() const {
return cuda_column_data_.get();
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
void SerializeHeader(BinaryWriter* serializer);
......@@ -1062,9 +1062,9 @@ class Dataset {
/*! \brief mutex for threading safe call */
std::mutex mutex_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
std::unique_ptr<CUDAColumnData> cuda_column_data_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
std::string parser_config_str_;
};
......
......@@ -108,7 +108,7 @@ class ObjectiveFunction {
*/
virtual bool IsCUDAObjective() const { return false; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
/*!
* \brief Convert output for CUDA version
*/
......@@ -118,7 +118,7 @@ class ObjectiveFunction {
virtual bool NeedConvertOutputCUDA () const { return false; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
} // namespace LightGBM
......
......@@ -38,9 +38,9 @@ class SampleStrategy {
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
objective_function_ = objective_function;
......@@ -76,10 +76,10 @@ class SampleStrategy {
/*! \brief whether need to resize the gradient vectors */
bool need_resize_gradients_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
CUDAVector<data_size_t> cuda_bag_data_indices_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
} // namespace LightGBM
......
......@@ -219,7 +219,7 @@ class MultiValBinWrapper {
}
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const void* GetRowWiseData(
uint8_t* bit_type,
size_t* total_size,
......@@ -235,7 +235,7 @@ class MultiValBinWrapper {
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
}
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
bool is_use_subcol_ = false;
......@@ -280,9 +280,9 @@ struct TrainingShareStates {
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
bool IsSparseRowwise() {
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
......@@ -332,7 +332,7 @@ struct TrainingShareStates {
}
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
......@@ -347,13 +347,13 @@ struct TrainingShareStates {
return nullptr;
}
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
std::vector<uint32_t> feature_hist_offsets_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
std::vector<uint32_t> column_hist_offsets_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
int num_hist_total_bin_ = 0;
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
......
......@@ -321,9 +321,9 @@ class Tree {
inline bool is_linear() const { return is_linear_; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
inline bool is_cuda_tree() const { return is_cuda_tree_; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
inline void SetIsLinear(bool is_linear) {
is_linear_ = is_linear;
......@@ -534,10 +534,10 @@ class Tree {
std::vector<std::vector<int>> leaf_features_;
/* \brief features used in leaf linear models; indexing is relative to used_features_ */
std::vector<std::vector<int>> leaf_features_inner_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
/*! \brief Marks whether this tree is a CUDATree */
bool is_cuda_tree_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
inline void Tree::Split(int leaf, int feature, int real_feature,
......
......@@ -105,33 +105,33 @@ class BaggingSampleStrategy : public SampleStrategy {
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
if (!is_use_subset_) {
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
}
}
}
......@@ -161,11 +161,11 @@ class BaggingSampleStrategy : public SampleStrategy {
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
}
bag_data_indices_.resize(num_data_);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
cuda_bag_data_indices_.Resize(num_data_);
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
if (!config_->bagging_by_query) {
bagging_runner_.ReSize(num_data_);
} else {
......@@ -206,9 +206,9 @@ class BaggingSampleStrategy : public SampleStrategy {
} else {
bag_data_cnt_ = num_data_;
bag_data_indices_.clear();
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
cuda_bag_data_indices_.Clear();
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
bagging_runner_.ReSize(0);
is_use_subset_ = false;
}
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment