Commit 61ec4f1a authored by Jeff Daily's avatar Jeff Daily
Browse files

[ROCm] re-add support for ROCm builds

Previously #6086 added ROCm support but after numerous rebases it lost
critical changes. This PR restores the ROCm build.

There are many source file changes but most were automated using the
following:

```bash
for f in `grep -rl '#ifdef USE_CUDA'`
do
    sed -i 's@#ifdef USE_CUDA@#if defined(USE_CUDA) || defined(USE_ROCM)@g' $f
done

for f in `grep -rl '#endif  // USE_CUDA'`
do
    sed -i 's@#endif  // USE_CUDA@#endif  // USE_CUDA || USE_ROCM@g' $f
done
```
parent 336a77df
...@@ -36,7 +36,7 @@ endif() ...@@ -36,7 +36,7 @@ endif()
project(lightgbm LANGUAGES C CXX) project(lightgbm LANGUAGES C CXX)
if(USE_CUDA) if(USE_CUDA OR USE_ROCM)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
elseif(BUILD_CPP_TEST) elseif(BUILD_CPP_TEST)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 14)
...@@ -480,10 +480,21 @@ set( ...@@ -480,10 +480,21 @@ set(
src/cuda/cuda_algorithms.cu src/cuda/cuda_algorithms.cu
) )
if(USE_CUDA) if(USE_CUDA OR USE_ROCM)
list(APPEND LGBM_SOURCES ${LGBM_CUDA_SOURCES}) list(APPEND LGBM_SOURCES ${LGBM_CUDA_SOURCES})
endif() endif()
if(USE_ROCM)
set(CU_FILES "")
foreach(file IN LISTS LGBM_CUDA_SOURCES)
string(REGEX MATCH "\\.cu$" is_cu_file "${file}")
if(is_cu_file)
list(APPEND CU_FILES "${file}")
endif()
endforeach()
set_source_files_properties(${CU_FILES} PROPERTIES LANGUAGE HIP)
endif()
add_library(lightgbm_objs OBJECT ${LGBM_SOURCES}) add_library(lightgbm_objs OBJECT ${LGBM_SOURCES})
if(BUILD_CLI) if(BUILD_CLI)
...@@ -632,6 +643,10 @@ if(USE_CUDA) ...@@ -632,6 +643,10 @@ if(USE_CUDA)
endif() endif()
endif() endif()
if(USE_ROCM)
target_link_libraries(lightgbm_objs PUBLIC hip::host)
endif()
if(WIN32) if(WIN32)
if(MINGW OR CYGWIN) if(MINGW OR CYGWIN)
target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi) target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi)
......
...@@ -600,13 +600,13 @@ class MultiValBin { ...@@ -600,13 +600,13 @@ class MultiValBin {
virtual MultiValBin* Clone() = 0; virtual MultiValBin* Clone() = 0;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
virtual const void* GetRowWiseData(uint8_t* bit_type, virtual const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
bool* is_sparse, bool* is_sparse,
const void** out_data_ptr, const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const = 0; uint8_t* data_ptr_bit_type) const = 0;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
}; };
inline uint32_t BinMapper::ValueToBin(double value) const { inline uint32_t BinMapper::ValueToBin(double value) const {
......
...@@ -7,10 +7,12 @@ ...@@ -7,10 +7,12 @@
#ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_ #define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#endif
#include <stdio.h> #include <stdio.h>
#include <LightGBM/bin.h> #include <LightGBM/bin.h>
...@@ -619,5 +621,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values, ...@@ -619,5 +621,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_ #endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_ #define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
...@@ -139,4 +139,4 @@ class CUDAColumnData { ...@@ -139,4 +139,4 @@ class CUDAColumnData {
#endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_ #endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#define LIGHTGBM_CUDA_CUDA_METADATA_HPP_ #define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
...@@ -55,4 +55,4 @@ class CUDAMetadata { ...@@ -55,4 +55,4 @@ class CUDAMetadata {
#endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_ #endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#define LIGHTGBM_CUDA_CUDA_METRIC_HPP_ #define LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_utils.hu> #include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/metric.h> #include <LightGBM/metric.h>
...@@ -39,6 +39,6 @@ class CUDAMetricInterface: public HOST_METRIC { ...@@ -39,6 +39,6 @@ class CUDAMetricInterface: public HOST_METRIC {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_ #endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_ #define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#include <LightGBM/cuda/cuda_utils.hu> #include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/objective_function.h> #include <LightGBM/objective_function.h>
...@@ -81,6 +81,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE { ...@@ -81,6 +81,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_ #endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
...@@ -5,10 +5,12 @@ ...@@ -5,10 +5,12 @@
#ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_ #define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#endif
namespace LightGBM { namespace LightGBM {
...@@ -69,6 +71,6 @@ class CUDARandom { ...@@ -69,6 +71,6 @@ class CUDARandom {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_ #endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
/*! /*!
* Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. * Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__) #if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__)
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask. // ROCm doesn't have __shfl_down_sync, only __shfl_down without mask.
...@@ -12,9 +12,38 @@ ...@@ -12,9 +12,38 @@
#define WARPSIZE warpSize #define WARPSIZE warpSize
// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd // ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd
#define atomicAdd_block atomicAdd #define atomicAdd_block atomicAdd
#else // hipify
#include <hip/hip_runtime.h>
#define cudaDeviceProp hipDeviceProp_t
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaError_t hipError_t
#define cudaFree hipFree
#define cudaFreeHost hipFreeHost
#define cudaGetDevice hipGetDevice
#define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaGetErrorName hipGetErrorName
#define cudaGetErrorString hipGetErrorString
#define cudaGetLastError hipGetLastError
#define cudaHostAlloc hipHostAlloc
#define cudaHostAllocPortable hipHostAllocPortable
#define cudaMalloc hipMalloc
#define cudaMemcpy hipMemcpy
#define cudaMemcpyAsync hipMemcpyAsync
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemoryTypeHost hipMemoryTypeHost
#define cudaMemset hipMemset
#define cudaPointerAttributes hipPointerAttribute_t
#define cudaPointerGetAttributes hipPointerGetAttributes
#define cudaSetDevice hipSetDevice
#define cudaStreamCreate hipStreamCreate
#define cudaStreamDestroy hipStreamDestroy
#define cudaStream_t hipStream_t
#define cudaSuccess hipSuccess
#else // __HIP_PLATFORM_AMD__ || __HIP__
// CUDA warpSize is not a constexpr, but always 32 // CUDA warpSize is not a constexpr, but always 32
#define WARPSIZE 32 #define WARPSIZE 32
#endif #endif
#endif #endif // USE_CUDA || USE_ROCM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_ #define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
...@@ -177,4 +177,4 @@ class CUDARowData { ...@@ -177,4 +177,4 @@ class CUDARowData {
} // namespace LightGBM } // namespace LightGBM
#endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_ #endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. * Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_ #define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
...@@ -105,4 +105,4 @@ class CUDASplitInfo { ...@@ -105,4 +105,4 @@ class CUDASplitInfo {
#endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_ #endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_ #ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
#define LIGHTGBM_CUDA_CUDA_TREE_HPP_ #define LIGHTGBM_CUDA_CUDA_TREE_HPP_
...@@ -170,4 +170,4 @@ class CUDATree : public Tree { ...@@ -170,4 +170,4 @@ class CUDATree : public Tree {
#endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_ #endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
...@@ -6,10 +6,14 @@ ...@@ -6,10 +6,14 @@
#ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_ #ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
#define LIGHTGBM_CUDA_CUDA_UTILS_H_ #define LIGHTGBM_CUDA_CUDA_UTILS_H_
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#else
#include <LightGBM/cuda/cuda_rocm_interop.h>
#endif
#include <stdio.h> #include <stdio.h>
#include <LightGBM/utils/log.h> #include <LightGBM/utils/log.h>
...@@ -207,6 +211,6 @@ static __device__ T SafeLog(T x) { ...@@ -207,6 +211,6 @@ static __device__ T SafeLog(T x) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_CUDA_CUDA_UTILS_H_ #endif // LIGHTGBM_CUDA_CUDA_UTILS_H_
...@@ -8,10 +8,13 @@ ...@@ -8,10 +8,13 @@
#include <LightGBM/utils/common.h> #include <LightGBM/utils/common.h>
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#endif #endif // USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#endif // USE_CUDA || USE_ROCM
#include <stdio.h> #include <stdio.h>
enum LGBM_Device { enum LGBM_Device {
...@@ -44,7 +47,7 @@ struct CHAllocator { ...@@ -44,7 +47,7 @@ struct CHAllocator {
T* ptr; T* ptr;
if (n == 0) return NULL; if (n == 0) return NULL;
n = SIZE_ALIGNED(n); n = SIZE_ALIGNED(n);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (LGBM_config_::current_device == lgbm_device_cuda) { if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaError_t ret = cudaHostAlloc(reinterpret_cast<void**>(&ptr), n*sizeof(T), cudaHostAllocPortable); cudaError_t ret = cudaHostAlloc(reinterpret_cast<void**>(&ptr), n*sizeof(T), cudaHostAllocPortable);
if (ret != cudaSuccess) { if (ret != cudaSuccess) {
...@@ -63,17 +66,17 @@ struct CHAllocator { ...@@ -63,17 +66,17 @@ struct CHAllocator {
void deallocate(T* p, std::size_t n) { void deallocate(T* p, std::size_t n) {
(void)n; // UNUSED (void)n; // UNUSED
if (p == NULL) return; if (p == NULL) return;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (LGBM_config_::current_device == lgbm_device_cuda) { if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaPointerAttributes attributes; cudaPointerAttributes attributes;
cudaPointerGetAttributes(&attributes, p); CUDASUCCESS_OR_FATAL(cudaPointerGetAttributes(&attributes, p));
#if CUDA_VERSION >= 10000 #if CUDA_VERSION >= 10000 || defined(USE_ROCM)
if ((attributes.type == cudaMemoryTypeHost) && (attributes.devicePointer != NULL)) { if ((attributes.type == cudaMemoryTypeHost) && (attributes.devicePointer != NULL)) {
cudaFreeHost(p); CUDASUCCESS_OR_FATAL(cudaFreeHost(p));
} }
#else #else
if ((attributes.memoryType == cudaMemoryTypeHost) && (attributes.devicePointer != NULL)) { if ((attributes.memoryType == cudaMemoryTypeHost) && (attributes.devicePointer != NULL)) {
cudaFreeHost(p); CUDASUCCESS_OR_FATAL(cudaFreeHost(p));
} }
#endif #endif
} else { } else {
......
...@@ -318,13 +318,13 @@ class Metadata { ...@@ -318,13 +318,13 @@ class Metadata {
/*! \brief Disable copy */ /*! \brief Disable copy */
Metadata(const Metadata&) = delete; Metadata(const Metadata&) = delete;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); } CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); }
void CreateCUDAMetadata(const int gpu_device_id); void CreateCUDAMetadata(const int gpu_device_id);
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
private: private:
/*! \brief Load wights from file */ /*! \brief Load wights from file */
...@@ -391,9 +391,9 @@ class Metadata { ...@@ -391,9 +391,9 @@ class Metadata {
bool position_load_from_file_; bool position_load_from_file_;
bool query_load_from_file_; bool query_load_from_file_;
bool init_score_load_from_file_; bool init_score_load_from_file_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
std::unique_ptr<CUDAMetadata> cuda_metadata_; std::unique_ptr<CUDAMetadata> cuda_metadata_;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
}; };
...@@ -997,13 +997,13 @@ class Dataset { ...@@ -997,13 +997,13 @@ class Dataset {
return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index); return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index);
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
const CUDAColumnData* cuda_column_data() const { const CUDAColumnData* cuda_column_data() const {
return cuda_column_data_.get(); return cuda_column_data_.get();
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
private: private:
void SerializeHeader(BinaryWriter* serializer); void SerializeHeader(BinaryWriter* serializer);
...@@ -1062,9 +1062,9 @@ class Dataset { ...@@ -1062,9 +1062,9 @@ class Dataset {
/*! \brief mutex for threading safe call */ /*! \brief mutex for threading safe call */
std::mutex mutex_; std::mutex mutex_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
std::unique_ptr<CUDAColumnData> cuda_column_data_; std::unique_ptr<CUDAColumnData> cuda_column_data_;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
std::string parser_config_str_; std::string parser_config_str_;
}; };
......
...@@ -108,7 +108,7 @@ class ObjectiveFunction { ...@@ -108,7 +108,7 @@ class ObjectiveFunction {
*/ */
virtual bool IsCUDAObjective() const { return false; } virtual bool IsCUDAObjective() const { return false; }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
/*! /*!
* \brief Convert output for CUDA version * \brief Convert output for CUDA version
*/ */
...@@ -118,7 +118,7 @@ class ObjectiveFunction { ...@@ -118,7 +118,7 @@ class ObjectiveFunction {
virtual bool NeedConvertOutputCUDA () const { return false; } virtual bool NeedConvertOutputCUDA () const { return false; }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -38,9 +38,9 @@ class SampleStrategy { ...@@ -38,9 +38,9 @@ class SampleStrategy {
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; } std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; } CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) { void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
objective_function_ = objective_function; objective_function_ = objective_function;
...@@ -76,10 +76,10 @@ class SampleStrategy { ...@@ -76,10 +76,10 @@ class SampleStrategy {
/*! \brief whether need to resize the gradient vectors */ /*! \brief whether need to resize the gradient vectors */
bool need_resize_gradients_; bool need_resize_gradients_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */ /*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
CUDAVector<data_size_t> cuda_bag_data_indices_; CUDAVector<data_size_t> cuda_bag_data_indices_;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -219,7 +219,7 @@ class MultiValBinWrapper { ...@@ -219,7 +219,7 @@ class MultiValBinWrapper {
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
const void* GetRowWiseData( const void* GetRowWiseData(
uint8_t* bit_type, uint8_t* bit_type,
size_t* total_size, size_t* total_size,
...@@ -235,7 +235,7 @@ class MultiValBinWrapper { ...@@ -235,7 +235,7 @@ class MultiValBinWrapper {
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type); return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
} }
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
private: private:
bool is_use_subcol_ = false; bool is_use_subcol_ = false;
...@@ -280,9 +280,9 @@ struct TrainingShareStates { ...@@ -280,9 +280,9 @@ struct TrainingShareStates {
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; } const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; } const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
bool IsSparseRowwise() { bool IsSparseRowwise() {
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse()); return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
...@@ -332,7 +332,7 @@ struct TrainingShareStates { ...@@ -332,7 +332,7 @@ struct TrainingShareStates {
} }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
const void* GetRowWiseData(uint8_t* bit_type, const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
bool* is_sparse, bool* is_sparse,
...@@ -347,13 +347,13 @@ struct TrainingShareStates { ...@@ -347,13 +347,13 @@ struct TrainingShareStates {
return nullptr; return nullptr;
} }
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
private: private:
std::vector<uint32_t> feature_hist_offsets_; std::vector<uint32_t> feature_hist_offsets_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
std::vector<uint32_t> column_hist_offsets_; std::vector<uint32_t> column_hist_offsets_;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
int num_hist_total_bin_ = 0; int num_hist_total_bin_ = 0;
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_; std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_; std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
......
...@@ -321,9 +321,9 @@ class Tree { ...@@ -321,9 +321,9 @@ class Tree {
inline bool is_linear() const { return is_linear_; } inline bool is_linear() const { return is_linear_; }
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
inline bool is_cuda_tree() const { return is_cuda_tree_; } inline bool is_cuda_tree() const { return is_cuda_tree_; }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
inline void SetIsLinear(bool is_linear) { inline void SetIsLinear(bool is_linear) {
is_linear_ = is_linear; is_linear_ = is_linear;
...@@ -534,10 +534,10 @@ class Tree { ...@@ -534,10 +534,10 @@ class Tree {
std::vector<std::vector<int>> leaf_features_; std::vector<std::vector<int>> leaf_features_;
/* \brief features used in leaf linear models; indexing is relative to used_features_ */ /* \brief features used in leaf linear models; indexing is relative to used_features_ */
std::vector<std::vector<int>> leaf_features_inner_; std::vector<std::vector<int>> leaf_features_inner_;
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
/*! \brief Marks whether this tree is a CUDATree */ /*! \brief Marks whether this tree is a CUDATree */
bool is_cuda_tree_; bool is_cuda_tree_;
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
}; };
inline void Tree::Split(int leaf, int feature, int real_feature, inline void Tree::Split(int leaf, int feature, int real_feature,
......
...@@ -105,33 +105,33 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -105,33 +105,33 @@ class BaggingSampleStrategy : public SampleStrategy {
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_); Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner // set bagging data to tree learner
if (!is_use_subset_) { if (!is_use_subset_) {
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} else { } else {
// get subset // get subset
tmp_subset_->ReSize(bag_data_cnt_); tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(), tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false); bag_data_cnt_, false);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(), tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_); bag_data_cnt_);
} else { } else {
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(), tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_); bag_data_cnt_);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
} }
} }
} }
...@@ -161,11 +161,11 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -161,11 +161,11 @@ class BaggingSampleStrategy : public SampleStrategy {
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_); bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
} }
bag_data_indices_.resize(num_data_); bag_data_indices_.resize(num_data_);
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
if (config_->device_type == std::string("cuda")) { if (config_->device_type == std::string("cuda")) {
cuda_bag_data_indices_.Resize(num_data_); cuda_bag_data_indices_.Resize(num_data_);
} }
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
if (!config_->bagging_by_query) { if (!config_->bagging_by_query) {
bagging_runner_.ReSize(num_data_); bagging_runner_.ReSize(num_data_);
} else { } else {
...@@ -206,9 +206,9 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -206,9 +206,9 @@ class BaggingSampleStrategy : public SampleStrategy {
} else { } else {
bag_data_cnt_ = num_data_; bag_data_cnt_ = num_data_;
bag_data_indices_.clear(); bag_data_indices_.clear();
#ifdef USE_CUDA #if defined(USE_CUDA) || defined(USE_ROCM)
cuda_bag_data_indices_.Clear(); cuda_bag_data_indices_.Clear();
#endif // USE_CUDA #endif // USE_CUDA || USE_ROCM
bagging_runner_.ReSize(0); bagging_runner_.ReSize(0);
is_use_subset_ = false; is_use_subset_ = false;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment