Commit 20996c92 authored by Jeff Daily
Browse files

partial revert of 61ec4f1a

Instead of replacing all #ifdef USE_CUDA, just add USE_CUDA define to ROCm build.
parent 1b3deb5f
......@@ -294,6 +294,9 @@ if(USE_ROCM)
endif()
message(STATUS "CMAKE_HIP_FLAGS: ${CMAKE_HIP_FLAGS}")
# Building for ROCm almost always means USE_CUDA.
# Exceptions to this will be guarded by USE_ROCM.
add_definitions(-DUSE_CUDA)
add_definitions(-DUSE_ROCM)
endif()
......
......@@ -600,13 +600,13 @@ class MultiValBin {
virtual MultiValBin* Clone() = 0;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
virtual const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const = 0;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
inline uint32_t BinMapper::ValueToBin(double value) const {
......
......@@ -7,9 +7,9 @@
#ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(USE_CUDA)
#ifndef USE_ROCM
#include <cuda.h>
#include <cuda_runtime.h>
#endif
......@@ -621,5 +621,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
......@@ -139,4 +139,4 @@ class CUDAColumnData {
#endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
......@@ -55,4 +55,4 @@ class CUDAMetadata {
#endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#define LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/metric.h>
......@@ -39,6 +39,6 @@ class CUDAMetricInterface: public HOST_METRIC {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_
......@@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/objective_function.h>
......@@ -81,6 +81,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
......@@ -5,9 +5,9 @@
#ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(USE_CUDA)
#ifndef USE_ROCM
#include <cuda.h>
#include <cuda_runtime.h>
#endif
......@@ -71,6 +71,6 @@ class CUDARandom {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
......@@ -3,7 +3,7 @@
*/
#pragma once
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(__HIP_PLATFORM_AMD__)
......@@ -62,4 +62,4 @@ static inline constexpr int WARP_SIZE_INTERNAL() {
#define WARPSIZE 32
#endif
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
......@@ -177,4 +177,4 @@ class CUDARowData {
} // namespace LightGBM
#endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -5,7 +5,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
......@@ -105,4 +105,4 @@ class CUDASplitInfo {
#endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
#define LIGHTGBM_CUDA_CUDA_TREE_HPP_
......@@ -170,4 +170,4 @@ class CUDATree : public Tree {
#endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
......@@ -6,13 +6,13 @@
#ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
#define LIGHTGBM_CUDA_CUDA_UTILS_H_
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#if defined(USE_CUDA)
#if defined(USE_ROCM)
#include <LightGBM/cuda/cuda_rocm_interop.h>
#else
#include <cuda.h>
#include <cuda_runtime.h>
#else
#include <LightGBM/cuda/cuda_rocm_interop.h>
#endif
#include <stdio.h>
......@@ -211,6 +211,6 @@ static __device__ T SafeLog(T x) {
} // namespace LightGBM
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_UTILS_H_
......@@ -8,13 +8,14 @@
#include <LightGBM/utils/common.h>
#if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#ifdef USE_CUDA
#if defined(USE_ROCM)
#include <LightGBM/cuda/cuda_utils.hu>
#else // USE_ROCM
#include <cuda.h>
#include <cuda_runtime.h>
#endif // USE_ROCM
#endif // USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#endif // USE_CUDA || USE_ROCM
#include <stdio.h>
enum LGBM_Device {
......@@ -47,7 +48,7 @@ struct CHAllocator {
T* ptr;
if (n == 0) return NULL;
n = SIZE_ALIGNED(n);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaError_t ret = cudaHostAlloc(reinterpret_cast<void**>(&ptr), n*sizeof(T), cudaHostAllocPortable);
if (ret != cudaSuccess) {
......@@ -66,7 +67,7 @@ struct CHAllocator {
void deallocate(T* p, std::size_t n) {
(void)n; // UNUSED
if (p == NULL) return;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaPointerAttributes attributes;
CUDASUCCESS_OR_FATAL(cudaPointerGetAttributes(&attributes, p));
......
......@@ -318,13 +318,13 @@ class Metadata {
/*! \brief Disable copy */
Metadata(const Metadata&) = delete;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); }
void CreateCUDAMetadata(const int gpu_device_id);
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
/*! \brief Load wights from file */
......@@ -391,9 +391,9 @@ class Metadata {
bool position_load_from_file_;
bool query_load_from_file_;
bool init_score_load_from_file_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
std::unique_ptr<CUDAMetadata> cuda_metadata_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
......@@ -997,13 +997,13 @@ class Dataset {
return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index);
}
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const CUDAColumnData* cuda_column_data() const {
return cuda_column_data_.get();
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
void SerializeHeader(BinaryWriter* serializer);
......@@ -1062,9 +1062,9 @@ class Dataset {
/*! \brief mutex for threading safe call */
std::mutex mutex_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
std::unique_ptr<CUDAColumnData> cuda_column_data_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
std::string parser_config_str_;
};
......
......@@ -108,7 +108,7 @@ class ObjectiveFunction {
*/
virtual bool IsCUDAObjective() const { return false; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
/*!
* \brief Convert output for CUDA version
*/
......@@ -118,7 +118,7 @@ class ObjectiveFunction {
virtual bool NeedConvertOutputCUDA () const { return false; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
} // namespace LightGBM
......
......@@ -38,9 +38,9 @@ class SampleStrategy {
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
objective_function_ = objective_function;
......@@ -76,10 +76,10 @@ class SampleStrategy {
/*! \brief whether need to resize the gradient vectors */
bool need_resize_gradients_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
CUDAVector<data_size_t> cuda_bag_data_indices_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
} // namespace LightGBM
......
......@@ -219,7 +219,7 @@ class MultiValBinWrapper {
}
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const void* GetRowWiseData(
uint8_t* bit_type,
size_t* total_size,
......@@ -235,7 +235,7 @@ class MultiValBinWrapper {
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
}
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
bool is_use_subcol_ = false;
......@@ -280,9 +280,9 @@ struct TrainingShareStates {
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
bool IsSparseRowwise() {
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
......@@ -332,7 +332,7 @@ struct TrainingShareStates {
}
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
......@@ -347,13 +347,13 @@ struct TrainingShareStates {
return nullptr;
}
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
private:
std::vector<uint32_t> feature_hist_offsets_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
std::vector<uint32_t> column_hist_offsets_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
int num_hist_total_bin_ = 0;
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
......
......@@ -321,9 +321,9 @@ class Tree {
inline bool is_linear() const { return is_linear_; }
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
inline bool is_cuda_tree() const { return is_cuda_tree_; }
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
inline void SetIsLinear(bool is_linear) {
is_linear_ = is_linear;
......@@ -534,10 +534,10 @@ class Tree {
std::vector<std::vector<int>> leaf_features_;
/* \brief features used in leaf linear models; indexing is relative to used_features_ */
std::vector<std::vector<int>> leaf_features_inner_;
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
/*! \brief Marks whether this tree is a CUDATree */
bool is_cuda_tree_;
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
};
inline void Tree::Split(int leaf, int feature, int real_feature,
......
......@@ -105,33 +105,33 @@ class BaggingSampleStrategy : public SampleStrategy {
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
if (!is_use_subset_) {
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
}
}
}
......@@ -161,11 +161,11 @@ class BaggingSampleStrategy : public SampleStrategy {
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
}
bag_data_indices_.resize(num_data_);
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
cuda_bag_data_indices_.Resize(num_data_);
}
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
if (!config_->bagging_by_query) {
bagging_runner_.ReSize(num_data_);
} else {
......@@ -206,9 +206,9 @@ class BaggingSampleStrategy : public SampleStrategy {
} else {
bag_data_cnt_ = num_data_;
bag_data_indices_.clear();
#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
cuda_bag_data_indices_.Clear();
#endif // USE_CUDA || USE_ROCM
#endif // USE_CUDA
bagging_runner_.ReSize(0);
is_use_subset_ = false;
}
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment