[CUDA] consolidate CUDA versions (#5677)

* [ci] speed up if-else, swig, and lint conda setup * add 'source activate' * python constraint * start removing cuda v1 * comment out CI * remove more references * revert some unnecessary changes * revert a few more mistakes * revert another change that ignored params * sigh * remove CUDATreeLearner * fix tests, docs * fix quoting in setup.py * restore all CI * Apply suggestions from code review Co-authored-by: shiyu1994 <shiyu_k1994@qq.com> * Apply suggestions from code review * completely remove cuda_exp, update docs --------- Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>

[CUDA] consolidate CUDA versions (#5677)
* [ci] speed up if-else, swig, and lint conda setup * add 'source activate' * python constraint * start removing cuda v1 * comment out CI * remove more references * revert some unnecessary changes * revert a few more mistakes * revert another change that ignored params * sigh * remove CUDATreeLearner * fix tests, docs * fix quoting in setup.py * restore all CI * Apply suggestions from code review Co-authored-by: shiyu1994 <shiyu_k1994@qq.com> * Apply suggestions from code review * completely remove cuda_exp, update docs --------- Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>
4f47547c · James Lamb · GitHub · 5ffd7571 · 4f47547c · 4f47547c
Unverified Commit 4f47547c authored Jan 31, 2023 by James Lamb Committed by GitHub Feb 01, 2023
20 changed files
--- a/.ci/setup.sh
+++ b/.ci/setup.sh
@@ -106,7 +106,7 @@ else  # Linux
            || exit -1
        fi
    fi
-    if [[ $TASK == "cuda" || $TASK == "cuda_exp" ]]; then
+    if [[ $TASK == "cuda" ]]; then
        echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
        apt-get update
        apt-get install --no-install-recommends -y \

--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -201,41 +201,24 @@ if [[ $TASK == "gpu" ]]; then
    elif [[ $METHOD == "source" ]]; then
        cmake -DUSE_GPU=ON ..
    fi
-elif [[ $TASK == "cuda" || $TASK == "cuda_exp" ]]; then
+elif [[ $TASK == "cuda" ]]; then
-    if [[ $TASK == "cuda" ]]; then
+    sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
-        sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
+    grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1  # make sure that changes were really done
-        grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1  # make sure that changes were really done
+    # by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
-    else
+    sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
-        sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda_exp";/' $BUILD_DIRECTORY/include/LightGBM/config.h
+    grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1  # make sure that changes were really done
-        grep -q 'std::string device_type = "cuda_exp"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1  # make sure that changes were really done
-        # by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
-        sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
-        grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1  # make sure that changes were really done
-    fi
    if [[ $METHOD == "pip" ]]; then
        cd $BUILD_DIRECTORY/python-package && python setup.py sdist || exit -1
-        if [[ $TASK == "cuda" ]]; then
+        pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda || exit -1
-            pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda || exit -1
-        else
-            pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda-exp || exit -1
-        fi
        pytest $BUILD_DIRECTORY/tests/python_package_test || exit -1
        exit 0
    elif [[ $METHOD == "wheel" ]]; then
-        if [[ $TASK == "cuda" ]]; then
+        cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda || exit -1
-            cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda || exit -1
-        else
-            cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda-exp || exit -1
-        fi
        pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER*.whl -v || exit -1
        pytest $BUILD_DIRECTORY/tests || exit -1
        exit 0
    elif [[ $METHOD == "source" ]]; then
-        if [[ $TASK == "cuda" ]]; then
+        cmake -DUSE_CUDA=ON ..
-            cmake -DUSE_CUDA=ON ..
-        else
-            cmake -DUSE_CUDA_EXP=ON ..
-        fi
    fi
 elif [[ $TASK == "mpi" ]]; then
    if [[ $METHOD == "pip" ]]; then

--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -28,31 +28,21 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - method: source
-            compiler: gcc
-            python_version: "3.8"
-            cuda_version: "11.7.1"
-            task: cuda
-          - method: pip
-            compiler: clang
-            python_version: "3.9"
-            cuda_version: "10.0"
-            task: cuda
          - method: wheel
            compiler: gcc
            python_version: "3.10"
-            cuda_version: "9.0"
+            cuda_version: "11.7.1"
            task: cuda
          - method: source
            compiler: gcc
            python_version: "3.8"
-            cuda_version: "11.7.1"
+            cuda_version: "10.0"
-            task: cuda_exp
+            task: cuda
          - method: pip
            compiler: clang
            python_version: "3.9"
-            cuda_version: "10.0"
+            cuda_version: "11.7.1"
-            task: cuda_exp
+            task: cuda
    steps:
      - name: Setup or update software on host machine
        run: |

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,8 +4,7 @@ option(USE_GPU "Enable GPU-accelerated training" OFF)
 option(USE_SWIG "Enable SWIG to generate Java API" OFF)
 option(USE_HDFS "Enable HDFS support (EXPERIMENTAL)" OFF)
 option(USE_TIMETAG "Set to ON to output time costs" OFF)
-option(USE_CUDA "Enable CUDA-accelerated training (EXPERIMENTAL)" OFF)
+option(USE_CUDA "Enable CUDA-accelerated training " OFF)
-option(USE_CUDA_EXP "Enable CUDA-accelerated training with more acceleration (EXPERIMENTAL)" OFF)
 option(USE_DEBUG "Set to ON for Debug mode" OFF)
 option(USE_SANITIZER "Use santizer flags" OFF)
 set(
@@ -31,7 +30,7 @@ elseif(USE_SWIG)
  cmake_minimum_required(VERSION 3.8)
 elseif(USE_GPU OR APPLE)
  cmake_minimum_required(VERSION 3.2)
-elseif(USE_CUDA OR USE_CUDA_EXP)
+elseif(USE_CUDA)
  cmake_minimum_required(VERSION 3.16)
 else()
  cmake_minimum_required(VERSION 3.0)
@@ -137,7 +136,7 @@ else()
    add_definitions(-DUSE_SOCKET)
 endif()
-if(USE_CUDA OR USE_CUDA_EXP)
+if(USE_CUDA)
    set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
    enable_language(CUDA)
    set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
@@ -192,12 +191,8 @@ if(__INTEGRATE_OPENCL)
    endif()
 endif()
-if(USE_CUDA OR USE_CUDA_EXP)
+if(USE_CUDA)
-    if(USE_CUDA)
+    find_package(CUDA 10.0 REQUIRED)
-      find_package(CUDA 9.0 REQUIRED)
-    else()
-      find_package(CUDA 10.0 REQUIRED)
-    endif()
    include_directories(${CUDA_INCLUDE_DIRS})
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS} -Xcompiler=-fPIC -Xcompiler=-Wall")
@@ -224,11 +219,7 @@ if(USE_CUDA OR USE_CUDA_EXP)
    endif()
    message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
-    if(USE_CUDA)
+    add_definitions(-DUSE_CUDA)
-      add_definitions(-DUSE_CUDA)
-    elseif(USE_CUDA_EXP)
-      add_definitions(-DUSE_CUDA_EXP)
-    endif()
    if(NOT DEFINED CMAKE_CUDA_STANDARD)
      set(CMAKE_CUDA_STANDARD 11)
@@ -411,10 +402,8 @@ file(
      src/objective/*.cpp
      src/network/*.cpp
      src/treelearner/*.cpp
-if(USE_CUDA OR USE_CUDA_EXP)
+if(USE_CUDA)
      src/treelearner/*.cu
-endif()
-if(USE_CUDA_EXP)
      src/boosting/cuda/*.cpp
      src/boosting/cuda/*.cu
      src/metric/cuda/*.cpp
@@ -549,7 +538,7 @@ if(__INTEGRATE_OPENCL)
  target_link_libraries(lightgbm_objs PUBLIC ${INTEGRATED_OPENCL_LIBRARIES} ${CMAKE_DL_LIBS})
 endif()
-if(USE_CUDA OR USE_CUDA_EXP)
+if(USE_CUDA)
  # Disable cmake warning about policy CMP0104. Refer to issue #3754 and PR #4268.
  # Custom target properties does not propagate, thus we need to specify for
  # each target that contains or depends on cuda source.

--- a/docs/Installation-Guide.rst
+++ b/docs/Installation-Guide.rst
@@ -605,8 +605,8 @@ Docker
 Refer to `GPU Docker folder <https://github.com/microsoft/LightGBM/tree/master/docker/gpu>`__.
-Build CUDA Version (Experimental)
+Build CUDA Version
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~
 The `original GPU build <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL.
@@ -621,7 +621,7 @@ On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and *
 The following dependencies should be installed before compilation:
--  **CUDA** 9.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers.
+-  **CUDA** 10.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers.
 -  **CMake** 3.16 or later.
@@ -636,8 +636,6 @@ To build LightGBM CUDA version, run the following commands:
  cmake -DUSE_CUDA=1 ..
  make -j4
-Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``-DUSE_CUDA`` with ``-DUSE_CUDA_EXP`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries.
 **Note**: glibc >= 2.14 is required.
 **Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).

--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -205,9 +205,15 @@ Core Parameters
   -  **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors
--  ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">&#x1F517;&#xFE0E;</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, ``cuda``, ``cuda_exp``, aliases: ``device``
+-  ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">&#x1F517;&#xFE0E;</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, ``cuda``, aliases: ``device``
-   -  device for the tree learning, you can use GPU to achieve the faster learning
+   -  device for the tree learning
+   -  ``cpu`` supports all LightGBM functionality and is portable across the widest range of operating systems and hardware
+   -  ``cuda`` offers faster training than ``gpu`` or ``cpu``, but only works on GPUs supporting CUDA
+   -  ``gpu`` can be faster than ``cpu`` and works on a wider range of GPUs than CUDA
   -  **Note**: it is recommended to use the smaller ``max_bin`` (e.g. 63) to get the better speed up
@@ -215,10 +221,6 @@ Core Parameters
   -  **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
-   -  **Note**: ``cuda_exp`` is an experimental CUDA version, the installation guide for ``cuda_exp`` is identical with ``cuda``
-   -  **Note**: ``cuda_exp`` is faster than ``cuda`` and will replace ``cuda`` in the future
 -  ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = int, aliases: ``random_seed``, ``random_state``
   -  this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``, etc.

--- a/include/LightGBM/bin.h
+++ b/include/LightGBM/bin.h
@@ -480,13 +480,13 @@ class MultiValBin {
  virtual MultiValBin* Clone() = 0;
-  #ifdef USE_CUDA_EXP
+  #ifdef USE_CUDA
  virtual const void* GetRowWiseData(uint8_t* bit_type,
    size_t* total_size,
    bool* is_sparse,
    const void** out_data_ptr,
    uint8_t* data_ptr_bit_type) const = 0;
-  #endif  // USE_CUDA_EXP
+  #endif  // USE_CUDA
 };
 inline uint32_t BinMapper::ValueToBin(double value) const {

--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -223,14 +223,15 @@ struct Config {
  // [doc-only]
  // type = enum
-  // options = cpu, gpu, cuda, cuda_exp
+  // options = cpu, gpu, cuda
  // alias = device
-  // desc = device for the tree learning, you can use GPU to achieve the faster learning
+  // desc = device for the tree learning
+  // desc = ``cpu`` supports all LightGBM functionality and is portable across the widest range of operating systems and hardware
+  // desc = ``cuda`` offers faster training than ``gpu`` or ``cpu``, but only works on GPUs supporting CUDA
+  // desc = ``gpu`` can be faster than ``cpu`` and works on a wider range of GPUs than CUDA
  // desc = **Note**: it is recommended to use the smaller ``max_bin`` (e.g. 63) to get the better speed up
  // desc = **Note**: for the faster speed, GPU uses 32-bit float point to sum up by default, so this may affect the accuracy for some tasks. You can set ``gpu_use_dp=true`` to enable 64-bit float point, but it will slow down the training
  // desc = **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
-  // desc = **Note**: ``cuda_exp`` is an experimental CUDA version, the installation guide for ``cuda_exp`` is identical with ``cuda``
-  // desc = **Note**: ``cuda_exp`` is faster than ``cuda`` and will replace ``cuda`` in the future
  std::string device_type = "cpu";
  // [doc-only]

--- a/include/LightGBM/cuda/cuda_algorithms.hpp
+++ b/include/LightGBM/cuda/cuda_algorithms.hpp
@@ -6,7 +6,7 @@
 #ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
 #define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -577,5 +577,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,
 }  // namespace LightGBM
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
 #endif  // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
--- a/include/LightGBM/cuda/cuda_column_data.hpp
+++ b/include/LightGBM/cuda/cuda_column_data.hpp
@@ -3,7 +3,7 @@
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
 #define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
@@ -137,4 +137,4 @@ class CUDAColumnData {
 #endif  // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
--- a/include/LightGBM/cuda/cuda_metadata.hpp
+++ b/include/LightGBM/cuda/cuda_metadata.hpp
@@ -3,7 +3,7 @@
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
 #define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
@@ -55,4 +55,4 @@ class CUDAMetadata {
 #endif  // LIGHTGBM_CUDA_CUDA_METADATA_HPP_
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
--- a/include/LightGBM/cuda/cuda_metric.hpp
+++ b/include/LightGBM/cuda/cuda_metric.hpp
@@ -7,7 +7,7 @@
 #ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
 #define LIGHTGBM_CUDA_CUDA_METRIC_HPP_
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #include <LightGBM/metric.h>
@@ -36,6 +36,6 @@ class CUDAMetricInterface: public HOST_METRIC {
 }  // namespace LightGBM
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
 #endif  // LIGHTGBM_CUDA_CUDA_METRIC_HPP_
--- a/include/LightGBM/cuda/cuda_objective_function.hpp
+++ b/include/LightGBM/cuda/cuda_objective_function.hpp
@@ -7,7 +7,7 @@
 #ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
 #define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #include <LightGBM/cuda/cuda_utils.h>
 #include <LightGBM/objective_function.h>
@@ -73,6 +73,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {
 }  // namespace LightGBM
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
 #endif  // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
--- a/include/LightGBM/cuda/cuda_random.hpp
+++ b/include/LightGBM/cuda/cuda_random.hpp
@@ -5,7 +5,7 @@
 #ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
 #define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -69,6 +69,6 @@ class CUDARandom {
 }  // namespace LightGBM
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
 #endif  // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
--- a/include/LightGBM/cuda/cuda_row_data.hpp
+++ b/include/LightGBM/cuda/cuda_row_data.hpp
@@ -3,7 +3,7 @@
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
 #define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
@@ -176,4 +176,4 @@ class CUDARowData {
 }  // namespace LightGBM
 #endif  // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
--- a/include/LightGBM/cuda/cuda_split_info.hpp
+++ b/include/LightGBM/cuda/cuda_split_info.hpp
@@ -4,7 +4,7 @@
 * license information.
 */
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
 #define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
@@ -102,4 +102,4 @@ class CUDASplitInfo {
 #endif  // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
--- a/include/LightGBM/cuda/cuda_tree.hpp
+++ b/include/LightGBM/cuda/cuda_tree.hpp
@@ -3,7 +3,7 @@
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifdef USE_CUDA_EXP
+#ifdef USE_CUDA
 #ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
 #define LIGHTGBM_CUDA_CUDA_TREE_HPP_
@@ -170,4 +170,4 @@ class CUDATree : public Tree {
 #endif  // LIGHTGBM_CUDA_CUDA_TREE_HPP_
-#endif  // USE_CUDA_EXP
+#endif  // USE_CUDA
--- a/include/LightGBM/cuda/cuda_utils.h
+++ b/include/LightGBM/cuda/cuda_utils.h
@@ -6,20 +6,15 @@
 #ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
 #define LIGHTGBM_CUDA_CUDA_UTILS_H_
-#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
+#ifdef USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <stdio.h>
 #include <LightGBM/utils/log.h>
-#endif  // USE_CUDA || USE_CUDA_EXP
-#ifdef USE_CUDA_EXP
 #include <vector>
-#endif  // USE_CUDA_EXP
 namespace LightGBM {
-#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
 #define CUDASUCCESS_OR_FATAL(ans) { gpuAssert((ans), __FILE__, __LINE__); }
 inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) {
  if (code != cudaSuccess) {
@@ -27,9 +22,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort =
    if (abort) exit(code);
  }
 }
-#endif  // USE_CUDA || USE_CUDA_EXP
-#ifdef USE_CUDA_EXP
 #define CUDASUCCESS_OR_FATAL_OUTER(ans) { gpuAssert((ans), file, line); }
 void SetCUDADevice(int gpu_device_id, const char* file, int line);
@@ -184,8 +177,8 @@ class CUDAVector {
  size_t size_;
 };
-#endif  // USE_CUDA_EXP
 }  // namespace LightGBM
+#endif  // USE_CUDA
 #endif  // LIGHTGBM_CUDA_CUDA_UTILS_H_
--- a/include/LightGBM/cuda/vector_cudahost.h
+++ b/include/LightGBM/cuda/vector_cudahost.h
@@ -7,7 +7,7 @@
 #include <LightGBM/utils/common.h>
-#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
+#ifdef USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
 #endif
@@ -43,7 +43,7 @@ struct CHAllocator {
    T* ptr;
    if (n == 0) return NULL;
    n = SIZE_ALIGNED(n);
-    #if defined(USE_CUDA) || defined(USE_CUDA_EXP)
+    #ifdef USE_CUDA
      if (LGBM_config_::current_device == lgbm_device_cuda) {
        cudaError_t ret = cudaHostAlloc(&ptr, n*sizeof(T), cudaHostAllocPortable);
        if (ret != cudaSuccess) {
@@ -62,7 +62,7 @@ struct CHAllocator {
  void deallocate(T* p, std::size_t n) {
    (void)n;  // UNUSED
    if (p == NULL) return;
-    #if defined(USE_CUDA) || defined(USE_CUDA_EXP)
+    #ifdef USE_CUDA
      if (LGBM_config_::current_device == lgbm_device_cuda) {
        cudaPointerAttributes attributes;
        cudaPointerGetAttributes(&attributes, p);

--- a/include/LightGBM/dataset.h
+++ b/include/LightGBM/dataset.h
@@ -277,13 +277,13 @@ class Metadata {
  /*! \brief Disable copy */
  Metadata(const Metadata&) = delete;
-  #ifdef USE_CUDA_EXP
+  #ifdef USE_CUDA
  CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); }
  void CreateCUDAMetadata(const int gpu_device_id);
-  #endif  // USE_CUDA_EXP
+  #endif  // USE_CUDA
 private:
  /*! \brief Load wights from file */
@@ -329,9 +329,9 @@ class Metadata {
  bool weight_load_from_file_;
  bool query_load_from_file_;
  bool init_score_load_from_file_;
-  #ifdef USE_CUDA_EXP
+  #ifdef USE_CUDA
  std::unique_ptr<CUDAMetadata> cuda_metadata_;
-  #endif  // USE_CUDA_EXP
+  #endif  // USE_CUDA
 };
@@ -910,13 +910,13 @@ class Dataset {
    return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index);
  }
-  #ifdef USE_CUDA_EXP
+  #ifdef USE_CUDA
  const CUDAColumnData* cuda_column_data() const {
    return cuda_column_data_.get();
  }
-  #endif  // USE_CUDA_EXP
+  #endif  // USE_CUDA
 private:
  void CreateCUDAColumnData();
@@ -968,9 +968,9 @@ class Dataset {
  /*! \brief mutex for threading safe call */
  std::mutex mutex_;
-  #ifdef USE_CUDA_EXP
+  #ifdef USE_CUDA
  std::unique_ptr<CUDAColumnData> cuda_column_data_;
-  #endif  // USE_CUDA_EXP
+  #endif  // USE_CUDA
  std::string parser_config_str_;
 };