Unverified Commit 4f47547c authored by James Lamb, committed by GitHub
Browse files

[CUDA] consolidate CUDA versions (#5677)



* [ci] speed up if-else, swig, and lint conda setup

* add 'source activate'

* python constraint

* start removing cuda v1

* comment out CI

* remove more references

* revert some unnecessary changes

* revert a few more mistakes

* revert another change that ignored params

* sigh

* remove CUDATreeLearner

* fix tests, docs

* fix quoting in setup.py

* restore all CI

* Apply suggestions from code review
Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>

* Apply suggestions from code review

* completely remove cuda_exp, update docs

---------
Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>
parent 5ffd7571
...@@ -97,7 +97,7 @@ class ObjectiveFunction { ...@@ -97,7 +97,7 @@ class ObjectiveFunction {
*/ */
virtual bool IsCUDAObjective() const { return false; } virtual bool IsCUDAObjective() const { return false; }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
/*! /*!
* \brief Convert output for CUDA version * \brief Convert output for CUDA version
*/ */
...@@ -107,7 +107,7 @@ class ObjectiveFunction { ...@@ -107,7 +107,7 @@ class ObjectiveFunction {
virtual bool NeedConvertOutputCUDA () const { return false; } virtual bool NeedConvertOutputCUDA () const { return false; }
#endif // USE_CUDA_EXP #endif // USE_CUDA
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -38,9 +38,9 @@ class SampleStrategy { ...@@ -38,9 +38,9 @@ class SampleStrategy {
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; } std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; } CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
#endif // USE_CUDA_EXP #endif // USE_CUDA
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) { void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
objective_function_ = objective_function; objective_function_ = objective_function;
...@@ -72,10 +72,10 @@ class SampleStrategy { ...@@ -72,10 +72,10 @@ class SampleStrategy {
/*! \brief whether need to resize the gradient vectors */ /*! \brief whether need to resize the gradient vectors */
bool need_resize_gradients_; bool need_resize_gradients_;
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda_exp */ /*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
CUDAVector<data_size_t> cuda_bag_data_indices_; CUDAVector<data_size_t> cuda_bag_data_indices_;
#endif // USE_CUDA_EXP #endif // USE_CUDA
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -126,7 +126,7 @@ class MultiValBinWrapper { ...@@ -126,7 +126,7 @@ class MultiValBinWrapper {
} }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
const void* GetRowWiseData( const void* GetRowWiseData(
uint8_t* bit_type, uint8_t* bit_type,
size_t* total_size, size_t* total_size,
...@@ -142,7 +142,7 @@ class MultiValBinWrapper { ...@@ -142,7 +142,7 @@ class MultiValBinWrapper {
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type); return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
} }
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
private: private:
bool is_use_subcol_ = false; bool is_use_subcol_ = false;
...@@ -183,9 +183,9 @@ struct TrainingShareStates { ...@@ -183,9 +183,9 @@ struct TrainingShareStates {
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; } const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; } const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
#endif // USE_CUDA_EXP #endif // USE_CUDA
bool IsSparseRowwise() { bool IsSparseRowwise() {
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse()); return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
...@@ -235,7 +235,7 @@ struct TrainingShareStates { ...@@ -235,7 +235,7 @@ struct TrainingShareStates {
} }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type, const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
bool* is_sparse, bool* is_sparse,
...@@ -250,13 +250,13 @@ struct TrainingShareStates { ...@@ -250,13 +250,13 @@ struct TrainingShareStates {
return nullptr; return nullptr;
} }
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
private: private:
std::vector<uint32_t> feature_hist_offsets_; std::vector<uint32_t> feature_hist_offsets_;
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
std::vector<uint32_t> column_hist_offsets_; std::vector<uint32_t> column_hist_offsets_;
#endif // USE_CUDA_EXP #endif // USE_CUDA
int num_hist_total_bin_ = 0; int num_hist_total_bin_ = 0;
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_; std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_; std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
......
...@@ -319,9 +319,9 @@ class Tree { ...@@ -319,9 +319,9 @@ class Tree {
inline bool is_linear() const { return is_linear_; } inline bool is_linear() const { return is_linear_; }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
inline bool is_cuda_tree() const { return is_cuda_tree_; } inline bool is_cuda_tree() const { return is_cuda_tree_; }
#endif // USE_CUDA_EXP #endif // USE_CUDA
inline void SetIsLinear(bool is_linear) { inline void SetIsLinear(bool is_linear) {
is_linear_ = is_linear; is_linear_ = is_linear;
...@@ -532,10 +532,10 @@ class Tree { ...@@ -532,10 +532,10 @@ class Tree {
std::vector<std::vector<int>> leaf_features_; std::vector<std::vector<int>> leaf_features_;
/* \brief features used in leaf linear models; indexing is relative to used_features_ */ /* \brief features used in leaf linear models; indexing is relative to used_features_ */
std::vector<std::vector<int>> leaf_features_inner_; std::vector<std::vector<int>> leaf_features_inner_;
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
/*! \brief Marks whether this tree is a CUDATree */ /*! \brief Marks whether this tree is a CUDATree */
bool is_cuda_tree_; bool is_cuda_tree_;
#endif // USE_CUDA_EXP #endif // USE_CUDA
}; };
inline void Tree::Split(int leaf, int feature, int real_feature, inline void Tree::Split(int leaf, int feature, int real_feature,
......
...@@ -121,11 +121,9 @@ Build CUDA Version ...@@ -121,11 +121,9 @@ Build CUDA Version
All requirements from `Build from Sources section <#build-from-sources>`__ apply for this installation option as well, and `CMake`_ (version 3.16 or higher) is strongly required. All requirements from `Build from Sources section <#build-from-sources>`__ apply for this installation option as well, and `CMake`_ (version 3.16 or higher) is strongly required.
**CUDA** library (version 9.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version-experimental>`__. **CUDA** library (version 10.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version-experimental>`__.
Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``--cuda`` with ``--cuda-exp`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries. Note that this new version uses twice the memory, since it stores data row-wise as well as column-wise in memory to improve performance (see this `issue <https://github.com/microsoft/LightGBM/issues/5318>`__ for discussion). To use the CUDA version within Python, pass ``{"device": "cuda"}`` in parameters.
To use the regular or experimental CUDA versions within Python, pass ``{"device": "cuda"}`` or ``{"device": "cuda_exp"}`` respectively as parameters.
Build HDFS Version Build HDFS Version
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
...@@ -211,8 +209,6 @@ Run ``python setup.py install --gpu`` to enable GPU support. All requirements fr ...@@ -211,8 +209,6 @@ Run ``python setup.py install --gpu`` to enable GPU support. All requirements fr
Run ``python setup.py install --cuda`` to enable CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well. Run ``python setup.py install --cuda`` to enable CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
Run ``python setup.py install --cuda-exp`` to enable the new experimental version of CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
Run ``python setup.py install --hdfs`` to enable HDFS support. All requirements from `Build HDFS Version section <#build-hdfs-version>`__ apply for this installation option as well. Run ``python setup.py install --hdfs`` to enable HDFS support. All requirements from `Build HDFS Version section <#build-hdfs-version>`__ apply for this installation option as well.
Run ``python setup.py install --bit32``, if you want to use 32-bit version. All requirements from `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__ apply for this installation option as well. Run ``python setup.py install --bit32``, if you want to use 32-bit version. All requirements from `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__ apply for this installation option as well.
......
...@@ -21,7 +21,6 @@ LIGHTGBM_OPTIONS = [ ...@@ -21,7 +21,6 @@ LIGHTGBM_OPTIONS = [
('integrated-opencl', None, 'Compile integrated OpenCL version'), ('integrated-opencl', None, 'Compile integrated OpenCL version'),
('gpu', 'g', 'Compile GPU version'), ('gpu', 'g', 'Compile GPU version'),
('cuda', None, 'Compile CUDA version'), ('cuda', None, 'Compile CUDA version'),
('cuda-exp', None, 'Compile CUDA Experimental version'),
('mpi', None, 'Compile MPI version'), ('mpi', None, 'Compile MPI version'),
('nomp', None, 'Compile version without OpenMP support'), ('nomp', None, 'Compile version without OpenMP support'),
('hdfs', 'h', 'Compile HDFS version'), ('hdfs', 'h', 'Compile HDFS version'),
...@@ -106,7 +105,6 @@ def compile_cpp( ...@@ -106,7 +105,6 @@ def compile_cpp(
use_mingw: bool = False, use_mingw: bool = False,
use_gpu: bool = False, use_gpu: bool = False,
use_cuda: bool = False, use_cuda: bool = False,
use_cuda_exp: bool = False,
use_mpi: bool = False, use_mpi: bool = False,
use_hdfs: bool = False, use_hdfs: bool = False,
boost_root: Optional[str] = None, boost_root: Optional[str] = None,
...@@ -148,8 +146,6 @@ def compile_cpp( ...@@ -148,8 +146,6 @@ def compile_cpp(
cmake_cmd.append(f"-DOpenCL_LIBRARY={opencl_library}") cmake_cmd.append(f"-DOpenCL_LIBRARY={opencl_library}")
elif use_cuda: elif use_cuda:
cmake_cmd.append("-DUSE_CUDA=ON") cmake_cmd.append("-DUSE_CUDA=ON")
elif use_cuda_exp:
cmake_cmd.append("-DUSE_CUDA_EXP=ON")
if use_mpi: if use_mpi:
cmake_cmd.append("-DUSE_MPI=ON") cmake_cmd.append("-DUSE_MPI=ON")
if nomp: if nomp:
...@@ -171,7 +167,7 @@ def compile_cpp( ...@@ -171,7 +167,7 @@ def compile_cpp(
else: else:
status = 1 status = 1
lib_path = CURRENT_DIR / "compile" / "windows" / "x64" / "DLL" / "lib_lightgbm.dll" lib_path = CURRENT_DIR / "compile" / "windows" / "x64" / "DLL" / "lib_lightgbm.dll"
if not any((use_gpu, use_cuda, use_cuda_exp, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)): if not any((use_gpu, use_cuda, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
logger.info("Starting to compile with MSBuild from existing solution file.") logger.info("Starting to compile with MSBuild from existing solution file.")
platform_toolsets = ("v143", "v142", "v141", "v140") platform_toolsets = ("v143", "v142", "v141", "v140")
for pt in platform_toolsets: for pt in platform_toolsets:
...@@ -235,7 +231,6 @@ class CustomInstall(install): ...@@ -235,7 +231,6 @@ class CustomInstall(install):
self.integrated_opencl = False self.integrated_opencl = False
self.gpu = False self.gpu = False
self.cuda = False self.cuda = False
self.cuda_exp = False
self.boost_root = None self.boost_root = None
self.boost_dir = None self.boost_dir = None
self.boost_include_dir = None self.boost_include_dir = None
...@@ -260,7 +255,7 @@ class CustomInstall(install): ...@@ -260,7 +255,7 @@ class CustomInstall(install):
LOG_PATH.touch() LOG_PATH.touch()
if not self.precompile: if not self.precompile:
copy_files(integrated_opencl=self.integrated_opencl, use_gpu=self.gpu) copy_files(integrated_opencl=self.integrated_opencl, use_gpu=self.gpu)
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_cuda_exp=self.cuda_exp, use_mpi=self.mpi, compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_mpi=self.mpi,
use_hdfs=self.hdfs, boost_root=self.boost_root, boost_dir=self.boost_dir, use_hdfs=self.hdfs, boost_root=self.boost_root, boost_dir=self.boost_dir,
boost_include_dir=self.boost_include_dir, boost_librarydir=self.boost_librarydir, boost_include_dir=self.boost_include_dir, boost_librarydir=self.boost_librarydir,
opencl_include_dir=self.opencl_include_dir, opencl_library=self.opencl_library, opencl_include_dir=self.opencl_include_dir, opencl_library=self.opencl_library,
...@@ -281,7 +276,6 @@ class CustomBdistWheel(bdist_wheel): ...@@ -281,7 +276,6 @@ class CustomBdistWheel(bdist_wheel):
self.integrated_opencl = False self.integrated_opencl = False
self.gpu = False self.gpu = False
self.cuda = False self.cuda = False
self.cuda_exp = False
self.boost_root = None self.boost_root = None
self.boost_dir = None self.boost_dir = None
self.boost_include_dir = None self.boost_include_dir = None
...@@ -304,7 +298,6 @@ class CustomBdistWheel(bdist_wheel): ...@@ -304,7 +298,6 @@ class CustomBdistWheel(bdist_wheel):
install.integrated_opencl = self.integrated_opencl install.integrated_opencl = self.integrated_opencl
install.gpu = self.gpu install.gpu = self.gpu
install.cuda = self.cuda install.cuda = self.cuda
install.cuda_exp = self.cuda_exp
install.boost_root = self.boost_root install.boost_root = self.boost_root
install.boost_dir = self.boost_dir install.boost_dir = self.boost_dir
install.boost_include_dir = self.boost_include_dir install.boost_include_dir = self.boost_include_dir
......
...@@ -36,7 +36,7 @@ Application::Application(int argc, char** argv) { ...@@ -36,7 +36,7 @@ Application::Application(int argc, char** argv) {
Log::Fatal("No training/prediction data, application quit"); Log::Fatal("No training/prediction data, application quit");
} }
if (config_.device_type == std::string("cuda") || config_.device_type == std::string("cuda_exp")) { if (config_.device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda; LGBM_config_::current_device = lgbm_device_cuda;
} }
} }
......
...@@ -47,33 +47,33 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -47,33 +47,33 @@ class BaggingSampleStrategy : public SampleStrategy {
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_); Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner // set bagging data to tree learner
if (!is_use_subset_) { if (!is_use_subset_) {
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} else { } else {
// get subset // get subset
tmp_subset_->ReSize(bag_data_cnt_); tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(), tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false); bag_data_cnt_, false);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(), tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_); bag_data_cnt_);
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(), tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_); bag_data_cnt_);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} }
} }
} }
...@@ -103,11 +103,11 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -103,11 +103,11 @@ class BaggingSampleStrategy : public SampleStrategy {
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_); bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
} }
bag_data_indices_.resize(num_data_); bag_data_indices_.resize(num_data_);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
cuda_bag_data_indices_.Resize(num_data_); cuda_bag_data_indices_.Resize(num_data_);
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
bagging_runner_.ReSize(num_data_); bagging_runner_.ReSize(num_data_);
bagging_rands_.clear(); bagging_rands_.clear();
for (int i = 0; for (int i = 0;
...@@ -118,7 +118,7 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -118,7 +118,7 @@ class BaggingSampleStrategy : public SampleStrategy {
double average_bag_rate = double average_bag_rate =
(static_cast<double>(bag_data_cnt_) / num_data_) / config_->bagging_freq; (static_cast<double>(bag_data_cnt_) / num_data_) / config_->bagging_freq;
is_use_subset_ = false; is_use_subset_ = false;
if (config_->device_type != std::string("cuda_exp")) { if (config_->device_type != std::string("cuda")) {
const int group_threshold_usesubset = 100; const int group_threshold_usesubset = 100;
const double average_bag_rate_threshold = 0.5; const double average_bag_rate_threshold = 0.5;
if (average_bag_rate <= average_bag_rate_threshold if (average_bag_rate <= average_bag_rate_threshold
...@@ -141,9 +141,9 @@ class BaggingSampleStrategy : public SampleStrategy { ...@@ -141,9 +141,9 @@ class BaggingSampleStrategy : public SampleStrategy {
} else { } else {
bag_data_cnt_ = num_data_; bag_data_cnt_ = num_data_;
bag_data_indices_.clear(); bag_data_indices_.clear();
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
cuda_bag_data_indices_.Clear(); cuda_bag_data_indices_.Clear();
#endif // USE_CUDA_EXP #endif // USE_CUDA
bagging_runner_.ReSize(0); bagging_runner_.ReSize(0);
is_use_subset_ = false; is_use_subset_ = false;
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp" #include "cuda_score_updater.hpp"
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
namespace LightGBM { namespace LightGBM {
...@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) { ...@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp" #include "cuda_score_updater.hpp"
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
namespace LightGBM { namespace LightGBM {
...@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const ...@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h> #include <LightGBM/cuda/cuda_utils.h>
...@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater { ...@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ #endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
...@@ -68,14 +68,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective ...@@ -68,14 +68,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
es_first_metric_only_ = config_->first_metric_only; es_first_metric_only_ = config_->first_metric_only;
shrinkage_rate_ = config_->learning_rate; shrinkage_rate_ = config_->learning_rate;
if (config_->device_type == std::string("cuda") || config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
LGBM_config_::current_learner = use_cuda_learner; LGBM_config_::current_learner = use_cuda_learner;
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0; const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id)); CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} }
// load forced_splits file // load forced_splits file
...@@ -116,15 +116,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective ...@@ -116,15 +116,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
} }
training_metrics_.shrink_to_fit(); training_metrics_.shrink_to_fit();
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_)); train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_)); train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
num_data_ = train_data_->num_data(); num_data_ = train_data_->num_data();
...@@ -186,11 +186,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data, ...@@ -186,11 +186,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
} }
// for a validation dataset, we need its score and metric // for a validation dataset, we need its score and metric
auto new_score_updater = auto new_score_updater =
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
config_->device_type == std::string("cuda_exp") ? config_->device_type == std::string("cuda") ?
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_, std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) : objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
#endif // USE_CUDA_EXP #endif // USE_CUDA
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_)); std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
// update score // update score
for (int i = 0; i < iter_; ++i) { for (int i = 0; i < iter_; ++i) {
...@@ -481,15 +481,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { ...@@ -481,15 +481,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt(); const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
// we need to predict out-of-bag scores of data for boosting // we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt > 0) { if (num_data_ - bag_data_cnt > 0) {
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id); train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id); train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} }
} else { } else {
...@@ -503,17 +503,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { ...@@ -503,17 +503,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
} }
} }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const { std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
#else #else
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const { std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
#endif // USE_CUDA_EXP #endif // USE_CUDA
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
const bool evaluation_on_cuda = metric->IsCUDAMetric(); const bool evaluation_on_cuda = metric->IsCUDAMetric();
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) { if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
#endif // USE_CUDA_EXP #endif // USE_CUDA
return metric->Eval(score, objective_function_); return metric->Eval(score, objective_function_);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} else if (boosting_on_gpu_ && !evaluation_on_cuda) { } else if (boosting_on_gpu_ && !evaluation_on_cuda) {
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_); const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
if (total_size > host_score_.size()) { if (total_size > host_score_.size()) {
...@@ -529,7 +529,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor ...@@ -529,7 +529,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__); CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
return metric->Eval(cuda_score_.RawData(), objective_function_); return metric->Eval(cuda_score_.RawData(), objective_function_);
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} }
std::string GBDT::OutputMetric(int iter) { std::string GBDT::OutputMetric(int iter) {
...@@ -660,14 +660,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) { ...@@ -660,14 +660,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data(); num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_; *out_len = static_cast<int64_t>(num_data) * num_class_;
} }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
std::vector<double> host_raw_scores; std::vector<double> host_raw_scores;
if (boosting_on_gpu_) { if (boosting_on_gpu_) {
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0); host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__); CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
raw_scores = host_raw_scores.data(); raw_scores = host_raw_scores.data();
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
if (objective_function_ != nullptr) { if (objective_function_ != nullptr) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
...@@ -730,26 +730,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* ...@@ -730,26 +730,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
} }
training_metrics_.shrink_to_fit(); training_metrics_.shrink_to_fit();
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() && boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_); tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
#endif // USE_CUDA_EXP #endif // USE_CUDA
if (train_data != train_data_) { if (train_data != train_data_) {
train_data_ = train_data; train_data_ = train_data;
data_sample_strategy_->UpdateTrainingData(train_data); data_sample_strategy_->UpdateTrainingData(train_data);
// not same training data, need reset score and others // not same training data, need reset score and others
// create score tracker // create score tracker
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_)); train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_)); train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
// update score // update score
for (int i = 0; i < iter_; ++i) { for (int i = 0; i < iter_; ++i) {
...@@ -827,8 +827,8 @@ void GBDT::ResetGradientBuffers() { ...@@ -827,8 +827,8 @@ void GBDT::ResetGradientBuffers() {
const bool is_use_subset = data_sample_strategy_->is_use_subset(); const bool is_use_subset = data_sample_strategy_->is_use_subset();
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt(); const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
if (objective_function_ != nullptr) { if (objective_function_ != nullptr) {
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) { if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
if (cuda_gradients_.Size() < total_size) { if (cuda_gradients_.Size() < total_size) {
cuda_gradients_.Resize(total_size); cuda_gradients_.Resize(total_size);
cuda_hessians_.Resize(total_size); cuda_hessians_.Resize(total_size);
...@@ -836,16 +836,16 @@ void GBDT::ResetGradientBuffers() { ...@@ -836,16 +836,16 @@ void GBDT::ResetGradientBuffers() {
gradients_pointer_ = cuda_gradients_.RawData(); gradients_pointer_ = cuda_gradients_.RawData();
hessians_pointer_ = cuda_hessians_.RawData(); hessians_pointer_ = cuda_hessians_.RawData();
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
if (gradients_.size() < total_size) { if (gradients_.size() < total_size) {
gradients_.resize(total_size); gradients_.resize(total_size);
hessians_.resize(total_size); hessians_.resize(total_size);
} }
gradients_pointer_ = gradients_.data(); gradients_pointer_ = gradients_.data();
hessians_pointer_ = hessians_.data(); hessians_pointer_ = hessians_.data();
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) { } else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
if (gradients_.size() < total_size) { if (gradients_.size() < total_size) {
gradients_.resize(total_size); gradients_.resize(total_size);
......
...@@ -542,7 +542,7 @@ class GBDT : public GBDTBase { ...@@ -542,7 +542,7 @@ class GBDT : public GBDTBase {
/*! \brief Parser config file content */ /*! \brief Parser config file content */
std::string parser_config_str_ = ""; std::string parser_config_str_ = "";
#if defined(USE_CUDA) || defined(USE_CUDA_EXP) #ifdef USE_CUDA
/*! \brief First order derivative of training data */ /*! \brief First order derivative of training data */
std::vector<score_t, CHAllocator<score_t>> gradients_; std::vector<score_t, CHAllocator<score_t>> gradients_;
/*! \brief Second order derivative of training data */ /*! \brief Second order derivative of training data */
...@@ -557,18 +557,18 @@ class GBDT : public GBDTBase { ...@@ -557,18 +557,18 @@ class GBDT : public GBDTBase {
score_t* gradients_pointer_; score_t* gradients_pointer_;
/*! \brief Pointer to hessian vector, can be on CPU or GPU */ /*! \brief Pointer to hessian vector, can be on CPU or GPU */
score_t* hessians_pointer_; score_t* hessians_pointer_;
/*! \brief Whether boosting is done on GPU, used for cuda_exp */ /*! \brief Whether boosting is done on GPU, used for device_type=cuda */
bool boosting_on_gpu_; bool boosting_on_gpu_;
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
/*! \brief Gradient vector on GPU */ /*! \brief Gradient vector on GPU */
CUDAVector<score_t> cuda_gradients_; CUDAVector<score_t> cuda_gradients_;
/*! \brief Hessian vector on GPU */ /*! \brief Hessian vector on GPU */
CUDAVector<score_t> cuda_hessians_; CUDAVector<score_t> cuda_hessians_;
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */ /*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with device_type=cuda */
mutable std::vector<double> host_score_; mutable std::vector<double> host_score_;
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */ /*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
mutable CUDAVector<double> cuda_score_; mutable CUDAVector<double> cuda_score_;
#endif // USE_CUDA_EXP #endif // USE_CUDA
/*! \brief Number of training data */ /*! \brief Number of training data */
data_size_t num_data_; data_size_t num_data_;
......
...@@ -43,33 +43,33 @@ class GOSSStrategy : public SampleStrategy { ...@@ -43,33 +43,33 @@ class GOSSStrategy : public SampleStrategy {
bag_data_cnt_ = left_cnt; bag_data_cnt_ = left_cnt;
// set bagging data to tree learner // set bagging data to tree learner
if (!is_use_subset_) { if (!is_use_subset_) {
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_); tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} else { } else {
// get subset // get subset
tmp_subset_->ReSize(bag_data_cnt_); tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(), tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false); bag_data_cnt_, false);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
if (config_->device_type == std::string("cuda_exp")) { if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__); CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(), tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_); bag_data_cnt_);
} else { } else {
#endif // USE_CUDA_EXP #endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(), tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_); bag_data_cnt_);
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} }
} }
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_algorithms.hpp> #include <LightGBM/cuda/cuda_algorithms.hpp>
...@@ -509,4 +509,4 @@ template __device__ double PercentileDevice<double, data_size_t, label_t, double ...@@ -509,4 +509,4 @@ template __device__ double PercentileDevice<double, data_size_t, label_t, double
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h> #include <LightGBM/cuda/cuda_utils.h>
...@@ -28,4 +28,4 @@ void SetCUDADevice(int gpu_device_id, const char* file, int line) { ...@@ -28,4 +28,4 @@ void SetCUDADevice(int gpu_device_id, const char* file, int line) {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
...@@ -886,7 +886,7 @@ namespace LightGBM { ...@@ -886,7 +886,7 @@ namespace LightGBM {
return nullptr; return nullptr;
} }
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
template <> template <>
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type, const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
size_t* total_size, size_t* total_size,
...@@ -1081,6 +1081,6 @@ namespace LightGBM { ...@@ -1081,6 +1081,6 @@ namespace LightGBM {
return to_return; return to_return;
} }
#endif // USE_CUDA_EXP #endif // USE_CUDA
} // namespace LightGBM } // namespace LightGBM
...@@ -177,8 +177,6 @@ void GetDeviceType(const std::unordered_map<std::string, std::string>& params, s ...@@ -177,8 +177,6 @@ void GetDeviceType(const std::unordered_map<std::string, std::string>& params, s
*device_type = "gpu"; *device_type = "gpu";
} else if (value == std::string("cuda")) { } else if (value == std::string("cuda")) {
*device_type = "cuda"; *device_type = "cuda";
} else if (value == std::string("cuda_exp")) {
*device_type = "cuda_exp";
} else { } else {
Log::Fatal("Unknown device type %s", value.c_str()); Log::Fatal("Unknown device type %s", value.c_str());
} }
...@@ -260,7 +258,7 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) { ...@@ -260,7 +258,7 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
GetObjectiveType(params, &objective); GetObjectiveType(params, &objective);
GetMetricType(params, objective, &metric); GetMetricType(params, objective, &metric);
GetDeviceType(params, &device_type); GetDeviceType(params, &device_type);
if (device_type == std::string("cuda") || device_type == std::string("cuda_exp")) { if (device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda; LGBM_config_::current_device = lgbm_device_cuda;
} }
GetTreeLearnerType(params, &tree_learner); GetTreeLearnerType(params, &tree_learner);
...@@ -373,26 +371,21 @@ void Config::CheckParamConflict() { ...@@ -373,26 +371,21 @@ void Config::CheckParamConflict() {
num_leaves = static_cast<int>(full_num_leaves); num_leaves = static_cast<int>(full_num_leaves);
} }
} }
if (device_type == std::string("gpu") || device_type == std::string("cuda")) { if (device_type == std::string("gpu")) {
// force col-wise for gpu, and cuda version // force col-wise for gpu version
force_col_wise = true; force_col_wise = true;
force_row_wise = false; force_row_wise = false;
if (deterministic) { if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic."); Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
} }
} else if (device_type == std::string("cuda_exp")) { } else if (device_type == std::string("cuda")) {
// force row-wise for cuda_exp version // force row-wise for cuda version
force_col_wise = false; force_col_wise = false;
force_row_wise = true; force_row_wise = true;
if (deterministic) { if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic."); Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
} }
} }
// force gpu_use_dp for CUDA
if (device_type == std::string("cuda") && !gpu_use_dp) {
Log::Warning("CUDA currently requires double precision calculations.");
gpu_use_dp = true;
}
// linear tree learner must be serial type and run on CPU device // linear tree learner must be serial type and run on CPU device
if (linear_tree) { if (linear_tree) {
if (device_type != std::string("cpu")) { if (device_type != std::string("cpu")) {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ */
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp> #include <LightGBM/cuda/cuda_column_data.hpp>
...@@ -308,4 +308,4 @@ void CUDAColumnData::InitColumnMetaInfo() { ...@@ -308,4 +308,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
*/ */
#ifdef USE_CUDA_EXP #ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp> #include <LightGBM/cuda/cuda_column_data.hpp>
...@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column) ...@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
} // namespace LightGBM } // namespace LightGBM
#endif // USE_CUDA_EXP #endif // USE_CUDA
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment