Unverified Commit 4f47547c authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[CUDA] consolidate CUDA versions (#5677)



* [ci] speed up if-else, swig, and lint conda setup

* add 'source activate'

* python constraint

* start removing cuda v1

* comment out CI

* remove more references

* revert some unnecessary changes

* revert a few more mistakes

* revert another change that ignored params

* sigh

* remove CUDATreeLearner

* fix tests, docs

* fix quoting in setup.py

* restore all CI

* Apply suggestions from code review
Co-authored-by: default avatarshiyu1994 <shiyu_k1994@qq.com>

* Apply suggestions from code review

* completely remove cuda_exp, update docs

---------
Co-authored-by: default avatarshiyu1994 <shiyu_k1994@qq.com>
parent 5ffd7571
......@@ -97,7 +97,7 @@ class ObjectiveFunction {
*/
virtual bool IsCUDAObjective() const { return false; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
/*!
* \brief Convert output for CUDA version
*/
......@@ -107,7 +107,7 @@ class ObjectiveFunction {
virtual bool NeedConvertOutputCUDA () const { return false; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
} // namespace LightGBM
......
......@@ -38,9 +38,9 @@ class SampleStrategy {
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
objective_function_ = objective_function;
......@@ -72,10 +72,10 @@ class SampleStrategy {
/*! \brief whether need to resize the gradient vectors */
bool need_resize_gradients_;
#ifdef USE_CUDA_EXP
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda_exp */
#ifdef USE_CUDA
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
CUDAVector<data_size_t> cuda_bag_data_indices_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
} // namespace LightGBM
......
......@@ -126,7 +126,7 @@ class MultiValBinWrapper {
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const void* GetRowWiseData(
uint8_t* bit_type,
size_t* total_size,
......@@ -142,7 +142,7 @@ class MultiValBinWrapper {
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
}
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
bool is_use_subcol_ = false;
......@@ -183,9 +183,9 @@ struct TrainingShareStates {
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
bool IsSparseRowwise() {
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
......@@ -235,7 +235,7 @@ struct TrainingShareStates {
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
......@@ -250,13 +250,13 @@ struct TrainingShareStates {
return nullptr;
}
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
std::vector<uint32_t> feature_hist_offsets_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::vector<uint32_t> column_hist_offsets_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
int num_hist_total_bin_ = 0;
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
......
......@@ -319,9 +319,9 @@ class Tree {
inline bool is_linear() const { return is_linear_; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
inline bool is_cuda_tree() const { return is_cuda_tree_; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
inline void SetIsLinear(bool is_linear) {
is_linear_ = is_linear;
......@@ -532,10 +532,10 @@ class Tree {
std::vector<std::vector<int>> leaf_features_;
/* \brief features used in leaf linear models; indexing is relative to used_features_ */
std::vector<std::vector<int>> leaf_features_inner_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
/*! \brief Marks whether this tree is a CUDATree */
bool is_cuda_tree_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
inline void Tree::Split(int leaf, int feature, int real_feature,
......
......@@ -121,11 +121,9 @@ Build CUDA Version
All requirements from `Build from Sources section <#build-from-sources>`__ apply for this installation option as well, and `CMake`_ (version 3.16 or higher) is strongly required.
**CUDA** library (version 9.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version-experimental>`__.
**CUDA** library (version 10.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version-experimental>`__.
Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``--cuda`` with ``--cuda-exp`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries. Note that this new version uses twice the memory, since it stores data row-wise as well as column-wise in memory to improve performance (see this `issue <https://github.com/microsoft/LightGBM/issues/5318>`__ for discussion).
To use the regular or experimental CUDA versions within Python, pass ``{"device": "cuda"}`` or ``{"device": "cuda_exp"}`` respectively as parameters.
To use the CUDA version within Python, pass ``{"device": "cuda"}`` as a parameter.
Build HDFS Version
~~~~~~~~~~~~~~~~~~
......@@ -211,8 +209,6 @@ Run ``python setup.py install --gpu`` to enable GPU support. All requirements fr
Run ``python setup.py install --cuda`` to enable CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
Run ``python setup.py install --cuda-exp`` to enable the new experimental version of CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
Run ``python setup.py install --hdfs`` to enable HDFS support. All requirements from `Build HDFS Version section <#build-hdfs-version>`__ apply for this installation option as well.
Run ``python setup.py install --bit32``, if you want to use 32-bit version. All requirements from `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__ apply for this installation option as well.
......
......@@ -21,7 +21,6 @@ LIGHTGBM_OPTIONS = [
('integrated-opencl', None, 'Compile integrated OpenCL version'),
('gpu', 'g', 'Compile GPU version'),
('cuda', None, 'Compile CUDA version'),
('cuda-exp', None, 'Compile CUDA Experimental version'),
('mpi', None, 'Compile MPI version'),
('nomp', None, 'Compile version without OpenMP support'),
('hdfs', 'h', 'Compile HDFS version'),
......@@ -106,7 +105,6 @@ def compile_cpp(
use_mingw: bool = False,
use_gpu: bool = False,
use_cuda: bool = False,
use_cuda_exp: bool = False,
use_mpi: bool = False,
use_hdfs: bool = False,
boost_root: Optional[str] = None,
......@@ -148,8 +146,6 @@ def compile_cpp(
cmake_cmd.append(f"-DOpenCL_LIBRARY={opencl_library}")
elif use_cuda:
cmake_cmd.append("-DUSE_CUDA=ON")
elif use_cuda_exp:
cmake_cmd.append("-DUSE_CUDA_EXP=ON")
if use_mpi:
cmake_cmd.append("-DUSE_MPI=ON")
if nomp:
......@@ -171,7 +167,7 @@ def compile_cpp(
else:
status = 1
lib_path = CURRENT_DIR / "compile" / "windows" / "x64" / "DLL" / "lib_lightgbm.dll"
if not any((use_gpu, use_cuda, use_cuda_exp, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
if not any((use_gpu, use_cuda, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
logger.info("Starting to compile with MSBuild from existing solution file.")
platform_toolsets = ("v143", "v142", "v141", "v140")
for pt in platform_toolsets:
......@@ -235,7 +231,6 @@ class CustomInstall(install):
self.integrated_opencl = False
self.gpu = False
self.cuda = False
self.cuda_exp = False
self.boost_root = None
self.boost_dir = None
self.boost_include_dir = None
......@@ -260,7 +255,7 @@ class CustomInstall(install):
LOG_PATH.touch()
if not self.precompile:
copy_files(integrated_opencl=self.integrated_opencl, use_gpu=self.gpu)
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_cuda_exp=self.cuda_exp, use_mpi=self.mpi,
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_mpi=self.mpi,
use_hdfs=self.hdfs, boost_root=self.boost_root, boost_dir=self.boost_dir,
boost_include_dir=self.boost_include_dir, boost_librarydir=self.boost_librarydir,
opencl_include_dir=self.opencl_include_dir, opencl_library=self.opencl_library,
......@@ -281,7 +276,6 @@ class CustomBdistWheel(bdist_wheel):
self.integrated_opencl = False
self.gpu = False
self.cuda = False
self.cuda_exp = False
self.boost_root = None
self.boost_dir = None
self.boost_include_dir = None
......@@ -304,7 +298,6 @@ class CustomBdistWheel(bdist_wheel):
install.integrated_opencl = self.integrated_opencl
install.gpu = self.gpu
install.cuda = self.cuda
install.cuda_exp = self.cuda_exp
install.boost_root = self.boost_root
install.boost_dir = self.boost_dir
install.boost_include_dir = self.boost_include_dir
......
......@@ -36,7 +36,7 @@ Application::Application(int argc, char** argv) {
Log::Fatal("No training/prediction data, application quit");
}
if (config_.device_type == std::string("cuda") || config_.device_type == std::string("cuda_exp")) {
if (config_.device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda;
}
}
......
......@@ -47,33 +47,33 @@ class BaggingSampleStrategy : public SampleStrategy {
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
if (!is_use_subset_) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
}
}
......@@ -103,11 +103,11 @@ class BaggingSampleStrategy : public SampleStrategy {
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
}
bag_data_indices_.resize(num_data_);
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
cuda_bag_data_indices_.Resize(num_data_);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
bagging_runner_.ReSize(num_data_);
bagging_rands_.clear();
for (int i = 0;
......@@ -118,7 +118,7 @@ class BaggingSampleStrategy : public SampleStrategy {
double average_bag_rate =
(static_cast<double>(bag_data_cnt_) / num_data_) / config_->bagging_freq;
is_use_subset_ = false;
if (config_->device_type != std::string("cuda_exp")) {
if (config_->device_type != std::string("cuda")) {
const int group_threshold_usesubset = 100;
const double average_bag_rate_threshold = 0.5;
if (average_bag_rate <= average_bag_rate_threshold
......@@ -141,9 +141,9 @@ class BaggingSampleStrategy : public SampleStrategy {
} else {
bag_data_cnt_ = num_data_;
bag_data_indices_.clear();
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
cuda_bag_data_indices_.Clear();
#endif // USE_CUDA_EXP
#endif // USE_CUDA
bagging_runner_.ReSize(0);
is_use_subset_ = false;
}
......
......@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp"
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
namespace LightGBM {
......@@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
......@@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp"
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
namespace LightGBM {
......@@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
......@@ -6,7 +6,7 @@
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h>
......@@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
......@@ -68,14 +68,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
es_first_metric_only_ = config_->first_metric_only;
shrinkage_rate_ = config_->learning_rate;
if (config_->device_type == std::string("cuda") || config_->device_type == std::string("cuda_exp")) {
if (config_->device_type == std::string("cuda")) {
LGBM_config_::current_learner = use_cuda_learner;
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
// load forced_splits file
......@@ -116,15 +116,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
}
training_metrics_.shrink_to_fit();
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
num_data_ = train_data_->num_data();
......@@ -186,11 +186,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
}
// for a validation dataset, we need its score and metric
auto new_score_updater =
#ifdef USE_CUDA_EXP
config_->device_type == std::string("cuda_exp") ?
#ifdef USE_CUDA
config_->device_type == std::string("cuda") ?
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
#endif // USE_CUDA_EXP
#endif // USE_CUDA
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
// update score
for (int i = 0; i < iter_; ++i) {
......@@ -481,15 +481,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
// we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt > 0) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
} else {
......@@ -503,17 +503,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
}
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
#else
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
#endif // USE_CUDA_EXP
#ifdef USE_CUDA_EXP
#endif // USE_CUDA
#ifdef USE_CUDA
const bool evaluation_on_cuda = metric->IsCUDAMetric();
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
return metric->Eval(score, objective_function_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
} else if (boosting_on_gpu_ && !evaluation_on_cuda) {
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
if (total_size > host_score_.size()) {
......@@ -529,7 +529,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
return metric->Eval(cuda_score_.RawData(), objective_function_);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
std::string GBDT::OutputMetric(int iter) {
......@@ -660,14 +660,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_;
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::vector<double> host_raw_scores;
if (boosting_on_gpu_) {
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
raw_scores = host_raw_scores.data();
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (objective_function_ != nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
......@@ -730,26 +730,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
}
training_metrics_.shrink_to_fit();
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (train_data != train_data_) {
train_data_ = train_data;
data_sample_strategy_->UpdateTrainingData(train_data);
// not same training data, need reset score and others
// create score tracker
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
// update score
for (int i = 0; i < iter_; ++i) {
......@@ -827,8 +827,8 @@ void GBDT::ResetGradientBuffers() {
const bool is_use_subset = data_sample_strategy_->is_use_subset();
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
if (objective_function_ != nullptr) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
if (cuda_gradients_.Size() < total_size) {
cuda_gradients_.Resize(total_size);
cuda_hessians_.Resize(total_size);
......@@ -836,16 +836,16 @@ void GBDT::ResetGradientBuffers() {
gradients_pointer_ = cuda_gradients_.RawData();
hessians_pointer_ = cuda_hessians_.RawData();
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (gradients_.size() < total_size) {
gradients_.resize(total_size);
hessians_.resize(total_size);
}
gradients_pointer_ = gradients_.data();
hessians_pointer_ = hessians_.data();
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
if (gradients_.size() < total_size) {
gradients_.resize(total_size);
......
......@@ -542,7 +542,7 @@ class GBDT : public GBDTBase {
/*! \brief Parser config file content */
std::string parser_config_str_ = "";
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
/*! \brief First order derivative of training data */
std::vector<score_t, CHAllocator<score_t>> gradients_;
/*! \brief Second order derivative of training data */
......@@ -557,18 +557,18 @@ class GBDT : public GBDTBase {
score_t* gradients_pointer_;
/*! \brief Pointer to hessian vector, can be on CPU or GPU */
score_t* hessians_pointer_;
/*! \brief Whether boosting is done on GPU, used for cuda_exp */
/*! \brief Whether boosting is done on GPU, used for device_type=cuda */
bool boosting_on_gpu_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
/*! \brief Gradient vector on GPU */
CUDAVector<score_t> cuda_gradients_;
/*! \brief Hessian vector on GPU */
CUDAVector<score_t> cuda_hessians_;
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with device_type=cuda */
mutable std::vector<double> host_score_;
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
mutable CUDAVector<double> cuda_score_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
/*! \brief Number of training data */
data_size_t num_data_;
......
......@@ -43,33 +43,33 @@ class GOSSStrategy : public SampleStrategy {
bag_data_cnt_ = left_cnt;
// set bagging data to tree learner
if (!is_use_subset_) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
}
......
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_algorithms.hpp>
......@@ -509,4 +509,4 @@ template __device__ double PercentileDevice<double, data_size_t, label_t, double
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h>
......@@ -28,4 +28,4 @@ void SetCUDADevice(int gpu_device_id, const char* file, int line) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
......@@ -886,7 +886,7 @@ namespace LightGBM {
return nullptr;
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
template <>
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
......@@ -1081,6 +1081,6 @@ namespace LightGBM {
return to_return;
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} // namespace LightGBM
......@@ -177,8 +177,6 @@ void GetDeviceType(const std::unordered_map<std::string, std::string>& params, s
*device_type = "gpu";
} else if (value == std::string("cuda")) {
*device_type = "cuda";
} else if (value == std::string("cuda_exp")) {
*device_type = "cuda_exp";
} else {
Log::Fatal("Unknown device type %s", value.c_str());
}
......@@ -260,7 +258,7 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
GetObjectiveType(params, &objective);
GetMetricType(params, objective, &metric);
GetDeviceType(params, &device_type);
if (device_type == std::string("cuda") || device_type == std::string("cuda_exp")) {
if (device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda;
}
GetTreeLearnerType(params, &tree_learner);
......@@ -373,26 +371,21 @@ void Config::CheckParamConflict() {
num_leaves = static_cast<int>(full_num_leaves);
}
}
if (device_type == std::string("gpu") || device_type == std::string("cuda")) {
if (device_type == std::string("gpu")) {
// force col-wise for gpu, and cuda version
force_col_wise = true;
force_row_wise = false;
if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
}
} else if (device_type == std::string("cuda_exp")) {
// force row-wise for cuda_exp version
} else if (device_type == std::string("cuda")) {
// force row-wise for cuda version
force_col_wise = false;
force_row_wise = true;
if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
}
}
// force gpu_use_dp for CUDA
if (device_type == std::string("cuda") && !gpu_use_dp) {
Log::Warning("CUDA currently requires double precision calculations.");
gpu_use_dp = true;
}
// linear tree learner must be serial type and run on CPU device
if (linear_tree) {
if (device_type != std::string("cpu")) {
......
......@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp>
......@@ -308,4 +308,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
......@@ -4,7 +4,7 @@
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp>
......@@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment