Unverified Commit 0aa7bfee authored by Guolin Ke, committed by GitHub
Browse files

don't cache num_threads where possible (#2839)



* don't cache `num_threads`, to avoid stale values when it is changed externally

* rename

* update document

* Update docs/Parameters.rst

* Update include/LightGBM/config.h

* Apply suggestions from code review
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* Apply suggestions from code review
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 5a80b788
...@@ -171,6 +171,8 @@ Core Parameters ...@@ -171,6 +171,8 @@ Core Parameters
- for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication - for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication
- **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors
- ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">&#x1F517;&#xFE0E;</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, aliases: ``device`` - ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">&#x1F517;&#xFE0E;</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, aliases: ``device``
- device for the tree learning, you can use GPU to achieve the faster learning - device for the tree learning, you can use GPU to achieve the faster learning
......
...@@ -192,6 +192,7 @@ struct Config { ...@@ -192,6 +192,7 @@ struct Config {
// desc = do not set it too large if your dataset is small (for instance, do not use 64 threads for a dataset with 10,000 rows) // desc = do not set it too large if your dataset is small (for instance, do not use 64 threads for a dataset with 10,000 rows)
// desc = be aware a task manager or any similar CPU monitoring tool might report that cores are not being fully utilized. **This is normal** // desc = be aware a task manager or any similar CPU monitoring tool might report that cores are not being fully utilized. **This is normal**
// desc = for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication // desc = for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication
// desc = **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors
int num_threads = 0; int num_threads = 0;
// [doc-only] // [doc-only]
......
...@@ -292,10 +292,7 @@ struct TrainingTempState { ...@@ -292,10 +292,7 @@ struct TrainingTempState {
return; return;
} }
multi_val_bin.reset(bin); multi_val_bin.reset(bin);
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
num_bin_aligned = num_bin_aligned =
(bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize; (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
size_t new_size = static_cast<size_t>(num_bin_aligned) * 2 * num_threads; size_t new_size = static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
......
...@@ -21,12 +21,7 @@ template<typename VAL_T> ...@@ -21,12 +21,7 @@ template<typename VAL_T>
class ArrayArgs { class ArrayArgs {
public: public:
inline static size_t ArgMaxMT(const std::vector<VAL_T>& array) { inline static size_t ArgMaxMT(const std::vector<VAL_T>& array) {
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{
num_threads = omp_get_num_threads();
}
std::vector<size_t> arg_maxs(num_threads, 0); std::vector<size_t> arg_maxs(num_threads, 0);
int n_blocks = Threading::For<size_t>( int n_blocks = Threading::For<size_t>(
0, array.size(), 1024, 0, array.size(), 1024,
......
...@@ -727,12 +727,7 @@ template<typename _RanIt, typename _Pr, typename _VTRanIt> inline ...@@ -727,12 +727,7 @@ template<typename _RanIt, typename _Pr, typename _VTRanIt> inline
static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) { static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
size_t len = _Last - _First; size_t len = _Last - _First;
const size_t kMinInnerLen = 1024; const size_t kMinInnerLen = 1024;
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{
num_threads = omp_get_num_threads();
}
if (len <= kMinInnerLen || num_threads <= 1) { if (len <= kMinInnerLen || num_threads <= 1) {
std::sort(_First, _Last, _Pred); std::sort(_First, _Last, _Pred);
return; return;
...@@ -1032,10 +1027,7 @@ class Timer { ...@@ -1032,10 +1027,7 @@ class Timer {
public: public:
Timer() { Timer() {
#ifdef TIMETAG #ifdef TIMETAG
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
start_time_.resize(num_threads); start_time_.resize(num_threads);
stats_.resize(num_threads); stats_.resize(num_threads);
#endif // TIMETAG #endif // TIMETAG
......
...@@ -15,6 +15,14 @@ ...@@ -15,6 +15,14 @@
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
/*!
 * \brief Query the OpenMP team size by opening a parallel region.
 * \return The value omp_get_num_threads() reports to the master thread of
 *         that region; the result is initialised to 1 before the region runs.
 */
inline int OMP_NUM_THREADS() {
  int team_size = 1;
#pragma omp parallel
  {
    // Only the master thread writes, so the shared result needs no locking.
#pragma omp master
    team_size = omp_get_num_threads();
  }
  return team_size;
}
class ThreadExceptionHelper { class ThreadExceptionHelper {
public: public:
ThreadExceptionHelper() { ThreadExceptionHelper() {
...@@ -70,6 +78,7 @@ class ThreadExceptionHelper { ...@@ -70,6 +78,7 @@ class ThreadExceptionHelper {
inline void omp_set_num_threads(int) {} inline void omp_set_num_threads(int) {}
inline int omp_get_num_threads() {return 1;} inline int omp_get_num_threads() {return 1;}
inline int omp_get_thread_num() {return 0;} inline int omp_get_thread_num() {return 0;}
/*! \brief Stand-in that always reports one thread, matching the neighbouring omp_* stubs (presumably for builds without OpenMP; confirm against the enclosing #ifdef). */
inline int OMP_NUM_THREADS() { return 1; }
#ifdef __cplusplus #ifdef __cplusplus
}; // extern "C" }; // extern "C"
#endif #endif
......
...@@ -21,10 +21,7 @@ class Threading { ...@@ -21,10 +21,7 @@ class Threading {
template <typename INDEX_T> template <typename INDEX_T>
static inline void BlockInfo(INDEX_T cnt, INDEX_T min_cnt_per_block, static inline void BlockInfo(INDEX_T cnt, INDEX_T min_cnt_per_block,
int* out_nblock, INDEX_T* block_size) { int* out_nblock, INDEX_T* block_size) {
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
BlockInfo<INDEX_T>(num_threads, cnt, min_cnt_per_block, out_nblock, BlockInfo<INDEX_T>(num_threads, cnt, min_cnt_per_block, out_nblock,
block_size); block_size);
} }
...@@ -84,10 +81,7 @@ class ParallelPartitionRunner { ...@@ -84,10 +81,7 @@ class ParallelPartitionRunner {
public: public:
ParallelPartitionRunner(INDEX_T num_data, INDEX_T min_block_size) ParallelPartitionRunner(INDEX_T num_data, INDEX_T min_block_size)
: min_block_size_(min_block_size) { : min_block_size_(min_block_size) {
num_threads_ = 1; num_threads_ = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads_ = omp_get_num_threads(); }
left_.resize(num_data); left_.resize(num_data);
if (TWO_BUFFER) { if (TWO_BUFFER) {
right_.resize(num_data); right_.resize(num_data);
......
...@@ -56,16 +56,13 @@ class Predictor { ...@@ -56,16 +56,13 @@ class Predictor {
} }
} }
#pragma omp parallel
#pragma omp master
{ num_threads_ = omp_get_num_threads(); }
boosting->InitPredict(num_iteration, predict_contrib); boosting->InitPredict(num_iteration, predict_contrib);
boosting_ = boosting; boosting_ = boosting;
num_pred_one_row_ = boosting_->NumPredictOneRow( num_pred_one_row_ = boosting_->NumPredictOneRow(
num_iteration, predict_leaf_index, predict_contrib); num_iteration, predict_leaf_index, predict_contrib);
num_feature_ = boosting_->MaxFeatureIdx() + 1; num_feature_ = boosting_->MaxFeatureIdx() + 1;
predict_buf_.resize( predict_buf_.resize(
num_threads_, OMP_NUM_THREADS(),
std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>>( std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>>(
num_feature_, 0.0f)); num_feature_, 0.0f));
const int kFeatureThreshold = 100000; const int kFeatureThreshold = 100000;
...@@ -281,7 +278,6 @@ class Predictor { ...@@ -281,7 +278,6 @@ class Predictor {
PredictionEarlyStopInstance early_stop_; PredictionEarlyStopInstance early_stop_;
int num_feature_; int num_feature_;
int num_pred_one_row_; int num_pred_one_row_;
int num_threads_;
std::vector<std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>>> predict_buf_; std::vector<std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>>> predict_buf_;
}; };
......
...@@ -1529,12 +1529,7 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle, ...@@ -1529,12 +1529,7 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle,
if (config.num_threads > 0) { if (config.num_threads > 0) {
omp_set_num_threads(config.num_threads); omp_set_num_threads(config.num_threads);
} }
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{
num_threads = omp_get_num_threads();
}
int ncol = static_cast<int>(ncol_ptr - 1); int ncol = static_cast<int>(ncol_ptr - 1);
std::vector<std::vector<CSC_RowIterator>> iterators(num_threads, std::vector<CSC_RowIterator>()); std::vector<std::vector<CSC_RowIterator>> iterators(num_threads, std::vector<CSC_RowIterator>());
for (int i = 0; i < num_threads; ++i) { for (int i = 0; i < num_threads; ++i) {
......
...@@ -506,10 +506,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { ...@@ -506,10 +506,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
} }
const auto& offsets = feature_groups_[multi_group_id]->bin_offsets_; const auto& offsets = feature_groups_[multi_group_id]->bin_offsets_;
const int num_feature = feature_groups_[multi_group_id]->num_feature_; const int num_feature = feature_groups_[multi_group_id]->num_feature_;
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
std::vector<std::vector<std::unique_ptr<BinIterator>>> iters(num_threads); std::vector<std::vector<std::unique_ptr<BinIterator>>> iters(num_threads);
std::vector<uint32_t> most_freq_bins; std::vector<uint32_t> most_freq_bins;
...@@ -539,10 +536,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { ...@@ -539,10 +536,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
Common::FunctionTimer fun_time("Dataset::GetMultiBinFromAllFeatures", Common::FunctionTimer fun_time("Dataset::GetMultiBinFromAllFeatures",
global_timer); global_timer);
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
double sum_dense_ratio = 0; double sum_dense_ratio = 0;
std::unique_ptr<MultiValBin> ret; std::unique_ptr<MultiValBin> ret;
...@@ -1185,10 +1179,7 @@ void Dataset::ConstructHistogramsMultiVal( ...@@ -1185,10 +1179,7 @@ void Dataset::ConstructHistogramsMultiVal(
if (multi_val_bin == nullptr) { if (multi_val_bin == nullptr) {
return; return;
} }
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
global_timer.Start("Dataset::sparse_bin_histogram"); global_timer.Start("Dataset::sparse_bin_histogram");
const int num_bin = multi_val_bin->num_bin(); const int num_bin = multi_val_bin->num_bin();
......
...@@ -25,10 +25,7 @@ class MultiValSparseBin : public MultiValBin { ...@@ -25,10 +25,7 @@ class MultiValSparseBin : public MultiValBin {
estimate_element_per_row_(estimate_element_per_row) { estimate_element_per_row_(estimate_element_per_row) {
row_ptr_.resize(num_data_ + 1, 0); row_ptr_.resize(num_data_ + 1, 0);
INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_); INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
if (num_threads > 1) { if (num_threads > 1) {
t_data_.resize(num_threads - 1); t_data_.resize(num_threads - 1);
for (size_t i = 0; i < t_data_.size(); ++i) { for (size_t i = 0; i < t_data_.size(); ++i) {
......
...@@ -73,12 +73,7 @@ class SparseBin: public Bin { ...@@ -73,12 +73,7 @@ class SparseBin: public Bin {
explicit SparseBin(data_size_t num_data) explicit SparseBin(data_size_t num_data)
: num_data_(num_data) { : num_data_(num_data) {
int num_threads = 1; int num_threads = OMP_NUM_THREADS();
#pragma omp parallel
#pragma omp master
{
num_threads = omp_get_num_threads();
}
push_buffers_.resize(num_threads); push_buffers_.resize(num_threads);
} }
......
...@@ -23,12 +23,6 @@ class MapMetric:public Metric { ...@@ -23,12 +23,6 @@ class MapMetric:public Metric {
// get eval position // get eval position
eval_at_ = config.eval_at; eval_at_ = config.eval_at;
DCGCalculator::DefaultEvalAt(&eval_at_); DCGCalculator::DefaultEvalAt(&eval_at_);
// get number of threads
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
} }
~MapMetric() { ~MapMetric() {
...@@ -110,8 +104,9 @@ class MapMetric:public Metric { ...@@ -110,8 +104,9 @@ class MapMetric:public Metric {
} }
std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override { std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
// some buffers for multi-threading sum up // some buffers for multi-threading sum up
int num_threads = OMP_NUM_THREADS();
std::vector<std::vector<double>> result_buffer_; std::vector<std::vector<double>> result_buffer_;
for (int i = 0; i < num_threads_; ++i) { for (int i = 0; i < num_threads; ++i) {
result_buffer_.emplace_back(eval_at_.size(), 0.0f); result_buffer_.emplace_back(eval_at_.size(), 0.0f);
} }
std::vector<double> tmp_map(eval_at_.size(), 0.0f); std::vector<double> tmp_map(eval_at_.size(), 0.0f);
...@@ -139,7 +134,7 @@ class MapMetric:public Metric { ...@@ -139,7 +134,7 @@ class MapMetric:public Metric {
// Get final average MAP // Get final average MAP
std::vector<double> result(eval_at_.size(), 0.0f); std::vector<double> result(eval_at_.size(), 0.0f);
for (size_t j = 0; j < result.size(); ++j) { for (size_t j = 0; j < result.size(); ++j) {
for (int i = 0; i < num_threads_; ++i) { for (int i = 0; i < num_threads; ++i) {
result[j] += result_buffer_[i][j]; result[j] += result_buffer_[i][j];
} }
result[j] /= sum_query_weights_; result[j] /= sum_query_weights_;
...@@ -162,8 +157,6 @@ class MapMetric:public Metric { ...@@ -162,8 +157,6 @@ class MapMetric:public Metric {
double sum_query_weights_; double sum_query_weights_;
/*! \brief Evaluate position of Nmap */ /*! \brief Evaluate position of Nmap */
std::vector<data_size_t> eval_at_; std::vector<data_size_t> eval_at_;
/*! \brief Number of threads */
int num_threads_;
std::vector<std::string> name_; std::vector<std::string> name_;
std::vector<data_size_t> npos_per_query_; std::vector<data_size_t> npos_per_query_;
}; };
......
...@@ -26,12 +26,6 @@ class NDCGMetric:public Metric { ...@@ -26,12 +26,6 @@ class NDCGMetric:public Metric {
DCGCalculator::DefaultLabelGain(&label_gain); DCGCalculator::DefaultLabelGain(&label_gain);
// initialize DCG calculator // initialize DCG calculator
DCGCalculator::Init(label_gain); DCGCalculator::Init(label_gain);
// get number of threads
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
} }
~NDCGMetric() { ~NDCGMetric() {
...@@ -89,9 +83,10 @@ class NDCGMetric:public Metric { ...@@ -89,9 +83,10 @@ class NDCGMetric:public Metric {
} }
std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override { std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
int num_threads = OMP_NUM_THREADS();
// some buffers for multi-threading sum up // some buffers for multi-threading sum up
std::vector<std::vector<double>> result_buffer_; std::vector<std::vector<double>> result_buffer_;
for (int i = 0; i < num_threads_; ++i) { for (int i = 0; i < num_threads; ++i) {
result_buffer_.emplace_back(eval_at_.size(), 0.0f); result_buffer_.emplace_back(eval_at_.size(), 0.0f);
} }
std::vector<double> tmp_dcg(eval_at_.size(), 0.0f); std::vector<double> tmp_dcg(eval_at_.size(), 0.0f);
...@@ -139,7 +134,7 @@ class NDCGMetric:public Metric { ...@@ -139,7 +134,7 @@ class NDCGMetric:public Metric {
// Get final average NDCG // Get final average NDCG
std::vector<double> result(eval_at_.size(), 0.0f); std::vector<double> result(eval_at_.size(), 0.0f);
for (size_t j = 0; j < result.size(); ++j) { for (size_t j = 0; j < result.size(); ++j) {
for (int i = 0; i < num_threads_; ++i) { for (int i = 0; i < num_threads; ++i) {
result[j] += result_buffer_[i][j]; result[j] += result_buffer_[i][j];
} }
result[j] /= sum_query_weights_; result[j] /= sum_query_weights_;
...@@ -166,8 +161,6 @@ class NDCGMetric:public Metric { ...@@ -166,8 +161,6 @@ class NDCGMetric:public Metric {
std::vector<data_size_t> eval_at_; std::vector<data_size_t> eval_at_;
/*! \brief Cache the inverse max dcg for all queries */ /*! \brief Cache the inverse max dcg for all queries */
std::vector<std::vector<double>> inverse_max_dcgs_; std::vector<std::vector<double>> inverse_max_dcgs_;
/*! \brief Number of threads */
int num_threads_;
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -165,8 +165,9 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits() { ...@@ -165,8 +165,9 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
template <typename TREELEARNER_T> template <typename TREELEARNER_T>
void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) { void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {
std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_, SplitInfo()); int num_threads = OMP_NUM_THREADS();
std::vector<SplitInfo> larger_bests_per_thread(this->num_threads_, SplitInfo()); std::vector<SplitInfo> smaller_bests_per_thread(num_threads, SplitInfo());
std::vector<SplitInfo> larger_bests_per_thread(num_threads, SplitInfo());
std::vector<int8_t> smaller_node_used_features(this->num_features_, 1); std::vector<int8_t> smaller_node_used_features(this->num_features_, 1);
std::vector<int8_t> larger_node_used_features(this->num_features_, 1); std::vector<int8_t> larger_node_used_features(this->num_features_, 1);
if (this->config_->feature_fraction_bynode < 1.0f) { if (this->config_->feature_fraction_bynode < 1.0f) {
......
...@@ -21,11 +21,6 @@ namespace LightGBM { ...@@ -21,11 +21,6 @@ namespace LightGBM {
SerialTreeLearner::SerialTreeLearner(const Config* config) SerialTreeLearner::SerialTreeLearner(const Config* config)
:config_(config) { :config_(config) {
random_ = Random(config_->feature_fraction_seed); random_ = Random(config_->feature_fraction_seed);
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
} }
SerialTreeLearner::~SerialTreeLearner() { SerialTreeLearner::~SerialTreeLearner() {
...@@ -400,8 +395,9 @@ void SerialTreeLearner::FindBestSplitsFromHistograms( ...@@ -400,8 +395,9 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(
const std::vector<int8_t>& is_feature_used, bool use_subtract) { const std::vector<int8_t>& is_feature_used, bool use_subtract) {
Common::FunctionTimer fun_timer( Common::FunctionTimer fun_timer(
"SerialTreeLearner::FindBestSplitsFromHistograms", global_timer); "SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
std::vector<SplitInfo> smaller_best(num_threads_); int num_threads = OMP_NUM_THREADS();
std::vector<SplitInfo> larger_best(num_threads_); std::vector<SplitInfo> smaller_best(num_threads);
std::vector<SplitInfo> larger_best(num_threads);
std::vector<int8_t> smaller_node_used_features(num_features_, 1); std::vector<int8_t> smaller_node_used_features(num_features_, 1);
std::vector<int8_t> larger_node_used_features(num_features_, 1); std::vector<int8_t> larger_node_used_features(num_features_, 1);
if (config_->feature_fraction_bynode < 1.0f) { if (config_->feature_fraction_bynode < 1.0f) {
......
...@@ -189,7 +189,6 @@ class SerialTreeLearner: public TreeLearner { ...@@ -189,7 +189,6 @@ class SerialTreeLearner: public TreeLearner {
HistogramPool histogram_pool_; HistogramPool histogram_pool_;
/*! \brief config of tree learner*/ /*! \brief config of tree learner*/
const Config* config_; const Config* config_;
int num_threads_;
std::vector<int> ordered_bin_indices_; std::vector<int> ordered_bin_indices_;
bool is_constant_hessian_; bool is_constant_hessian_;
std::unique_ptr<TrainingTempState> temp_state_; std::unique_ptr<TrainingTempState> temp_state_;
......
...@@ -349,8 +349,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() { ...@@ -349,8 +349,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
template <typename TREELEARNER_T> template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) { void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {
std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_); int num_threads = OMP_NUM_THREADS();
std::vector<SplitInfo> larger_best_per_thread(this->num_threads_); std::vector<SplitInfo> smaller_bests_per_thread(num_threads);
std::vector<SplitInfo> larger_best_per_thread(num_threads);
std::vector<int8_t> smaller_node_used_features(this->num_features_, 1); std::vector<int8_t> smaller_node_used_features(this->num_features_, 1);
std::vector<int8_t> larger_node_used_features(this->num_features_, 1); std::vector<int8_t> larger_node_used_features(this->num_features_, 1);
if (this->config_->feature_fraction_bynode < 1.0f) { if (this->config_->feature_fraction_bynode < 1.0f) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment