Unverified Commit 8ed371ce authored by James Lamb, committed by GitHub
Browse files

set explicit number of threads in every OpenMP `parallel` region (#6135)

parent 992f5056
......@@ -19,7 +19,7 @@ void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leav
leaf_map_ = std::vector<int>(train_data->num_data(), -1);
contains_nan_ = std::vector<int8_t>(train_data->num_features(), 0);
// identify features containing nans
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int feat = 0; feat < train_data->num_features(); ++feat) {
auto bin_mapper = train_data_->FeatureBinMapper(feat);
if (bin_mapper->bin_type() == BinType::NumericalBin) {
......@@ -159,7 +159,7 @@ void LinearTreeLearner::GetLeafMap(Tree* tree) const {
std::fill(leaf_map_.begin(), leaf_map_.end(), -1);
// map data to leaf number
const data_size_t* ind = data_partition_->indices();
#pragma omp parallel for schedule(dynamic)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(dynamic)
for (int i = 0; i < tree->num_leaves(); ++i) {
data_size_t idx = data_partition_->leaf_begin(i);
for (int j = 0; j < data_partition_->leaf_count(i); ++j) {
......@@ -224,7 +224,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
}
}
// clear the coefficient matrices
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int i = 0; i < num_threads; ++i) {
for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) {
size_t num_feat = leaf_features[leaf_num].size();
......@@ -232,7 +232,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
std::fill(XTg_by_thread_[i][leaf_num].begin(), XTg_by_thread_[i][leaf_num].begin() + num_feat + 1, 0.0f);
}
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) {
size_t num_feat = leaf_features[leaf_num].size();
std::fill(XTHX_[leaf_num].begin(), XTHX_[leaf_num].begin() + (num_feat + 1) * (num_feat + 2) / 2, 0.0f);
......@@ -245,7 +245,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
}
}
OMP_INIT_EX();
#pragma omp parallel if (num_data_ > 1024)
#pragma omp parallel num_threads(OMP_NUM_THREADS()) if (num_data_ > 1024)
{
std::vector<float> curr_row(max_num_features + 1);
int tid = omp_get_thread_num();
......@@ -296,7 +296,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
auto total_nonzero = std::vector<int>(tree->num_leaves());
// aggregate results from different threads
for (int tid = 0; tid < num_threads; ++tid) {
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) {
size_t num_feat = leaf_features[leaf_num].size();
for (size_t j = 0; j < (num_feat + 1) * (num_feat + 2) / 2; ++j) {
......@@ -318,7 +318,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
double shrinkage = tree->shrinkage();
double decay_rate = config_->refit_decay_rate;
// copy into eigen matrices and solve
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) {
if (total_nonzero[leaf_num] < static_cast<int>(leaf_features[leaf_num].size()) + 1) {
if (is_refit) {
......
......@@ -75,7 +75,7 @@ class LinearTreeLearner: public SerialTreeLearner {
leaf_num_features[leaf_num] = static_cast<int>(feat_ptr[leaf_num].size());
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static) if (num_data_ > 1024)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) if (num_data_ > 1024)
for (int i = 0; i < num_data_; ++i) {
OMP_LOOP_EX_BEGIN();
int leaf_num = leaf_map_[i];
......
......@@ -242,7 +242,7 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t*
auto tree = std::unique_ptr<Tree>(new Tree(*old_tree));
CHECK_GE(data_partition_->num_leaves(), tree->num_leaves());
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int i = 0; i < tree->num_leaves(); ++i) {
OMP_LOOP_EX_BEGIN();
data_size_t cnt_leaf_data = 0;
......@@ -379,7 +379,7 @@ void SerialTreeLearner::FindBestSplits(const Tree* tree) {
void SerialTreeLearner::FindBestSplits(const Tree* tree, const std::set<int>* force_features) {
std::vector<int8_t> is_feature_used(num_features_, 0);
#pragma omp parallel for schedule(static, 256) if (num_features_ >= 512)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 256) if (num_features_ >= 512)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!col_sampler_.is_feature_used_bytree()[feature_index] && (force_features == nullptr || force_features->find(feature_index) == force_features->end())) continue;
if (parent_leaf_histogram_array_ != nullptr
......@@ -922,7 +922,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
}
std::vector<int> n_nozeroworker_perleaf(tree->num_leaves(), 1);
int num_machines = Network::num_machines();
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int i = 0; i < tree->num_leaves(); ++i) {
const double output = static_cast<double>(tree->LeafOutput(i));
data_size_t cnt_leaf_data = 0;
......
......@@ -103,7 +103,7 @@ class SerialTreeLearner: public TreeLearner {
if (tree->num_leaves() <= 1) {
return;
}
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
for (int i = 0; i < tree->num_leaves(); ++i) {
double output = static_cast<double>(tree->LeafOutput(i));
data_size_t cnt_leaf_data = 0;
......
......@@ -243,7 +243,7 @@ template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
// use local data to find local best splits
std::vector<int8_t> is_feature_used(this->num_features_, 0);
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
if (!this->col_sampler_.is_feature_used_bytree()[feature_index]) continue;
if (this->parent_leaf_histogram_array_ != nullptr
......@@ -265,7 +265,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
// clear histogram buffer before synchronizing
// otherwise histogram contents from the previous iteration will be sent
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { continue; }
......@@ -285,7 +285,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
const data_size_t local_data_on_larger_leaf = this->data_partition_->leaf_count(larger_leaf_index);
if (local_data_on_larger_leaf <= 0) {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { continue; }
......@@ -307,7 +307,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
double larger_leaf_parent_output = this->GetParentOutput(tree, this->larger_leaf_splits_.get());
OMP_INIT_EX();
// find splits
#pragma omp parallel for schedule(static)
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { continue; }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment