Unverified commit 67d56b26, authored by Guolin Ke, committed by GitHub
Browse files

fix possible bug related to num_threads (#2876)

* only one fix

* add more

* add more
parent ba15a16a
...@@ -735,7 +735,7 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) { ...@@ -735,7 +735,7 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
size_t inner_size = (len + num_threads - 1) / num_threads; size_t inner_size = (len + num_threads - 1) / num_threads;
inner_size = std::max(inner_size, kMinInnerLen); inner_size = std::max(inner_size, kMinInnerLen);
num_threads = static_cast<int>((len + inner_size - 1) / inner_size); num_threads = static_cast<int>((len + inner_size - 1) / inner_size);
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads; ++i) { for (int i = 0; i < num_threads; ++i) {
size_t left = inner_size*i; size_t left = inner_size*i;
size_t right = left + inner_size; size_t right = left + inner_size;
......
...@@ -118,7 +118,7 @@ class ParallelPartitionRunner { ...@@ -118,7 +118,7 @@ class ParallelPartitionRunner {
} }
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1) num_threads(num_threads_)
for (int i = 0; i < nblock; ++i) { for (int i = 0; i < nblock; ++i) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
INDEX_T cur_start = i * inner_size; INDEX_T cur_start = i * inner_size;
...@@ -156,7 +156,7 @@ class ParallelPartitionRunner { ...@@ -156,7 +156,7 @@ class ParallelPartitionRunner {
data_size_t left_cnt = left_write_pos_[nblock - 1] + left_cnts_[nblock - 1]; data_size_t left_cnt = left_write_pos_[nblock - 1] + left_cnts_[nblock - 1];
auto right_start = out + left_cnt; auto right_start = out + left_cnt;
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1) num_threads(num_threads_)
for (int i = 0; i < nblock; ++i) { for (int i = 0; i < nblock; ++i) {
std::copy_n(left_.data() + offsets_[i], left_cnts_[i], std::copy_n(left_.data() + offsets_[i], left_cnts_[i],
out + left_write_pos_[i]); out + left_write_pos_[i]);
......
...@@ -512,7 +512,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { ...@@ -512,7 +512,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
std::vector<uint32_t> most_freq_bins; std::vector<uint32_t> most_freq_bins;
double sum_sparse_rate = 0; double sum_sparse_rate = 0;
for (int i = 0; i < num_feature; ++i) { for (int i = 0; i < num_feature; ++i) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < num_threads; ++tid) { for (int tid = 0; tid < num_threads; ++tid) {
iters[tid].emplace_back( iters[tid].emplace_back(
feature_groups_[multi_group_id]->SubFeatureIterator(i)); feature_groups_[multi_group_id]->SubFeatureIterator(i));
...@@ -556,7 +556,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { ...@@ -556,7 +556,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
num_total_bin -= 1; num_total_bin -= 1;
} }
offsets.push_back(num_total_bin); offsets.push_back(num_total_bin);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < num_threads; ++tid) { for (int tid = 0; tid < num_threads; ++tid) {
iters[tid].emplace_back( iters[tid].emplace_back(
feature_groups_[gid]->SubFeatureIterator(fid)); feature_groups_[gid]->SubFeatureIterator(fid));
...@@ -1228,7 +1228,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices, ...@@ -1228,7 +1228,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices,
hist_data = share_state->TempBuf(); hist_data = share_state->TempBuf();
} }
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1) num_threads(share_state->num_threads)
for (int tid = 0; tid < n_data_block; ++tid) { for (int tid = 0; tid < n_data_block; ++tid) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
data_size_t start = tid * data_block_size; data_size_t start = tid * data_block_size;
...@@ -1261,7 +1261,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices, ...@@ -1261,7 +1261,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices,
int bin_block_size = num_bin; int bin_block_size = num_bin;
Threading::BlockInfo<data_size_t>(share_state->num_threads, num_bin, 512, &n_bin_block, Threading::BlockInfo<data_size_t>(share_state->num_threads, num_bin, 512, &n_bin_block,
&bin_block_size); &bin_block_size);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1) num_threads(share_state->num_threads)
for (int t = 0; t < n_bin_block; ++t) { for (int t = 0; t < n_bin_block; ++t) {
const int start = t * bin_block_size; const int start = t * bin_block_size;
const int end = std::min(start + bin_block_size, num_bin); const int end = std::min(start + bin_block_size, num_bin);
...@@ -1333,7 +1333,7 @@ void Dataset::ConstructHistogramsInner( ...@@ -1333,7 +1333,7 @@ void Dataset::ConstructHistogramsInner(
} }
} }
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static) num_threads(share_state->num_threads)
for (int gi = 0; gi < num_used_dense_group; ++gi) { for (int gi = 0; gi < num_used_dense_group; ++gi) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
int group = used_dense_group[gi]; int group = used_dense_group[gi];
......
...@@ -352,7 +352,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms( ...@@ -352,7 +352,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(
std::vector<int8_t> larger_node_used_features = col_sampler_.GetByNode(); std::vector<int8_t> larger_node_used_features = col_sampler_.GetByNode();
OMP_INIT_EX(); OMP_INIT_EX();
// find splits // find splits
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static) num_threads(share_state_->num_threads)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) { for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { if (!is_feature_used[feature_index]) {
......
...@@ -358,7 +358,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(cons ...@@ -358,7 +358,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(cons
// find best split from local aggregated histograms // find best split from local aggregated histograms
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static) num_threads(this->share_state_->num_threads)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) { for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment