Unverified commit 67d56b26, authored by Guolin Ke, committed by GitHub
Browse files

fix possible bug related to num_threads (#2876)

* only one fix

* add more

* add more
parent ba15a16a
...@@ -735,7 +735,7 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) { ...@@ -735,7 +735,7 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
size_t inner_size = (len + num_threads - 1) / num_threads; size_t inner_size = (len + num_threads - 1) / num_threads;
inner_size = std::max(inner_size, kMinInnerLen); inner_size = std::max(inner_size, kMinInnerLen);
num_threads = static_cast<int>((len + inner_size - 1) / inner_size); num_threads = static_cast<int>((len + inner_size - 1) / inner_size);
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads; ++i) { for (int i = 0; i < num_threads; ++i) {
size_t left = inner_size*i; size_t left = inner_size*i;
size_t right = left + inner_size; size_t right = left + inner_size;
......
...@@ -118,7 +118,7 @@ class ParallelPartitionRunner { ...@@ -118,7 +118,7 @@ class ParallelPartitionRunner {
} }
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1) num_threads(num_threads_)
for (int i = 0; i < nblock; ++i) { for (int i = 0; i < nblock; ++i) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
INDEX_T cur_start = i * inner_size; INDEX_T cur_start = i * inner_size;
...@@ -156,7 +156,7 @@ class ParallelPartitionRunner { ...@@ -156,7 +156,7 @@ class ParallelPartitionRunner {
data_size_t left_cnt = left_write_pos_[nblock - 1] + left_cnts_[nblock - 1]; data_size_t left_cnt = left_write_pos_[nblock - 1] + left_cnts_[nblock - 1];
auto right_start = out + left_cnt; auto right_start = out + left_cnt;
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1) num_threads(num_threads_)
for (int i = 0; i < nblock; ++i) { for (int i = 0; i < nblock; ++i) {
std::copy_n(left_.data() + offsets_[i], left_cnts_[i], std::copy_n(left_.data() + offsets_[i], left_cnts_[i],
out + left_write_pos_[i]); out + left_write_pos_[i]);
......
...@@ -512,7 +512,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { ...@@ -512,7 +512,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
std::vector<uint32_t> most_freq_bins; std::vector<uint32_t> most_freq_bins;
double sum_sparse_rate = 0; double sum_sparse_rate = 0;
for (int i = 0; i < num_feature; ++i) { for (int i = 0; i < num_feature; ++i) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < num_threads; ++tid) { for (int tid = 0; tid < num_threads; ++tid) {
iters[tid].emplace_back( iters[tid].emplace_back(
feature_groups_[multi_group_id]->SubFeatureIterator(i)); feature_groups_[multi_group_id]->SubFeatureIterator(i));
...@@ -556,7 +556,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { ...@@ -556,7 +556,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
num_total_bin -= 1; num_total_bin -= 1;
} }
offsets.push_back(num_total_bin); offsets.push_back(num_total_bin);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < num_threads; ++tid) { for (int tid = 0; tid < num_threads; ++tid) {
iters[tid].emplace_back( iters[tid].emplace_back(
feature_groups_[gid]->SubFeatureIterator(fid)); feature_groups_[gid]->SubFeatureIterator(fid));
...@@ -1228,7 +1228,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices, ...@@ -1228,7 +1228,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices,
hist_data = share_state->TempBuf(); hist_data = share_state->TempBuf();
} }
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1) num_threads(share_state->num_threads)
for (int tid = 0; tid < n_data_block; ++tid) { for (int tid = 0; tid < n_data_block; ++tid) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
data_size_t start = tid * data_block_size; data_size_t start = tid * data_block_size;
...@@ -1261,7 +1261,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices, ...@@ -1261,7 +1261,7 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices,
int bin_block_size = num_bin; int bin_block_size = num_bin;
Threading::BlockInfo<data_size_t>(share_state->num_threads, num_bin, 512, &n_bin_block, Threading::BlockInfo<data_size_t>(share_state->num_threads, num_bin, 512, &n_bin_block,
&bin_block_size); &bin_block_size);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static, 1) num_threads(share_state->num_threads)
for (int t = 0; t < n_bin_block; ++t) { for (int t = 0; t < n_bin_block; ++t) {
const int start = t * bin_block_size; const int start = t * bin_block_size;
const int end = std::min(start + bin_block_size, num_bin); const int end = std::min(start + bin_block_size, num_bin);
...@@ -1333,7 +1333,7 @@ void Dataset::ConstructHistogramsInner( ...@@ -1333,7 +1333,7 @@ void Dataset::ConstructHistogramsInner(
} }
} }
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static) num_threads(share_state->num_threads)
for (int gi = 0; gi < num_used_dense_group; ++gi) { for (int gi = 0; gi < num_used_dense_group; ++gi) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
int group = used_dense_group[gi]; int group = used_dense_group[gi];
......
...@@ -352,7 +352,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms( ...@@ -352,7 +352,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(
std::vector<int8_t> larger_node_used_features = col_sampler_.GetByNode(); std::vector<int8_t> larger_node_used_features = col_sampler_.GetByNode();
OMP_INIT_EX(); OMP_INIT_EX();
// find splits // find splits
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static) num_threads(share_state_->num_threads)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) { for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { if (!is_feature_used[feature_index]) {
......
...@@ -358,7 +358,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(cons ...@@ -358,7 +358,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(cons
// find best split from local aggregated histograms // find best split from local aggregated histograms
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static) num_threads(this->share_state_->num_threads)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) { for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment