Unverified Commit 831c0e3f authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

rollback to omp sum (#3493)

* rollback omp sum

* remove sum reduction
parent ca7a01cd
...@@ -84,33 +84,6 @@ class Threading { ...@@ -84,33 +84,6 @@ class Threading {
OMP_THROW_EX(); OMP_THROW_EX();
return n_block; return n_block;
} }
template <typename INDEX_T, typename VAL1_T, typename VAL2_T>
/*!
 * \brief Splits [start, end) into blocks, runs inner_fun on each block in
 *        parallel to produce per-block partial sums, then combines the
 *        partials serially into *res1 / *res2.
 * \param start Begin of the index range (inclusive)
 * \param end End of the index range (exclusive)
 * \param min_block_size Minimal number of elements per block
 * \param inner_fun Callback (block_idx, block_begin, block_end, out1, out2)
 *        that accumulates its block's sums into *out1 / *out2
 * \param res1 Output: total of the first accumulated value
 * \param res2 Output: total of the second accumulated value
 * \return Number of blocks the range was split into
 */
static inline int SumReduction(
    INDEX_T start, INDEX_T end, INDEX_T min_block_size,
    const std::function<void(int, INDEX_T, INDEX_T, VAL1_T* res1,
                             VAL2_T* res2)>& inner_fun,
    VAL1_T* res1, VAL2_T* res2) {
  // Ask the threading helper how to partition the range; it outputs the
  // block count and the (possibly adjusted) per-block length.
  int n_block = 1;
  INDEX_T block_len = end - start;
  BlockInfoForceSize<INDEX_T>(end - start, min_block_size, &n_block,
                              &block_len);
  // One zero-initialized slot per block so parallel writers never collide.
  std::vector<VAL1_T> partial1(n_block, static_cast<VAL1_T>(0));
  std::vector<VAL2_T> partial2(n_block, static_cast<VAL2_T>(0));
#pragma omp parallel for schedule(static)
  for (int block = 0; block < n_block; ++block) {
    const INDEX_T block_begin = start + block_len * block;
    // Last block may be short; clamp to the overall end.
    const INDEX_T block_stop = std::min(end, block_begin + block_len);
    inner_fun(block, block_begin, block_stop, &partial1[block],
              &partial2[block]);
  }
  // Serial combine — cheap relative to the parallel accumulation above.
  VAL1_T total1 = static_cast<VAL1_T>(0);
  VAL2_T total2 = static_cast<VAL2_T>(0);
  for (int block = 0; block < n_block; ++block) {
    total1 += partial1[block];
    total2 += partial2[block];
  }
  *res1 = total1;
  *res2 = total2;
  return n_block;
}
}; };
template <typename INDEX_T, bool TWO_BUFFER> template <typename INDEX_T, bool TWO_BUFFER>
......
...@@ -68,24 +68,15 @@ class LeafSplits { ...@@ -68,24 +68,15 @@ class LeafSplits {
num_data_in_leaf_ = num_data_; num_data_in_leaf_ = num_data_;
leaf_index_ = 0; leaf_index_ = 0;
data_indices_ = nullptr; data_indices_ = nullptr;
if (num_data_in_leaf_ < 4096) { double tmp_sum_gradients = 0.0f;
sum_gradients_ = 0.0f; double tmp_sum_hessians = 0.0f;
sum_hessians_ = 0.0f; #pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024)
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) { for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
sum_gradients_ += gradients[i]; tmp_sum_gradients += gradients[i];
sum_hessians_ += hessians[i]; tmp_sum_hessians += hessians[i];
}
} else {
Threading::SumReduction<data_size_t, double, double>(
0, num_data_in_leaf_, 2048,
[=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
for (data_size_t i = start; i < end; ++i) {
*s1 += gradients[i];
*s2 += hessians[i];
}
},
&sum_gradients_, &sum_hessians_);
} }
sum_gradients_ = tmp_sum_gradients;
sum_hessians_ = tmp_sum_hessians;
} }
/*! /*!
...@@ -99,26 +90,16 @@ class LeafSplits { ...@@ -99,26 +90,16 @@ class LeafSplits {
const score_t* gradients, const score_t* hessians) { const score_t* gradients, const score_t* hessians) {
leaf_index_ = leaf; leaf_index_ = leaf;
data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_); data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
if (num_data_in_leaf_ < 4096) { double tmp_sum_gradients = 0.0f;
sum_gradients_ = 0.0f; double tmp_sum_hessians = 0.0f;
sum_hessians_ = 0.0f; #pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024)
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) { for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
const data_size_t idx = data_indices_[i]; const data_size_t idx = data_indices_[i];
sum_gradients_ += gradients[idx]; tmp_sum_gradients += gradients[idx];
sum_hessians_ += hessians[idx]; tmp_sum_hessians += hessians[idx];
}
} else {
Threading::SumReduction<data_size_t, double, double>(
0, num_data_in_leaf_, 2048,
[=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
for (data_size_t i = start; i < end; ++i) {
const data_size_t idx = data_indices_[i];
*s1 += gradients[idx];
*s2 += hessians[idx];
}
},
&sum_gradients_, &sum_hessians_);
} }
sum_gradients_ = tmp_sum_gradients;
sum_hessians_ = tmp_sum_hessians;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment