Unverified Commit ca7a01cd authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

speed up multi-threading sum (#3485)

* speed up multi-threading sum

* Apply suggestions from code review
parent ba0a1f8d
......@@ -71,7 +71,7 @@ class Threading {
const std::function<void(int, INDEX_T, INDEX_T)>& inner_fun) {
int n_block = 1;
INDEX_T num_inner = end - start;
BlockInfo<INDEX_T>(end - start, min_block_size, &n_block, &num_inner);
BlockInfo<INDEX_T>(num_inner, min_block_size, &n_block, &num_inner);
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < n_block; ++i) {
......@@ -93,20 +93,16 @@ class Threading {
VAL1_T* res1, VAL2_T* res2) {
int n_block = 1;
INDEX_T num_inner = end - start;
BlockInfoForceSize<INDEX_T>(end - start, min_block_size, &n_block,
BlockInfoForceSize<INDEX_T>(num_inner, min_block_size, &n_block,
&num_inner);
std::vector<VAL1_T> val_1s(n_block, static_cast<VAL1_T>(0));
std::vector<VAL2_T> val_2s(n_block, static_cast<VAL2_T>(0));
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static)
for (int i = 0; i < n_block; ++i) {
OMP_LOOP_EX_BEGIN();
INDEX_T inner_start = start + num_inner * i;
INDEX_T inner_end = std::min(end, inner_start + num_inner);
inner_fun(i, inner_start, inner_end, &val_1s[i], &val_2s[i]);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
*res1 = 0;
*res2 = 0;
for (int i = 0; i < n_block; ++i) {
......
......@@ -60,7 +60,7 @@ class LeafSplits {
}
/*!
 * \brief Init splits on the current leaf, it will traverse all data to sum up the results
 * \param gradients
 * \param hessians
 */
......@@ -68,10 +68,17 @@ class LeafSplits {
num_data_in_leaf_ = num_data_;
leaf_index_ = 0;
data_indices_ = nullptr;
if (num_data_in_leaf_ < 4096) {
sum_gradients_ = 0.0f;
sum_hessians_ = 0.0f;
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
sum_gradients_ += gradients[i];
sum_hessians_ += hessians[i];
}
} else {
Threading::SumReduction<data_size_t, double, double>(
0, num_data_in_leaf_, 2048,
[=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
*s1 = *s2 = 0;
for (data_size_t i = start; i < end; ++i) {
*s1 += gradients[i];
*s2 += hessians[i];
......@@ -79,6 +86,7 @@ class LeafSplits {
},
&sum_gradients_, &sum_hessians_);
}
}
/*!
* \brief Init splits on current leaf of partial data.
......@@ -87,21 +95,31 @@ class LeafSplits {
* \param gradients
* \param hessians
*/
void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
void Init(int leaf, const DataPartition* data_partition,
const score_t* gradients, const score_t* hessians) {
leaf_index_ = leaf;
data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
if (num_data_in_leaf_ < 4096) {
sum_gradients_ = 0.0f;
sum_hessians_ = 0.0f;
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
const data_size_t idx = data_indices_[i];
sum_gradients_ += gradients[idx];
sum_hessians_ += hessians[idx];
}
} else {
Threading::SumReduction<data_size_t, double, double>(
0, num_data_in_leaf_, 2048,
[=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
*s1 = *s2 = 0;
for (data_size_t i = start; i < end; ++i) {
data_size_t idx = data_indices_[i];
const data_size_t idx = data_indices_[i];
*s1 += gradients[idx];
*s2 += hessians[idx];
}
},
&sum_gradients_, &sum_hessians_);
}
}
/*!
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment