Unverified Commit 0a847efe authored by shiyu1994's avatar shiyu1994 Committed by GitHub
Browse files

when a leaf has no local data, its histogram should be cleared (#4185)

parent 13d0ceee
...@@ -155,6 +155,22 @@ template <typename TREELEARNER_T> ...@@ -155,6 +155,22 @@ template <typename TREELEARNER_T>
void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) { void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
TREELEARNER_T::ConstructHistograms( TREELEARNER_T::ConstructHistograms(
this->col_sampler_.is_feature_used_bytree(), true); this->col_sampler_.is_feature_used_bytree(), true);
const int smaller_leaf_index = this->smaller_leaf_splits_->leaf_index();
const data_size_t local_data_on_smaller_leaf = this->data_partition_->leaf_count(smaller_leaf_index);
if (local_data_on_smaller_leaf <= 0) {
// clear histogram buffer before synchronizing
// otherwise histogram contents from the previous iteration will be sent
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
if (this->col_sampler_.is_feature_used_bytree()[feature_index] == false)
continue;
const BinMapper* feature_bin_mapper = this->train_data_->FeatureBinMapper(feature_index);
const int offset = static_cast<int>(feature_bin_mapper->GetMostFreqBin() == 0);
const int num_bin = feature_bin_mapper->num_bin();
hist_t* hist_ptr = this->smaller_leaf_histogram_array_[feature_index].RawData();
std::memset(reinterpret_cast<void*>(hist_ptr), 0, (num_bin - offset) * kHistEntrySize);
}
}
// construct local histograms // construct local histograms
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) { for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
......
...@@ -259,6 +259,48 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) ...@@ -259,6 +259,48 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
} }
TREELEARNER_T::ConstructHistograms(is_feature_used, use_subtract); TREELEARNER_T::ConstructHistograms(is_feature_used, use_subtract);
const int smaller_leaf_index = this->smaller_leaf_splits_->leaf_index();
const data_size_t local_data_on_smaller_leaf = this->data_partition_->leaf_count(smaller_leaf_index);
if (local_data_on_smaller_leaf <= 0) {
// clear histogram buffer before synchronizing
// otherwise histogram contents from the previous iteration will be sent
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { continue; }
const BinMapper* feature_bin_mapper = this->train_data_->FeatureBinMapper(feature_index);
const int num_bin = feature_bin_mapper->num_bin();
const int offset = static_cast<int>(feature_bin_mapper->GetMostFreqBin() == 0);
hist_t* hist_ptr = this->smaller_leaf_histogram_array_[feature_index].RawData();
std::memset(reinterpret_cast<void*>(hist_ptr), 0, (num_bin - offset) * kHistEntrySize);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
if (this->larger_leaf_splits_ != nullptr) {
const int larger_leaf_index = this->larger_leaf_splits_->leaf_index();
if (larger_leaf_index >= 0) {
const data_size_t local_data_on_larger_leaf = this->data_partition_->leaf_count(larger_leaf_index);
if (local_data_on_larger_leaf <= 0) {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { continue; }
const BinMapper* feature_bin_mapper = this->train_data_->FeatureBinMapper(feature_index);
const int num_bin = feature_bin_mapper->num_bin();
const int offset = static_cast<int>(feature_bin_mapper->GetMostFreqBin() == 0);
hist_t* hist_ptr = this->larger_leaf_histogram_array_[feature_index].RawData();
std::memset(reinterpret_cast<void*>(hist_ptr), 0, (num_bin - offset) * kHistEntrySize);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
}
}
std::vector<SplitInfo> smaller_bestsplit_per_features(this->num_features_); std::vector<SplitInfo> smaller_bestsplit_per_features(this->num_features_);
std::vector<SplitInfo> larger_bestsplit_per_features(this->num_features_); std::vector<SplitInfo> larger_bestsplit_per_features(this->num_features_);
double smaller_leaf_parent_output = this->GetParentOutput(tree, this->smaller_leaf_splits_.get()); double smaller_leaf_parent_output = this->GetParentOutput(tree, this->smaller_leaf_splits_.get());
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment