Commit f3ae1c10 authored by Guolin Ke
Browse files

fix #628 (update the condition of using subset in bagging).

parent 03ba7304
...@@ -444,6 +444,10 @@ public: ...@@ -444,6 +444,10 @@ public:
return feature_groups_[group]->bin_data_.get(); return feature_groups_[group]->bin_data_.get();
} }
inline bool FeatureGroupIsSparse(int group) const {
return feature_groups_[group]->is_sparse_;
}
inline BinIterator* FeatureIterator(int i) const { inline BinIterator* FeatureIterator(int i) const {
const int group = feature2group_[i]; const int group = feature2group_[i];
const int sub_feature = feature2subfeature_[i]; const int sub_feature = feature2subfeature_[i];
......
...@@ -146,8 +146,17 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_ ...@@ -146,8 +146,17 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
left_write_pos_buf_.resize(num_threads_); left_write_pos_buf_.resize(num_threads_);
right_write_pos_buf_.resize(num_threads_); right_write_pos_buf_.resize(num_threads_);
double average_bag_rate = new_config->bagging_fraction / new_config->bagging_freq; double average_bag_rate = new_config->bagging_fraction / new_config->bagging_freq;
int sparse_group = 0;
for (int i = 0; i < train_data->num_feature_groups(); ++i) {
if (train_data->FeatureGroupIsSparse(i)) {
++sparse_group;
}
}
is_use_subset_ = false; is_use_subset_ = false;
if (average_bag_rate <= 0.5) { const int group_threshold_usesubset = 100;
const int sparse_group_threshold_usesubset = train_data->num_feature_groups() / 4;
if (average_bag_rate <= 0.5
&& (train_data->num_feature_groups() < group_threshold_usesubset || sparse_group < sparse_group_threshold_usesubset)) {
tmp_subset_.reset(new Dataset(bag_data_cnt_)); tmp_subset_.reset(new Dataset(bag_data_cnt_));
tmp_subset_->CopyFeatureMapperFrom(train_data); tmp_subset_->CopyFeatureMapperFrom(train_data);
is_use_subset_ = true; is_use_subset_ = true;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment