Unverified Commit 36531679 authored by shiyu1994's avatar shiyu1994 Committed by GitHub
Browse files

Change Dataset::CopySubrow from group-wise to column-wise (#3720)

parent 66b96368
...@@ -232,6 +232,14 @@ class FeatureGroup { ...@@ -232,6 +232,14 @@ class FeatureGroup {
} }
} }
// Copies the rows listed in used_indices from full_feature into this group,
// handling exactly one bin container per call.
//   full_feature     - source group to copy from.
//   used_indices     - row indices to copy; forwarded unchanged to the bin's CopySubrow.
//   num_used_indices - number of entries in used_indices.
//   fidx             - position of the container inside multi_bin_data_; read only
//                      when is_multi_val_ is set, otherwise ignored.
inline void CopySubrowByCol(const FeatureGroup* full_feature, const data_size_t* used_indices, data_size_t num_used_indices, int fidx) {
  if (is_multi_val_) {
    // Multi-value group: only the container at position fidx is copied by this call.
    auto* src_bin = full_feature->multi_bin_data_[fidx].get();
    multi_bin_data_[fidx]->CopySubrow(src_bin, used_indices, num_used_indices);
    return;
  }
  // Single shared container: fidx is not consulted on this path.
  auto* src_bin = full_feature->bin_data_.get();
  bin_data_->CopySubrow(src_bin, used_indices, num_used_indices);
}
void AddFeaturesFrom(const FeatureGroup* other, int group_id) { void AddFeaturesFrom(const FeatureGroup* other, int group_id) {
CHECK(is_multi_val_); CHECK(is_multi_val_);
CHECK(other->is_multi_val_); CHECK(other->is_multi_val_);
......
...@@ -782,15 +782,36 @@ void Dataset::CopySubrow(const Dataset* fullset, ...@@ -782,15 +782,36 @@ void Dataset::CopySubrow(const Dataset* fullset,
const data_size_t* used_indices, const data_size_t* used_indices,
data_size_t num_used_indices, bool need_meta_data) { data_size_t num_used_indices, bool need_meta_data) {
CHECK_EQ(num_used_indices, num_data_); CHECK_EQ(num_used_indices, num_data_);
OMP_INIT_EX();
#pragma omp parallel for schedule(static) std::vector<int> group_ids, subfeature_ids;
group_ids.reserve(num_features_);
subfeature_ids.reserve(num_features_);
for (int group = 0; group < num_groups_; ++group) { for (int group = 0; group < num_groups_; ++group) {
if (fullset->IsMultiGroup(group)) {
for (int sub_feature = 0; sub_feature <
fullset->feature_groups_[group]->num_feature_; ++sub_feature) {
group_ids.emplace_back(group);
subfeature_ids.emplace_back(sub_feature);
}
} else {
group_ids.emplace_back(group);
subfeature_ids.emplace_back(-1);
}
}
int num_copy_tasks = static_cast<int>(group_ids.size());
OMP_INIT_EX();
#pragma omp parallel for schedule(dynamic)
for (int task_id = 0; task_id < num_copy_tasks; ++task_id) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
feature_groups_[group]->CopySubrow(fullset->feature_groups_[group].get(), int group = group_ids[task_id];
used_indices, num_used_indices); int subfeature = subfeature_ids[task_id];
feature_groups_[group]->CopySubrowByCol(fullset->feature_groups_[group].get(),
used_indices, num_used_indices, subfeature);
OMP_LOOP_EX_END(); OMP_LOOP_EX_END();
} }
OMP_THROW_EX(); OMP_THROW_EX();
if (need_meta_data) { if (need_meta_data) {
metadata_.Init(fullset->metadata_, used_indices, num_used_indices); metadata_.Init(fullset->metadata_, used_indices, num_used_indices);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment