voting_parallel_tree_learner.cpp 23.2 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
#include "parallel_tree_learner.h"

#include <LightGBM/utils/common.h>

#include <cstring>
#include <tuple>
#include <vector>

namespace LightGBM {

11
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
12
13
14
VotingParallelTreeLearner<TREELEARNER_T>::VotingParallelTreeLearner(const Config* config)
  :TREELEARNER_T(config) {
  top_k_ = this->config_->top_k;
Guolin Ke's avatar
Guolin Ke committed
15
16
}

17
18
19
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, bool is_constant_hessian) {
  TREELEARNER_T::Init(train_data, is_constant_hessian);
Guolin Ke's avatar
Guolin Ke committed
20
21
22
23
  rank_ = Network::rank();
  num_machines_ = Network::num_machines();

  // limit top k
24
25
  if (top_k_ > this->num_features_) {
    top_k_ = this->num_features_;
Guolin Ke's avatar
Guolin Ke committed
26
27
28
  }
  // get max bin
  int max_bin = 0;
29
30
31
  for (int i = 0; i < this->num_features_; ++i) {
    if (max_bin < this->train_data_->FeatureNumBin(i)) {
      max_bin = this->train_data_->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
32
33
34
    }
  }
  // calculate buffer size
35
  size_t buffer_size = 2 * top_k_ * std::max(max_bin * sizeof(HistogramBinEntry), sizeof(LightSplitInfo) * num_machines_);
Guolin Ke's avatar
Guolin Ke committed
36
37
38
39
  // left and right on same time, so need double size
  input_buffer_.resize(buffer_size);
  output_buffer_.resize(buffer_size);

40
41
  smaller_is_feature_aggregated_.resize(this->num_features_);
  larger_is_feature_aggregated_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45

  block_start_.resize(num_machines_);
  block_len_.resize(num_machines_);

46
47
  smaller_buffer_read_start_pos_.resize(this->num_features_);
  larger_buffer_read_start_pos_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
48
  global_data_count_in_leaf_.resize(this->config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
49

50
51
  smaller_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
  larger_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
Guolin Ke's avatar
Guolin Ke committed
52

Guolin Ke's avatar
Guolin Ke committed
53
54
55
  local_config_ = *this->config_;
  local_config_.min_data_in_leaf /= num_machines_;
  local_config_.min_sum_hessian_in_leaf /= num_machines_;
Guolin Ke's avatar
Guolin Ke committed
56

Guolin Ke's avatar
Guolin Ke committed
57
  this->histogram_pool_.ResetConfig(&local_config_);
Guolin Ke's avatar
Guolin Ke committed
58
59

  // initialize histograms for global
60
61
62
  smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  auto num_total_bin = this->train_data_->NumTotalBin();
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
67
68
  smaller_leaf_histogram_data_.resize(num_total_bin);
  larger_leaf_histogram_data_.resize(num_total_bin);
  feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
  for (int i = 0; i < train_data->num_features(); ++i) {
    feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
69
    feature_metas_[i].default_bin = train_data->FeatureBinMapper(i)->GetDefaultBin();
Guolin Ke's avatar
Guolin Ke committed
70
    feature_metas_[i].missing_type = train_data->FeatureBinMapper(i)->missing_type();
Guolin Ke's avatar
Guolin Ke committed
71
    feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
Guolin Ke's avatar
Guolin Ke committed
72
    feature_metas_[i].penalty = train_data->FeaturePenalte(i);
Guolin Ke's avatar
Guolin Ke committed
73
74
75
76
77
    if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
      feature_metas_[i].bias = 1;
    } else {
      feature_metas_[i].bias = 0;
    }
Guolin Ke's avatar
Guolin Ke committed
78
    feature_metas_[i].config = this->config_;
79
    feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
Guolin Ke's avatar
Guolin Ke committed
80
81
82
83
  }
  uint64_t offset = 0;
  for (int j = 0; j < train_data->num_features(); ++j) {
    offset += static_cast<uint64_t>(train_data->SubFeatureBinOffset(j));
84
85
    smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
    larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
Guolin Ke's avatar
Guolin Ke committed
86
87
88
89
90
    auto num_bin = train_data->FeatureNumBin(j);
    if (train_data->FeatureBinMapper(j)->GetDefaultBin() == 0) {
      num_bin -= 1;
    }
    offset += static_cast<uint64_t>(num_bin);
Guolin Ke's avatar
Guolin Ke committed
91
92
93
  }
}

94
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
95
96
void VotingParallelTreeLearner<TREELEARNER_T>::ResetConfig(const Config* config) {
  TREELEARNER_T::ResetConfig(config);
Guolin Ke's avatar
Guolin Ke committed
97

Guolin Ke's avatar
Guolin Ke committed
98
99
100
  local_config_ = *this->config_;
  local_config_.min_data_in_leaf /= num_machines_;
  local_config_.min_sum_hessian_in_leaf /= num_machines_;
Guolin Ke's avatar
Guolin Ke committed
101

Guolin Ke's avatar
Guolin Ke committed
102
103
  this->histogram_pool_.ResetConfig(&local_config_);
  global_data_count_in_leaf_.resize(this->config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
104

Guolin Ke's avatar
Guolin Ke committed
105
  for (size_t i = 0; i < feature_metas_.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
106
    feature_metas_[i].config = this->config_;
Guolin Ke's avatar
Guolin Ke committed
107
108
  }
}
Guolin Ke's avatar
Guolin Ke committed
109

110
111
112
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
  TREELEARNER_T::BeforeTrain();
Guolin Ke's avatar
Guolin Ke committed
113
  // sync global data sumup info
114
  std::tuple<data_size_t, double, double> data(this->smaller_leaf_splits_->num_data_in_leaf(), this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians());
Guolin Ke's avatar
Guolin Ke committed
115
116
117
  int size = sizeof(std::tuple<data_size_t, double, double>);
  std::memcpy(input_buffer_.data(), &data, size);

Guolin Ke's avatar
Guolin Ke committed
118
119
  Network::Allreduce(input_buffer_.data(), size, sizeof(std::tuple<data_size_t, double, double>), output_buffer_.data(), [](const char *src, char *dst, int type_size, comm_size_t len) {
    comm_size_t used_size = 0;
Guolin Ke's avatar
Guolin Ke committed
120
121
122
123
124
125
126
127
128
129
130
131
132
133
    const std::tuple<data_size_t, double, double> *p1;
    std::tuple<data_size_t, double, double> *p2;
    while (used_size < len) {
      p1 = reinterpret_cast<const std::tuple<data_size_t, double, double> *>(src);
      p2 = reinterpret_cast<std::tuple<data_size_t, double, double> *>(dst);
      std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
      std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
      std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);
      src += type_size;
      dst += type_size;
      used_size += type_size;
    }
  });

Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
134
  std::memcpy((void*)&data, output_buffer_.data(), size);
Guolin Ke's avatar
Guolin Ke committed
135
136
137
138
139
140
141
142

  // set global sumup info
  smaller_leaf_splits_global_->Init(std::get<1>(data), std::get<2>(data));
  larger_leaf_splits_global_->Init();
  // init global data count in leaf
  global_data_count_in_leaf_[0] = std::get<0>(data);
}

143
144
145
template <typename TREELEARNER_T>
bool VotingParallelTreeLearner<TREELEARNER_T>::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
  if (TREELEARNER_T::BeforeFindBestSplit(tree, left_leaf, right_leaf)) {
Guolin Ke's avatar
Guolin Ke committed
146
147
148
149
150
151
    data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
    data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
    if (right_leaf < 0) {
      return true;
    } else if (num_data_in_left_child < num_data_in_right_child) {
      // get local sumup
152
153
      this->smaller_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
154
155
    } else {
      // get local sumup
156
157
      this->smaller_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
162
163
164
    }
    return true;
  } else {
    return false;
  }
}

165
template <typename TREELEARNER_T>
166
void VotingParallelTreeLearner<TREELEARNER_T>::GlobalVoting(int leaf_idx, const std::vector<LightSplitInfo>& splits, std::vector<int>* out) {
Guolin Ke's avatar
Guolin Ke committed
167
168
169
170
171
  out->clear();
  if (leaf_idx < 0) {
    return;
  }
  // get mean number on machines
172
  score_t mean_num_data = GetGlobalDataCountInLeaf(leaf_idx) / static_cast<score_t>(num_machines_);
173
  std::vector<LightSplitInfo> feature_best_split(this->train_data_->num_total_features() , LightSplitInfo());
Guolin Ke's avatar
Guolin Ke committed
174
175
176
177
178
179
180
181
182
183
184
185
186
  for (auto & split : splits) {
    int fid = split.feature;
    if (fid < 0) {
      continue;
    }
    // weighted gain
    double gain = split.gain * (split.left_count + split.right_count) / mean_num_data;
    if (gain > feature_best_split[fid].gain) {
      feature_best_split[fid] = split;
      feature_best_split[fid].gain = gain;
    }
  }
  // get top k
187
188
  std::vector<LightSplitInfo> top_k_splits;
  ArrayArgs<LightSplitInfo>::MaxK(feature_best_split, top_k_, &top_k_splits);
Guolin Ke's avatar
Guolin Ke committed
189
190
191
192
193
194
195
196
  for (auto& split : top_k_splits) {
    if (split.gain == kMinScore || split.feature == -1) {
      continue;
    }
    out->push_back(split.feature);
  }
}

197
198
199
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::CopyLocalHistogram(const std::vector<int>& smaller_top_features, const std::vector<int>& larger_top_features) {
  for (int i = 0; i < this->num_features_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    smaller_is_feature_aggregated_[i] = false;
    larger_is_feature_aggregated_[i] = false;
  }
  size_t total_num_features = smaller_top_features.size() + larger_top_features.size();
  size_t average_feature = (total_num_features + num_machines_ - 1) / num_machines_;
  size_t used_num_features = 0, smaller_idx = 0, larger_idx = 0;
  block_start_[0] = 0;
  reduce_scatter_size_ = 0;
  // Copy histogram to buffer, and Get local aggregate features
  for (int i = 0; i < num_machines_; ++i) {
    size_t cur_size = 0, cur_used_features = 0;
    size_t cur_total_feature = std::min(average_feature, total_num_features - used_num_features);
    // copy histograms.
    while (cur_used_features < cur_total_feature) {
      // copy smaller leaf histograms first
      if (smaller_idx < smaller_top_features.size()) {
216
        int inner_feature_index = this->train_data_->InnerFeatureIndex(smaller_top_features[smaller_idx]);
Guolin Ke's avatar
Guolin Ke committed
217
218
219
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
220
221
          smaller_is_feature_aggregated_[inner_feature_index] = true;
          smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
222
223
        }
        // copy
224
225
226
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
227
228
229
230
231
232
233
        ++smaller_idx;
      }
      if (cur_used_features >= cur_total_feature) {
        break;
      }
      // then copy larger leaf histograms
      if (larger_idx < larger_top_features.size()) {
234
        int inner_feature_index = this->train_data_->InnerFeatureIndex(larger_top_features[larger_idx]);
Guolin Ke's avatar
Guolin Ke committed
235
236
237
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
238
239
          larger_is_feature_aggregated_[inner_feature_index] = true;
          larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
240
241
        }
        // copy
242
243
244
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
245
246
247
248
249
250
251
252
253
254
255
        ++larger_idx;
      }
    }
    used_num_features += cur_used_features;
    block_len_[i] = static_cast<int>(cur_size);
    if (i < num_machines_ - 1) {
      block_start_[i + 1] = block_start_[i] + block_len_[i];
    }
  }
}

256
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
257
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
Guolin Ke's avatar
Guolin Ke committed
258
  // use local data to find local best splits
259
  std::vector<int8_t> is_feature_used(this->num_features_, 0);
Guolin Ke's avatar
Guolin Ke committed
260
#pragma omp parallel for schedule(static)
261
262
263
264
265
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
    if (!this->is_feature_used_[feature_index]) continue;
    if (this->parent_leaf_histogram_array_ != nullptr
      && !this->parent_leaf_histogram_array_[feature_index].is_splittable()) {
      this->smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
Guolin Ke's avatar
Guolin Ke committed
266
267
268
269
270
      continue;
    }
    is_feature_used[feature_index] = 1;
  }
  bool use_subtract = true;
271
  if (this->parent_leaf_histogram_array_ == nullptr) {
Guolin Ke's avatar
Guolin Ke committed
272
273
    use_subtract = false;
  }
Guolin Ke's avatar
Guolin Ke committed
274
  TREELEARNER_T::ConstructHistograms(is_feature_used, use_subtract);
Guolin Ke's avatar
Guolin Ke committed
275

276
277
  std::vector<SplitInfo> smaller_bestsplit_per_features(this->num_features_);
  std::vector<SplitInfo> larger_bestsplit_per_features(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
278

279
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
280
  // find splits
Guolin Ke's avatar
Guolin Ke committed
281
#pragma omp parallel for schedule(static)
282
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
283
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
284
    if (!is_feature_used[feature_index]) { continue; }
285
286
287
288
289
290
291
292
293
294
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
    this->train_data_->FixHistogram(feature_index,
      this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
      this->smaller_leaf_histogram_array_[feature_index].RawData());

    this->smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->smaller_leaf_splits_->sum_gradients(),
      this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
295
296
      this->smaller_leaf_splits_->min_constraint(),
      this->smaller_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
297
      &smaller_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
298
    smaller_bestsplit_per_features[feature_index].feature = real_feature_index;
Guolin Ke's avatar
Guolin Ke committed
299
    // only has root leaf
300
    if (this->larger_leaf_splits_ == nullptr || this->larger_leaf_splits_->LeafIndex() < 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
301
302

    if (use_subtract) {
303
      this->larger_leaf_histogram_array_[feature_index].Subtract(this->smaller_leaf_histogram_array_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
304
    } else {
305
306
307
      this->train_data_->FixHistogram(feature_index, this->larger_leaf_splits_->sum_gradients(), this->larger_leaf_splits_->sum_hessians(),
        this->larger_leaf_splits_->num_data_in_leaf(),
        this->larger_leaf_histogram_array_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
308
309
    }
    // find best threshold for larger child
310
311
312
313
    this->larger_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->larger_leaf_splits_->sum_gradients(),
      this->larger_leaf_splits_->sum_hessians(),
      this->larger_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
314
315
      this->larger_leaf_splits_->min_constraint(),
      this->larger_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
316
      &larger_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
317
    larger_bestsplit_per_features[feature_index].feature = real_feature_index;
318
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
319
  }
320
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
321

Guolin Ke's avatar
Guolin Ke committed
322
323
  std::vector<SplitInfo> smaller_top_k_splits, larger_top_k_splits;
  // local voting
Guolin Ke's avatar
Guolin Ke committed
324
325
  ArrayArgs<SplitInfo>::MaxK(smaller_bestsplit_per_features, top_k_, &smaller_top_k_splits);
  ArrayArgs<SplitInfo>::MaxK(larger_bestsplit_per_features, top_k_, &larger_top_k_splits);
326
327
328
329
330
331
332
333

  std::vector<LightSplitInfo> smaller_top_k_light_splits(top_k_);
  std::vector<LightSplitInfo> larger_top_k_light_splits(top_k_);
  for (int i = 0; i < top_k_; ++i) {
    smaller_top_k_light_splits[i].CopyFrom(smaller_top_k_splits[i]);
    larger_top_k_light_splits[i].CopyFrom(larger_top_k_splits[i]);
  }

Guolin Ke's avatar
Guolin Ke committed
334
335
336
  // gather
  int offset = 0;
  for (int i = 0; i < top_k_; ++i) {
337
338
339
340
    std::memcpy(input_buffer_.data() + offset, &smaller_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
    std::memcpy(input_buffer_.data() + offset, &larger_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
341
342
343
  }
  Network::Allgather(input_buffer_.data(), offset, output_buffer_.data());
  // get all top-k from all machines
344
345
  std::vector<LightSplitInfo> smaller_top_k_splits_global;
  std::vector<LightSplitInfo> larger_top_k_splits_global;
Guolin Ke's avatar
Guolin Ke committed
346
347
348
  offset = 0;
  for (int i = 0; i < num_machines_; ++i) {
    for (int j = 0; j < top_k_; ++j) {
349
350
351
352
353
354
      smaller_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&smaller_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
      larger_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&larger_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
355
356
357
358
    }
  }
  // global voting
  std::vector<int> smaller_top_features, larger_top_features;
359
360
  GlobalVoting(this->smaller_leaf_splits_->LeafIndex(), smaller_top_k_splits_global, &smaller_top_features);
  GlobalVoting(this->larger_leaf_splits_->LeafIndex(), larger_top_k_splits_global, &larger_top_features);
Guolin Ke's avatar
Guolin Ke committed
361
362
363
364
  // copy local histgrams to buffer
  CopyLocalHistogram(smaller_top_features, larger_top_features);

  // Reduce scatter for histogram
Guolin Ke's avatar
Guolin Ke committed
365
366
  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(HistogramBinEntry), block_start_.data(), block_len_.data(),
                         output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramBinEntry::SumReducer);
Guolin Ke's avatar
Guolin Ke committed
367

Guolin Ke's avatar
Guolin Ke committed
368
369
370
371
372
373
374
375
  this->FindBestSplitsFromHistograms(is_feature_used, false);
}

template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {

  std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_);
  std::vector<SplitInfo> larger_best_per_thread(this->num_threads_);
Guolin Ke's avatar
Guolin Ke committed
376
  // find best split from local aggregated histograms
Guolin Ke's avatar
Guolin Ke committed
377
378
379

  OMP_INIT_EX();
  #pragma omp parallel for schedule(static)
380
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
381
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
382
    const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
383
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
Guolin Ke's avatar
Guolin Ke committed
384
    if (smaller_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
385
      SplitInfo smaller_split;
Guolin Ke's avatar
Guolin Ke committed
386
387
      // restore from buffer
      smaller_leaf_histogram_array_global_[feature_index].FromMemory(
Guolin Ke's avatar
Guolin Ke committed
388
        output_buffer_.data() + smaller_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
389

390
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
391
392
393
                                      smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
                                      smaller_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
394

Guolin Ke's avatar
Guolin Ke committed
395
396
      // find best threshold
      smaller_leaf_histogram_array_global_[feature_index].FindBestThreshold(
Guolin Ke's avatar
Guolin Ke committed
397
398
399
        smaller_leaf_splits_global_->sum_gradients(),
        smaller_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
400
401
        smaller_leaf_splits_global_->min_constraint(),
        smaller_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
402
        &smaller_split);
Guolin Ke's avatar
Guolin Ke committed
403
404
405
      smaller_split.feature = real_feature_index;
      if (smaller_split > smaller_bests_per_thread[tid]) {
        smaller_bests_per_thread[tid] = smaller_split;
Guolin Ke's avatar
Guolin Ke committed
406
      }
Guolin Ke's avatar
Guolin Ke committed
407
408
409
    }

    if (larger_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
410
      SplitInfo larger_split;
Guolin Ke's avatar
Guolin Ke committed
411
412
      // restore from buffer
      larger_leaf_histogram_array_global_[feature_index].FromMemory(output_buffer_.data() + larger_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
413

414
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
415
416
417
                                      larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
                                      larger_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
418

Guolin Ke's avatar
Guolin Ke committed
419
      // find best threshold
Guolin Ke's avatar
Guolin Ke committed
420
421
422
423
      larger_leaf_histogram_array_global_[feature_index].FindBestThreshold(
        larger_leaf_splits_global_->sum_gradients(),
        larger_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
424
425
        larger_leaf_splits_global_->min_constraint(),
        larger_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
426
        &larger_split);
Guolin Ke's avatar
Guolin Ke committed
427
428
429
      larger_split.feature = real_feature_index;
      if (larger_split > larger_best_per_thread[tid]) {
        larger_best_per_thread[tid] = larger_split;
Guolin Ke's avatar
Guolin Ke committed
430
      }
Guolin Ke's avatar
Guolin Ke committed
431
    }
432
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
433
  }
434
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
435
436

  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_bests_per_thread);
437
  int leaf = this->smaller_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
438
  this->best_split_per_leaf_[leaf] = smaller_bests_per_thread[smaller_best_idx];
Guolin Ke's avatar
Guolin Ke committed
439

440
441
  if (this->larger_leaf_splits_ != nullptr && this->larger_leaf_splits_->LeafIndex() >= 0) {
    leaf = this->larger_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
442
443
    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best_per_thread);
    this->best_split_per_leaf_[leaf] = larger_best_per_thread[larger_best_idx];
Guolin Ke's avatar
Guolin Ke committed
444
  }
Guolin Ke's avatar
Guolin Ke committed
445
446

  // find local best
Guolin Ke's avatar
Guolin Ke committed
447
448
  SplitInfo smaller_best_split, larger_best_split;
  smaller_best_split = this->best_split_per_leaf_[this->smaller_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
449
  // find local best split for larger leaf
450
  if (this->larger_leaf_splits_->LeafIndex() >= 0) {
Guolin Ke's avatar
Guolin Ke committed
451
    larger_best_split = this->best_split_per_leaf_[this->larger_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
452
453
  }
  // sync global best info
Guolin Ke's avatar
Guolin Ke committed
454
  SyncUpGlobalBestSplit(input_buffer_.data(), input_buffer_.data(), &smaller_best_split, &larger_best_split, this->config_->max_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
455
456

  // copy back
Guolin Ke's avatar
Guolin Ke committed
457
458
459
  this->best_split_per_leaf_[smaller_leaf_splits_global_->LeafIndex()] = smaller_best_split;
  if (larger_best_split.feature >= 0 && larger_leaf_splits_global_->LeafIndex() >= 0) {
    this->best_split_per_leaf_[larger_leaf_splits_global_->LeafIndex()] = larger_best_split;
Guolin Ke's avatar
Guolin Ke committed
460
461
462
  }
}

463
464
465
466
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) {
  TREELEARNER_T::Split(tree, best_Leaf, left_leaf, right_leaf);
  const SplitInfo& best_split_info = this->best_split_per_leaf_[best_Leaf];
Guolin Ke's avatar
Guolin Ke committed
467
468
469
  // set the global number of data for leaves
  global_data_count_in_leaf_[*left_leaf] = best_split_info.left_count;
  global_data_count_in_leaf_[*right_leaf] = best_split_info.right_count;
Guolin Ke's avatar
Guolin Ke committed
470
471
  auto p_left = smaller_leaf_splits_global_.get();
  auto p_right = larger_leaf_splits_global_.get();
Guolin Ke's avatar
Guolin Ke committed
472
473
  // init the global sumup info
  if (best_split_info.left_count < best_split_info.right_count) {
474
    smaller_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
475
476
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
477
    larger_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
478
479
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
480
  } else {
481
    smaller_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
482
483
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
484
    larger_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
485
486
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
    p_left = larger_leaf_splits_global_.get();
    p_right = smaller_leaf_splits_global_.get();
  }
  const int inner_feature_index = this->train_data_->InnerFeatureIndex(best_split_info.feature);
  bool is_numerical_split = this->train_data_->FeatureBinMapper(inner_feature_index)->bin_type() == BinType::NumericalBin;
  p_left->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  p_right->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  if (is_numerical_split) {
    double mid = (best_split_info.left_output + best_split_info.right_output) / 2.0f;
    if (best_split_info.monotone_type < 0) {
      p_left->SetValueConstraint(mid, best_split_info.max_constraint);
      p_right->SetValueConstraint(best_split_info.min_constraint, mid);
    } else if (best_split_info.monotone_type > 0) {
      p_left->SetValueConstraint(best_split_info.min_constraint, mid);
      p_right->SetValueConstraint(mid, best_split_info.max_constraint);
    }
Guolin Ke's avatar
Guolin Ke committed
503
504
505
  }
}

506
507
508
// instantiate template classes, otherwise linker cannot find the code
template class VotingParallelTreeLearner<GPUTreeLearner>;
template class VotingParallelTreeLearner<SerialTreeLearner>;
Guolin Ke's avatar
Guolin Ke committed
509
}  // namespace FTLBoost