voting_parallel_tree_learner.cpp 23.3 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
#include "parallel_tree_learner.h"

#include <LightGBM/utils/common.h>

#include <cstring>

#include <tuple>
#include <vector>

namespace LightGBM {

12
13
14
15
template <typename TREELEARNER_T>
VotingParallelTreeLearner<TREELEARNER_T>::VotingParallelTreeLearner(const TreeConfig* tree_config)
  :TREELEARNER_T(tree_config) {
  top_k_ = this->tree_config_->top_k;
Guolin Ke's avatar
Guolin Ke committed
16
17
}

18
19
20
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, bool is_constant_hessian) {
  TREELEARNER_T::Init(train_data, is_constant_hessian);
Guolin Ke's avatar
Guolin Ke committed
21
22
23
24
  rank_ = Network::rank();
  num_machines_ = Network::num_machines();

  // limit top k
25
26
  if (top_k_ > this->num_features_) {
    top_k_ = this->num_features_;
Guolin Ke's avatar
Guolin Ke committed
27
28
29
  }
  // get max bin
  int max_bin = 0;
30
31
32
  for (int i = 0; i < this->num_features_; ++i) {
    if (max_bin < this->train_data_->FeatureNumBin(i)) {
      max_bin = this->train_data_->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
33
34
35
    }
  }
  // calculate buffer size
36
  size_t buffer_size = 2 * top_k_ * std::max(max_bin * sizeof(HistogramBinEntry), sizeof(LightSplitInfo) * num_machines_);
Guolin Ke's avatar
Guolin Ke committed
37
38
39
40
  // left and right on same time, so need double size
  input_buffer_.resize(buffer_size);
  output_buffer_.resize(buffer_size);

41
42
  smaller_is_feature_aggregated_.resize(this->num_features_);
  larger_is_feature_aggregated_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
43
44
45
46

  block_start_.resize(num_machines_);
  block_len_.resize(num_machines_);

47
48
49
  smaller_buffer_read_start_pos_.resize(this->num_features_);
  larger_buffer_read_start_pos_.resize(this->num_features_);
  global_data_count_in_leaf_.resize(this->tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
50

51
52
  smaller_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
  larger_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
Guolin Ke's avatar
Guolin Ke committed
53

54
  local_tree_config_ = *this->tree_config_;
Guolin Ke's avatar
Guolin Ke committed
55
56
57
  local_tree_config_.min_data_in_leaf /= num_machines_;
  local_tree_config_.min_sum_hessian_in_leaf /= num_machines_;

58
  this->histogram_pool_.ResetConfig(&local_tree_config_);
Guolin Ke's avatar
Guolin Ke committed
59
60

  // initialize histograms for global
61
62
63
  smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  auto num_total_bin = this->train_data_->NumTotalBin();
Guolin Ke's avatar
Guolin Ke committed
64
65
66
67
68
69
  smaller_leaf_histogram_data_.resize(num_total_bin);
  larger_leaf_histogram_data_.resize(num_total_bin);
  feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
  for (int i = 0; i < train_data->num_features(); ++i) {
    feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
70
    feature_metas_[i].default_bin = train_data->FeatureBinMapper(i)->GetDefaultBin();
Guolin Ke's avatar
Guolin Ke committed
71
    feature_metas_[i].missing_type = train_data->FeatureBinMapper(i)->missing_type();
Guolin Ke's avatar
Guolin Ke committed
72
    feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
Guolin Ke's avatar
Guolin Ke committed
73
74
75
76
77
    if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
      feature_metas_[i].bias = 1;
    } else {
      feature_metas_[i].bias = 0;
    }
78
    feature_metas_[i].tree_config = this->tree_config_;
79
    feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
Guolin Ke's avatar
Guolin Ke committed
80
81
82
83
  }
  uint64_t offset = 0;
  for (int j = 0; j < train_data->num_features(); ++j) {
    offset += static_cast<uint64_t>(train_data->SubFeatureBinOffset(j));
84
85
    smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
    larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
Guolin Ke's avatar
Guolin Ke committed
86
87
88
89
90
    auto num_bin = train_data->FeatureNumBin(j);
    if (train_data->FeatureBinMapper(j)->GetDefaultBin() == 0) {
      num_bin -= 1;
    }
    offset += static_cast<uint64_t>(num_bin);
Guolin Ke's avatar
Guolin Ke committed
91
92
93
  }
}

94
95
96
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::ResetConfig(const TreeConfig* tree_config) {
  TREELEARNER_T::ResetConfig(tree_config);
Guolin Ke's avatar
Guolin Ke committed
97

98
  local_tree_config_ = *this->tree_config_;
Guolin Ke's avatar
Guolin Ke committed
99
100
101
  local_tree_config_.min_data_in_leaf /= num_machines_;
  local_tree_config_.min_sum_hessian_in_leaf /= num_machines_;

102
103
  this->histogram_pool_.ResetConfig(&local_tree_config_);
  global_data_count_in_leaf_.resize(this->tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
104

Guolin Ke's avatar
Guolin Ke committed
105
  for (size_t i = 0; i < feature_metas_.size(); ++i) {
106
    feature_metas_[i].tree_config = this->tree_config_;
Guolin Ke's avatar
Guolin Ke committed
107
108
  }
}
Guolin Ke's avatar
Guolin Ke committed
109

110
111
112
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
  TREELEARNER_T::BeforeTrain();
Guolin Ke's avatar
Guolin Ke committed
113
  // sync global data sumup info
114
  std::tuple<data_size_t, double, double> data(this->smaller_leaf_splits_->num_data_in_leaf(), this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians());
Guolin Ke's avatar
Guolin Ke committed
115
116
117
  int size = sizeof(std::tuple<data_size_t, double, double>);
  std::memcpy(input_buffer_.data(), &data, size);

Guolin Ke's avatar
Guolin Ke committed
118
119
  Network::Allreduce(input_buffer_.data(), size, sizeof(std::tuple<data_size_t, double, double>), output_buffer_.data(), [](const char *src, char *dst, int type_size, comm_size_t len) {
    comm_size_t used_size = 0;
Guolin Ke's avatar
Guolin Ke committed
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
    const std::tuple<data_size_t, double, double> *p1;
    std::tuple<data_size_t, double, double> *p2;
    while (used_size < len) {
      p1 = reinterpret_cast<const std::tuple<data_size_t, double, double> *>(src);
      p2 = reinterpret_cast<std::tuple<data_size_t, double, double> *>(dst);
      std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
      std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
      std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);
      src += type_size;
      dst += type_size;
      used_size += type_size;
    }
  });

  std::memcpy(&data, output_buffer_.data(), size);

  // set global sumup info
  smaller_leaf_splits_global_->Init(std::get<1>(data), std::get<2>(data));
  larger_leaf_splits_global_->Init();
  // init global data count in leaf
  global_data_count_in_leaf_[0] = std::get<0>(data);
}

143
144
145
template <typename TREELEARNER_T>
bool VotingParallelTreeLearner<TREELEARNER_T>::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
  if (TREELEARNER_T::BeforeFindBestSplit(tree, left_leaf, right_leaf)) {
Guolin Ke's avatar
Guolin Ke committed
146
147
148
149
150
151
    data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
    data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
    if (right_leaf < 0) {
      return true;
    } else if (num_data_in_left_child < num_data_in_right_child) {
      // get local sumup
152
153
      this->smaller_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
154
155
    } else {
      // get local sumup
156
157
      this->smaller_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
162
163
164
    }
    return true;
  } else {
    return false;
  }
}

165
template <typename TREELEARNER_T>
166
void VotingParallelTreeLearner<TREELEARNER_T>::GlobalVoting(int leaf_idx, const std::vector<LightSplitInfo>& splits, std::vector<int>* out) {
Guolin Ke's avatar
Guolin Ke committed
167
168
169
170
171
  out->clear();
  if (leaf_idx < 0) {
    return;
  }
  // get mean number on machines
172
  score_t mean_num_data = GetGlobalDataCountInLeaf(leaf_idx) / static_cast<score_t>(num_machines_);
173
  std::vector<LightSplitInfo> feature_best_split(this->train_data_->num_total_features() , LightSplitInfo());
Guolin Ke's avatar
Guolin Ke committed
174
175
176
177
178
179
180
181
182
183
184
185
186
  for (auto & split : splits) {
    int fid = split.feature;
    if (fid < 0) {
      continue;
    }
    // weighted gain
    double gain = split.gain * (split.left_count + split.right_count) / mean_num_data;
    if (gain > feature_best_split[fid].gain) {
      feature_best_split[fid] = split;
      feature_best_split[fid].gain = gain;
    }
  }
  // get top k
187
188
  std::vector<LightSplitInfo> top_k_splits;
  ArrayArgs<LightSplitInfo>::MaxK(feature_best_split, top_k_, &top_k_splits);
Guolin Ke's avatar
Guolin Ke committed
189
190
191
192
193
194
195
196
  for (auto& split : top_k_splits) {
    if (split.gain == kMinScore || split.feature == -1) {
      continue;
    }
    out->push_back(split.feature);
  }
}

197
198
199
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::CopyLocalHistogram(const std::vector<int>& smaller_top_features, const std::vector<int>& larger_top_features) {
  for (int i = 0; i < this->num_features_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    smaller_is_feature_aggregated_[i] = false;
    larger_is_feature_aggregated_[i] = false;
  }
  size_t total_num_features = smaller_top_features.size() + larger_top_features.size();
  size_t average_feature = (total_num_features + num_machines_ - 1) / num_machines_;
  size_t used_num_features = 0, smaller_idx = 0, larger_idx = 0;
  block_start_[0] = 0;
  reduce_scatter_size_ = 0;
  // Copy histogram to buffer, and Get local aggregate features
  for (int i = 0; i < num_machines_; ++i) {
    size_t cur_size = 0, cur_used_features = 0;
    size_t cur_total_feature = std::min(average_feature, total_num_features - used_num_features);
    // copy histograms.
    while (cur_used_features < cur_total_feature) {
      // copy smaller leaf histograms first
      if (smaller_idx < smaller_top_features.size()) {
216
        int inner_feature_index = this->train_data_->InnerFeatureIndex(smaller_top_features[smaller_idx]);
Guolin Ke's avatar
Guolin Ke committed
217
218
219
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
220
221
          smaller_is_feature_aggregated_[inner_feature_index] = true;
          smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
222
223
        }
        // copy
224
225
226
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
227
228
229
230
231
232
233
        ++smaller_idx;
      }
      if (cur_used_features >= cur_total_feature) {
        break;
      }
      // then copy larger leaf histograms
      if (larger_idx < larger_top_features.size()) {
234
        int inner_feature_index = this->train_data_->InnerFeatureIndex(larger_top_features[larger_idx]);
Guolin Ke's avatar
Guolin Ke committed
235
236
237
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
238
239
          larger_is_feature_aggregated_[inner_feature_index] = true;
          larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
240
241
        }
        // copy
242
243
244
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
245
246
247
248
249
250
251
252
253
254
255
        ++larger_idx;
      }
    }
    used_num_features += cur_used_features;
    block_len_[i] = static_cast<int>(cur_size);
    if (i < num_machines_ - 1) {
      block_start_[i + 1] = block_start_[i] + block_len_[i];
    }
  }
}

256
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
257
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
Guolin Ke's avatar
Guolin Ke committed
258
  // use local data to find local best splits
259
  std::vector<int8_t> is_feature_used(this->num_features_, 0);
Guolin Ke's avatar
Guolin Ke committed
260
#pragma omp parallel for schedule(static)
261
262
263
264
265
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
    if (!this->is_feature_used_[feature_index]) continue;
    if (this->parent_leaf_histogram_array_ != nullptr
      && !this->parent_leaf_histogram_array_[feature_index].is_splittable()) {
      this->smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
Guolin Ke's avatar
Guolin Ke committed
266
267
268
269
270
      continue;
    }
    is_feature_used[feature_index] = 1;
  }
  bool use_subtract = true;
271
  if (this->parent_leaf_histogram_array_ == nullptr) {
Guolin Ke's avatar
Guolin Ke committed
272
273
    use_subtract = false;
  }
Guolin Ke's avatar
Guolin Ke committed
274
  TREELEARNER_T::ConstructHistograms(is_feature_used, use_subtract);
Guolin Ke's avatar
Guolin Ke committed
275

276
277
  std::vector<SplitInfo> smaller_bestsplit_per_features(this->num_features_);
  std::vector<SplitInfo> larger_bestsplit_per_features(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
278

279
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
280
  // find splits
Guolin Ke's avatar
Guolin Ke committed
281
#pragma omp parallel for schedule(static)
282
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
283
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
284
    if (!is_feature_used[feature_index]) { continue; }
285
286
287
288
289
290
291
292
293
294
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
    this->train_data_->FixHistogram(feature_index,
      this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
      this->smaller_leaf_histogram_array_[feature_index].RawData());

    this->smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->smaller_leaf_splits_->sum_gradients(),
      this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
295
296
      this->smaller_leaf_splits_->min_constraint(),
      this->smaller_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
297
      &smaller_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
298
    smaller_bestsplit_per_features[feature_index].feature = real_feature_index;
Guolin Ke's avatar
Guolin Ke committed
299
    // only has root leaf
300
    if (this->larger_leaf_splits_ == nullptr || this->larger_leaf_splits_->LeafIndex() < 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
301
302

    if (use_subtract) {
303
      this->larger_leaf_histogram_array_[feature_index].Subtract(this->smaller_leaf_histogram_array_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
304
    } else {
305
306
307
      this->train_data_->FixHistogram(feature_index, this->larger_leaf_splits_->sum_gradients(), this->larger_leaf_splits_->sum_hessians(),
        this->larger_leaf_splits_->num_data_in_leaf(),
        this->larger_leaf_histogram_array_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
308
309
    }
    // find best threshold for larger child
310
311
312
313
    this->larger_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->larger_leaf_splits_->sum_gradients(),
      this->larger_leaf_splits_->sum_hessians(),
      this->larger_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
314
315
      this->larger_leaf_splits_->min_constraint(),
      this->larger_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
316
      &larger_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
317
    larger_bestsplit_per_features[feature_index].feature = real_feature_index;
318
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
319
  }
320
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
321

Guolin Ke's avatar
Guolin Ke committed
322
323
  std::vector<SplitInfo> smaller_top_k_splits, larger_top_k_splits;
  // local voting
Guolin Ke's avatar
Guolin Ke committed
324
325
  ArrayArgs<SplitInfo>::MaxK(smaller_bestsplit_per_features, top_k_, &smaller_top_k_splits);
  ArrayArgs<SplitInfo>::MaxK(larger_bestsplit_per_features, top_k_, &larger_top_k_splits);
326
327
328
329
330
331
332
333

  std::vector<LightSplitInfo> smaller_top_k_light_splits(top_k_);
  std::vector<LightSplitInfo> larger_top_k_light_splits(top_k_);
  for (int i = 0; i < top_k_; ++i) {
    smaller_top_k_light_splits[i].CopyFrom(smaller_top_k_splits[i]);
    larger_top_k_light_splits[i].CopyFrom(larger_top_k_splits[i]);
  }

Guolin Ke's avatar
Guolin Ke committed
334
335
336
  // gather
  int offset = 0;
  for (int i = 0; i < top_k_; ++i) {
337
338
339
340
    std::memcpy(input_buffer_.data() + offset, &smaller_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
    std::memcpy(input_buffer_.data() + offset, &larger_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
341
342
343
  }
  Network::Allgather(input_buffer_.data(), offset, output_buffer_.data());
  // get all top-k from all machines
344
345
  std::vector<LightSplitInfo> smaller_top_k_splits_global;
  std::vector<LightSplitInfo> larger_top_k_splits_global;
Guolin Ke's avatar
Guolin Ke committed
346
347
348
  offset = 0;
  for (int i = 0; i < num_machines_; ++i) {
    for (int j = 0; j < top_k_; ++j) {
349
350
351
352
353
354
      smaller_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&smaller_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
      larger_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&larger_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
355
356
357
358
    }
  }
  // global voting
  std::vector<int> smaller_top_features, larger_top_features;
359
360
  GlobalVoting(this->smaller_leaf_splits_->LeafIndex(), smaller_top_k_splits_global, &smaller_top_features);
  GlobalVoting(this->larger_leaf_splits_->LeafIndex(), larger_top_k_splits_global, &larger_top_features);
Guolin Ke's avatar
Guolin Ke committed
361
362
363
364
  // copy local histgrams to buffer
  CopyLocalHistogram(smaller_top_features, larger_top_features);

  // Reduce scatter for histogram
Guolin Ke's avatar
Guolin Ke committed
365
366
  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(HistogramBinEntry), block_start_.data(), block_len_.data(),
                         output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramBinEntry::SumReducer);
Guolin Ke's avatar
Guolin Ke committed
367

Guolin Ke's avatar
Guolin Ke committed
368
369
370
371
372
373
374
375
  this->FindBestSplitsFromHistograms(is_feature_used, false);
}

template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {

  std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_);
  std::vector<SplitInfo> larger_best_per_thread(this->num_threads_);
Guolin Ke's avatar
Guolin Ke committed
376
  // find best split from local aggregated histograms
Guolin Ke's avatar
Guolin Ke committed
377
378
379

  OMP_INIT_EX();
  #pragma omp parallel for schedule(static)
380
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
381
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
382
    const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
383
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
Guolin Ke's avatar
Guolin Ke committed
384
    if (smaller_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
385
      SplitInfo smaller_split;
Guolin Ke's avatar
Guolin Ke committed
386
387
      // restore from buffer
      smaller_leaf_histogram_array_global_[feature_index].FromMemory(
Guolin Ke's avatar
Guolin Ke committed
388
        output_buffer_.data() + smaller_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
389

390
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
391
392
393
                                      smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
                                      smaller_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
394

Guolin Ke's avatar
Guolin Ke committed
395
396
      // find best threshold
      smaller_leaf_histogram_array_global_[feature_index].FindBestThreshold(
Guolin Ke's avatar
Guolin Ke committed
397
398
399
        smaller_leaf_splits_global_->sum_gradients(),
        smaller_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
400
401
        smaller_leaf_splits_global_->min_constraint(),
        smaller_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
402
        &smaller_split);
Guolin Ke's avatar
Guolin Ke committed
403
404
405
      smaller_split.feature = real_feature_index;
      if (smaller_split > smaller_bests_per_thread[tid]) {
        smaller_bests_per_thread[tid] = smaller_split;
Guolin Ke's avatar
Guolin Ke committed
406
      }
Guolin Ke's avatar
Guolin Ke committed
407
408
409
    }

    if (larger_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
410
      SplitInfo larger_split;
Guolin Ke's avatar
Guolin Ke committed
411
412
      // restore from buffer
      larger_leaf_histogram_array_global_[feature_index].FromMemory(output_buffer_.data() + larger_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
413

414
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
415
416
417
                                      larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
                                      larger_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
418

Guolin Ke's avatar
Guolin Ke committed
419
      // find best threshold
Guolin Ke's avatar
Guolin Ke committed
420
421
422
423
      larger_leaf_histogram_array_global_[feature_index].FindBestThreshold(
        larger_leaf_splits_global_->sum_gradients(),
        larger_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
424
425
        larger_leaf_splits_global_->min_constraint(),
        larger_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
426
        &larger_split);
Guolin Ke's avatar
Guolin Ke committed
427
428
429
      larger_split.feature = real_feature_index;
      if (larger_split > larger_best_per_thread[tid]) {
        larger_best_per_thread[tid] = larger_split;
Guolin Ke's avatar
Guolin Ke committed
430
      }
Guolin Ke's avatar
Guolin Ke committed
431
    }
432
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
433
  }
434
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
435
436

  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_bests_per_thread);
437
  int leaf = this->smaller_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
438
  this->best_split_per_leaf_[leaf] = smaller_bests_per_thread[smaller_best_idx];
Guolin Ke's avatar
Guolin Ke committed
439

440
441
  if (this->larger_leaf_splits_ != nullptr && this->larger_leaf_splits_->LeafIndex() >= 0) {
    leaf = this->larger_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
442
443
    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best_per_thread);
    this->best_split_per_leaf_[leaf] = larger_best_per_thread[larger_best_idx];
Guolin Ke's avatar
Guolin Ke committed
444
  }
Guolin Ke's avatar
Guolin Ke committed
445
446

  // find local best
Guolin Ke's avatar
Guolin Ke committed
447
448
  SplitInfo smaller_best_split, larger_best_split;
  smaller_best_split = this->best_split_per_leaf_[this->smaller_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
449
  // find local best split for larger leaf
450
  if (this->larger_leaf_splits_->LeafIndex() >= 0) {
Guolin Ke's avatar
Guolin Ke committed
451
    larger_best_split = this->best_split_per_leaf_[this->larger_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
452
453
  }
  // sync global best info
454
  SyncUpGlobalBestSplit(input_buffer_.data(), input_buffer_.data(), &smaller_best_split, &larger_best_split, this->tree_config_->max_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
455
456

  // copy back
Guolin Ke's avatar
Guolin Ke committed
457
458
459
  this->best_split_per_leaf_[smaller_leaf_splits_global_->LeafIndex()] = smaller_best_split;
  if (larger_best_split.feature >= 0 && larger_leaf_splits_global_->LeafIndex() >= 0) {
    this->best_split_per_leaf_[larger_leaf_splits_global_->LeafIndex()] = larger_best_split;
Guolin Ke's avatar
Guolin Ke committed
460
461
462
  }
}

463
464
465
466
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) {
  TREELEARNER_T::Split(tree, best_Leaf, left_leaf, right_leaf);
  const SplitInfo& best_split_info = this->best_split_per_leaf_[best_Leaf];
Guolin Ke's avatar
Guolin Ke committed
467
468
469
  // set the global number of data for leaves
  global_data_count_in_leaf_[*left_leaf] = best_split_info.left_count;
  global_data_count_in_leaf_[*right_leaf] = best_split_info.right_count;
Guolin Ke's avatar
Guolin Ke committed
470
471
  auto p_left = smaller_leaf_splits_global_.get();
  auto p_right = larger_leaf_splits_global_.get();
Guolin Ke's avatar
Guolin Ke committed
472
473
  // init the global sumup info
  if (best_split_info.left_count < best_split_info.right_count) {
474
    smaller_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
475
476
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
477
    larger_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
478
479
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
480
  } else {
481
    smaller_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
482
483
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
484
    larger_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
485
486
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
    p_left = larger_leaf_splits_global_.get();
    p_right = smaller_leaf_splits_global_.get();
  }
  const int inner_feature_index = this->train_data_->InnerFeatureIndex(best_split_info.feature);
  bool is_numerical_split = this->train_data_->FeatureBinMapper(inner_feature_index)->bin_type() == BinType::NumericalBin;
  p_left->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  p_right->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  if (is_numerical_split) {
    double mid = (best_split_info.left_output + best_split_info.right_output) / 2.0f;
    if (best_split_info.monotone_type < 0) {
      p_left->SetValueConstraint(mid, best_split_info.max_constraint);
      p_right->SetValueConstraint(best_split_info.min_constraint, mid);
    } else if (best_split_info.monotone_type > 0) {
      p_left->SetValueConstraint(best_split_info.min_constraint, mid);
      p_right->SetValueConstraint(mid, best_split_info.max_constraint);
    }
Guolin Ke's avatar
Guolin Ke committed
503
504
505
  }
}

506
507
508
// instantiate template classes, otherwise linker cannot find the code
template class VotingParallelTreeLearner<GPUTreeLearner>;
template class VotingParallelTreeLearner<SerialTreeLearner>;
Guolin Ke's avatar
Guolin Ke committed
509
}  // namespace FTLBoost