voting_parallel_tree_learner.cpp 23.2 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
#include "parallel_tree_learner.h"

#include <LightGBM/utils/common.h>

#include <cstring>

#include <tuple>
#include <vector>

namespace LightGBM {

12
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
13
14
15
VotingParallelTreeLearner<TREELEARNER_T>::VotingParallelTreeLearner(const Config* config)
  :TREELEARNER_T(config) {
  top_k_ = this->config_->top_k;
Guolin Ke's avatar
Guolin Ke committed
16
17
}

18
19
20
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, bool is_constant_hessian) {
  TREELEARNER_T::Init(train_data, is_constant_hessian);
Guolin Ke's avatar
Guolin Ke committed
21
22
23
24
  rank_ = Network::rank();
  num_machines_ = Network::num_machines();

  // limit top k
25
26
  if (top_k_ > this->num_features_) {
    top_k_ = this->num_features_;
Guolin Ke's avatar
Guolin Ke committed
27
28
29
  }
  // get max bin
  int max_bin = 0;
30
31
32
  for (int i = 0; i < this->num_features_; ++i) {
    if (max_bin < this->train_data_->FeatureNumBin(i)) {
      max_bin = this->train_data_->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
33
34
35
    }
  }
  // calculate buffer size
36
  size_t buffer_size = 2 * top_k_ * std::max(max_bin * sizeof(HistogramBinEntry), sizeof(LightSplitInfo) * num_machines_);
Guolin Ke's avatar
Guolin Ke committed
37
38
39
40
  // left and right on same time, so need double size
  input_buffer_.resize(buffer_size);
  output_buffer_.resize(buffer_size);

41
42
  smaller_is_feature_aggregated_.resize(this->num_features_);
  larger_is_feature_aggregated_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
43
44
45
46

  block_start_.resize(num_machines_);
  block_len_.resize(num_machines_);

47
48
  smaller_buffer_read_start_pos_.resize(this->num_features_);
  larger_buffer_read_start_pos_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
49
  global_data_count_in_leaf_.resize(this->config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
50

51
52
  smaller_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
  larger_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
Guolin Ke's avatar
Guolin Ke committed
53

Guolin Ke's avatar
Guolin Ke committed
54
55
56
  local_config_ = *this->config_;
  local_config_.min_data_in_leaf /= num_machines_;
  local_config_.min_sum_hessian_in_leaf /= num_machines_;
Guolin Ke's avatar
Guolin Ke committed
57

Guolin Ke's avatar
Guolin Ke committed
58
  this->histogram_pool_.ResetConfig(&local_config_);
Guolin Ke's avatar
Guolin Ke committed
59
60

  // initialize histograms for global
61
62
63
  smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  auto num_total_bin = this->train_data_->NumTotalBin();
Guolin Ke's avatar
Guolin Ke committed
64
65
66
67
68
69
  smaller_leaf_histogram_data_.resize(num_total_bin);
  larger_leaf_histogram_data_.resize(num_total_bin);
  feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
  for (int i = 0; i < train_data->num_features(); ++i) {
    feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
70
    feature_metas_[i].default_bin = train_data->FeatureBinMapper(i)->GetDefaultBin();
Guolin Ke's avatar
Guolin Ke committed
71
    feature_metas_[i].missing_type = train_data->FeatureBinMapper(i)->missing_type();
Guolin Ke's avatar
Guolin Ke committed
72
    feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
Guolin Ke's avatar
Guolin Ke committed
73
    feature_metas_[i].penalty = train_data->FeaturePenalte(i);
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
78
    if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
      feature_metas_[i].bias = 1;
    } else {
      feature_metas_[i].bias = 0;
    }
Guolin Ke's avatar
Guolin Ke committed
79
    feature_metas_[i].config = this->config_;
80
    feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
Guolin Ke's avatar
Guolin Ke committed
81
82
83
84
  }
  uint64_t offset = 0;
  for (int j = 0; j < train_data->num_features(); ++j) {
    offset += static_cast<uint64_t>(train_data->SubFeatureBinOffset(j));
85
86
    smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
    larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
Guolin Ke's avatar
Guolin Ke committed
87
88
89
90
91
    auto num_bin = train_data->FeatureNumBin(j);
    if (train_data->FeatureBinMapper(j)->GetDefaultBin() == 0) {
      num_bin -= 1;
    }
    offset += static_cast<uint64_t>(num_bin);
Guolin Ke's avatar
Guolin Ke committed
92
93
94
  }
}

95
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
96
97
void VotingParallelTreeLearner<TREELEARNER_T>::ResetConfig(const Config* config) {
  TREELEARNER_T::ResetConfig(config);
Guolin Ke's avatar
Guolin Ke committed
98

Guolin Ke's avatar
Guolin Ke committed
99
100
101
  local_config_ = *this->config_;
  local_config_.min_data_in_leaf /= num_machines_;
  local_config_.min_sum_hessian_in_leaf /= num_machines_;
Guolin Ke's avatar
Guolin Ke committed
102

Guolin Ke's avatar
Guolin Ke committed
103
104
  this->histogram_pool_.ResetConfig(&local_config_);
  global_data_count_in_leaf_.resize(this->config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
105

Guolin Ke's avatar
Guolin Ke committed
106
  for (size_t i = 0; i < feature_metas_.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
107
    feature_metas_[i].config = this->config_;
Guolin Ke's avatar
Guolin Ke committed
108
109
  }
}
Guolin Ke's avatar
Guolin Ke committed
110

111
112
113
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
  TREELEARNER_T::BeforeTrain();
Guolin Ke's avatar
Guolin Ke committed
114
  // sync global data sumup info
115
  std::tuple<data_size_t, double, double> data(this->smaller_leaf_splits_->num_data_in_leaf(), this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians());
Guolin Ke's avatar
Guolin Ke committed
116
117
118
  int size = sizeof(std::tuple<data_size_t, double, double>);
  std::memcpy(input_buffer_.data(), &data, size);

Guolin Ke's avatar
Guolin Ke committed
119
120
  Network::Allreduce(input_buffer_.data(), size, sizeof(std::tuple<data_size_t, double, double>), output_buffer_.data(), [](const char *src, char *dst, int type_size, comm_size_t len) {
    comm_size_t used_size = 0;
Guolin Ke's avatar
Guolin Ke committed
121
122
123
124
125
126
127
128
129
130
131
132
133
134
    const std::tuple<data_size_t, double, double> *p1;
    std::tuple<data_size_t, double, double> *p2;
    while (used_size < len) {
      p1 = reinterpret_cast<const std::tuple<data_size_t, double, double> *>(src);
      p2 = reinterpret_cast<std::tuple<data_size_t, double, double> *>(dst);
      std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
      std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
      std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);
      src += type_size;
      dst += type_size;
      used_size += type_size;
    }
  });

Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
135
  std::memcpy((void*)&data, output_buffer_.data(), size);
Guolin Ke's avatar
Guolin Ke committed
136
137
138
139
140
141
142
143

  // set global sumup info
  smaller_leaf_splits_global_->Init(std::get<1>(data), std::get<2>(data));
  larger_leaf_splits_global_->Init();
  // init global data count in leaf
  global_data_count_in_leaf_[0] = std::get<0>(data);
}

144
145
146
template <typename TREELEARNER_T>
bool VotingParallelTreeLearner<TREELEARNER_T>::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
  if (TREELEARNER_T::BeforeFindBestSplit(tree, left_leaf, right_leaf)) {
Guolin Ke's avatar
Guolin Ke committed
147
148
149
150
151
152
    data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
    data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
    if (right_leaf < 0) {
      return true;
    } else if (num_data_in_left_child < num_data_in_right_child) {
      // get local sumup
153
154
      this->smaller_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
155
156
    } else {
      // get local sumup
157
158
      this->smaller_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
163
164
165
    }
    return true;
  } else {
    return false;
  }
}

166
template <typename TREELEARNER_T>
167
void VotingParallelTreeLearner<TREELEARNER_T>::GlobalVoting(int leaf_idx, const std::vector<LightSplitInfo>& splits, std::vector<int>* out) {
Guolin Ke's avatar
Guolin Ke committed
168
169
170
171
172
  out->clear();
  if (leaf_idx < 0) {
    return;
  }
  // get mean number on machines
173
  score_t mean_num_data = GetGlobalDataCountInLeaf(leaf_idx) / static_cast<score_t>(num_machines_);
174
  std::vector<LightSplitInfo> feature_best_split(this->train_data_->num_total_features() , LightSplitInfo());
Guolin Ke's avatar
Guolin Ke committed
175
176
177
178
179
180
181
182
183
184
185
186
187
  for (auto & split : splits) {
    int fid = split.feature;
    if (fid < 0) {
      continue;
    }
    // weighted gain
    double gain = split.gain * (split.left_count + split.right_count) / mean_num_data;
    if (gain > feature_best_split[fid].gain) {
      feature_best_split[fid] = split;
      feature_best_split[fid].gain = gain;
    }
  }
  // get top k
188
189
  std::vector<LightSplitInfo> top_k_splits;
  ArrayArgs<LightSplitInfo>::MaxK(feature_best_split, top_k_, &top_k_splits);
Guolin Ke's avatar
Guolin Ke committed
190
191
192
193
194
195
196
197
  for (auto& split : top_k_splits) {
    if (split.gain == kMinScore || split.feature == -1) {
      continue;
    }
    out->push_back(split.feature);
  }
}

198
199
200
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::CopyLocalHistogram(const std::vector<int>& smaller_top_features, const std::vector<int>& larger_top_features) {
  for (int i = 0; i < this->num_features_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    smaller_is_feature_aggregated_[i] = false;
    larger_is_feature_aggregated_[i] = false;
  }
  size_t total_num_features = smaller_top_features.size() + larger_top_features.size();
  size_t average_feature = (total_num_features + num_machines_ - 1) / num_machines_;
  size_t used_num_features = 0, smaller_idx = 0, larger_idx = 0;
  block_start_[0] = 0;
  reduce_scatter_size_ = 0;
  // Copy histogram to buffer, and Get local aggregate features
  for (int i = 0; i < num_machines_; ++i) {
    size_t cur_size = 0, cur_used_features = 0;
    size_t cur_total_feature = std::min(average_feature, total_num_features - used_num_features);
    // copy histograms.
    while (cur_used_features < cur_total_feature) {
      // copy smaller leaf histograms first
      if (smaller_idx < smaller_top_features.size()) {
217
        int inner_feature_index = this->train_data_->InnerFeatureIndex(smaller_top_features[smaller_idx]);
Guolin Ke's avatar
Guolin Ke committed
218
219
220
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
221
222
          smaller_is_feature_aggregated_[inner_feature_index] = true;
          smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
223
224
        }
        // copy
225
226
227
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
228
229
230
231
232
233
234
        ++smaller_idx;
      }
      if (cur_used_features >= cur_total_feature) {
        break;
      }
      // then copy larger leaf histograms
      if (larger_idx < larger_top_features.size()) {
235
        int inner_feature_index = this->train_data_->InnerFeatureIndex(larger_top_features[larger_idx]);
Guolin Ke's avatar
Guolin Ke committed
236
237
238
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
239
240
          larger_is_feature_aggregated_[inner_feature_index] = true;
          larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
241
242
        }
        // copy
243
244
245
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
246
247
248
249
250
251
252
253
254
255
256
        ++larger_idx;
      }
    }
    used_num_features += cur_used_features;
    block_len_[i] = static_cast<int>(cur_size);
    if (i < num_machines_ - 1) {
      block_start_[i + 1] = block_start_[i] + block_len_[i];
    }
  }
}

257
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
258
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
Guolin Ke's avatar
Guolin Ke committed
259
  // use local data to find local best splits
260
  std::vector<int8_t> is_feature_used(this->num_features_, 0);
Guolin Ke's avatar
Guolin Ke committed
261
#pragma omp parallel for schedule(static)
262
263
264
265
266
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
    if (!this->is_feature_used_[feature_index]) continue;
    if (this->parent_leaf_histogram_array_ != nullptr
      && !this->parent_leaf_histogram_array_[feature_index].is_splittable()) {
      this->smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
Guolin Ke's avatar
Guolin Ke committed
267
268
269
270
271
      continue;
    }
    is_feature_used[feature_index] = 1;
  }
  bool use_subtract = true;
272
  if (this->parent_leaf_histogram_array_ == nullptr) {
Guolin Ke's avatar
Guolin Ke committed
273
274
    use_subtract = false;
  }
Guolin Ke's avatar
Guolin Ke committed
275
  TREELEARNER_T::ConstructHistograms(is_feature_used, use_subtract);
Guolin Ke's avatar
Guolin Ke committed
276

277
278
  std::vector<SplitInfo> smaller_bestsplit_per_features(this->num_features_);
  std::vector<SplitInfo> larger_bestsplit_per_features(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
279

280
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
281
  // find splits
Guolin Ke's avatar
Guolin Ke committed
282
#pragma omp parallel for schedule(static)
283
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
284
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
285
    if (!is_feature_used[feature_index]) { continue; }
286
287
288
289
290
291
292
293
294
295
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
    this->train_data_->FixHistogram(feature_index,
      this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
      this->smaller_leaf_histogram_array_[feature_index].RawData());

    this->smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->smaller_leaf_splits_->sum_gradients(),
      this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
296
297
      this->smaller_leaf_splits_->min_constraint(),
      this->smaller_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
298
      &smaller_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
299
    smaller_bestsplit_per_features[feature_index].feature = real_feature_index;
Guolin Ke's avatar
Guolin Ke committed
300
    // only has root leaf
301
    if (this->larger_leaf_splits_ == nullptr || this->larger_leaf_splits_->LeafIndex() < 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
302
303

    if (use_subtract) {
304
      this->larger_leaf_histogram_array_[feature_index].Subtract(this->smaller_leaf_histogram_array_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
305
    } else {
306
307
308
      this->train_data_->FixHistogram(feature_index, this->larger_leaf_splits_->sum_gradients(), this->larger_leaf_splits_->sum_hessians(),
        this->larger_leaf_splits_->num_data_in_leaf(),
        this->larger_leaf_histogram_array_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
309
310
    }
    // find best threshold for larger child
311
312
313
314
    this->larger_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->larger_leaf_splits_->sum_gradients(),
      this->larger_leaf_splits_->sum_hessians(),
      this->larger_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
315
316
      this->larger_leaf_splits_->min_constraint(),
      this->larger_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
317
      &larger_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
318
    larger_bestsplit_per_features[feature_index].feature = real_feature_index;
319
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
320
  }
321
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
322

Guolin Ke's avatar
Guolin Ke committed
323
324
  std::vector<SplitInfo> smaller_top_k_splits, larger_top_k_splits;
  // local voting
Guolin Ke's avatar
Guolin Ke committed
325
326
  ArrayArgs<SplitInfo>::MaxK(smaller_bestsplit_per_features, top_k_, &smaller_top_k_splits);
  ArrayArgs<SplitInfo>::MaxK(larger_bestsplit_per_features, top_k_, &larger_top_k_splits);
327
328
329
330
331
332
333
334

  std::vector<LightSplitInfo> smaller_top_k_light_splits(top_k_);
  std::vector<LightSplitInfo> larger_top_k_light_splits(top_k_);
  for (int i = 0; i < top_k_; ++i) {
    smaller_top_k_light_splits[i].CopyFrom(smaller_top_k_splits[i]);
    larger_top_k_light_splits[i].CopyFrom(larger_top_k_splits[i]);
  }

Guolin Ke's avatar
Guolin Ke committed
335
336
337
  // gather
  int offset = 0;
  for (int i = 0; i < top_k_; ++i) {
338
339
340
341
    std::memcpy(input_buffer_.data() + offset, &smaller_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
    std::memcpy(input_buffer_.data() + offset, &larger_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
342
343
344
  }
  Network::Allgather(input_buffer_.data(), offset, output_buffer_.data());
  // get all top-k from all machines
345
346
  std::vector<LightSplitInfo> smaller_top_k_splits_global;
  std::vector<LightSplitInfo> larger_top_k_splits_global;
Guolin Ke's avatar
Guolin Ke committed
347
348
349
  offset = 0;
  for (int i = 0; i < num_machines_; ++i) {
    for (int j = 0; j < top_k_; ++j) {
350
351
352
353
354
355
      smaller_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&smaller_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
      larger_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&larger_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
356
357
358
359
    }
  }
  // global voting
  std::vector<int> smaller_top_features, larger_top_features;
360
361
  GlobalVoting(this->smaller_leaf_splits_->LeafIndex(), smaller_top_k_splits_global, &smaller_top_features);
  GlobalVoting(this->larger_leaf_splits_->LeafIndex(), larger_top_k_splits_global, &larger_top_features);
Guolin Ke's avatar
Guolin Ke committed
362
363
364
365
  // copy local histgrams to buffer
  CopyLocalHistogram(smaller_top_features, larger_top_features);

  // Reduce scatter for histogram
Guolin Ke's avatar
Guolin Ke committed
366
367
  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(HistogramBinEntry), block_start_.data(), block_len_.data(),
                         output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramBinEntry::SumReducer);
Guolin Ke's avatar
Guolin Ke committed
368

Guolin Ke's avatar
Guolin Ke committed
369
370
371
372
373
374
375
376
  this->FindBestSplitsFromHistograms(is_feature_used, false);
}

template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {

  std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_);
  std::vector<SplitInfo> larger_best_per_thread(this->num_threads_);
Guolin Ke's avatar
Guolin Ke committed
377
  // find best split from local aggregated histograms
Guolin Ke's avatar
Guolin Ke committed
378
379
380

  OMP_INIT_EX();
  #pragma omp parallel for schedule(static)
381
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
382
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
383
    const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
384
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
Guolin Ke's avatar
Guolin Ke committed
385
    if (smaller_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
386
      SplitInfo smaller_split;
Guolin Ke's avatar
Guolin Ke committed
387
388
      // restore from buffer
      smaller_leaf_histogram_array_global_[feature_index].FromMemory(
Guolin Ke's avatar
Guolin Ke committed
389
        output_buffer_.data() + smaller_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
390

391
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
392
393
394
                                      smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
                                      smaller_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
395

Guolin Ke's avatar
Guolin Ke committed
396
397
      // find best threshold
      smaller_leaf_histogram_array_global_[feature_index].FindBestThreshold(
Guolin Ke's avatar
Guolin Ke committed
398
399
400
        smaller_leaf_splits_global_->sum_gradients(),
        smaller_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
401
402
        smaller_leaf_splits_global_->min_constraint(),
        smaller_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
403
        &smaller_split);
Guolin Ke's avatar
Guolin Ke committed
404
405
406
      smaller_split.feature = real_feature_index;
      if (smaller_split > smaller_bests_per_thread[tid]) {
        smaller_bests_per_thread[tid] = smaller_split;
Guolin Ke's avatar
Guolin Ke committed
407
      }
Guolin Ke's avatar
Guolin Ke committed
408
409
410
    }

    if (larger_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
411
      SplitInfo larger_split;
Guolin Ke's avatar
Guolin Ke committed
412
413
      // restore from buffer
      larger_leaf_histogram_array_global_[feature_index].FromMemory(output_buffer_.data() + larger_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
414

415
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
416
417
418
                                      larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
                                      larger_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
419

Guolin Ke's avatar
Guolin Ke committed
420
      // find best threshold
Guolin Ke's avatar
Guolin Ke committed
421
422
423
424
      larger_leaf_histogram_array_global_[feature_index].FindBestThreshold(
        larger_leaf_splits_global_->sum_gradients(),
        larger_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
425
426
        larger_leaf_splits_global_->min_constraint(),
        larger_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
427
        &larger_split);
Guolin Ke's avatar
Guolin Ke committed
428
429
430
      larger_split.feature = real_feature_index;
      if (larger_split > larger_best_per_thread[tid]) {
        larger_best_per_thread[tid] = larger_split;
Guolin Ke's avatar
Guolin Ke committed
431
      }
Guolin Ke's avatar
Guolin Ke committed
432
    }
433
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
434
  }
435
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
436
437

  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_bests_per_thread);
438
  int leaf = this->smaller_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
439
  this->best_split_per_leaf_[leaf] = smaller_bests_per_thread[smaller_best_idx];
Guolin Ke's avatar
Guolin Ke committed
440

441
442
  if (this->larger_leaf_splits_ != nullptr && this->larger_leaf_splits_->LeafIndex() >= 0) {
    leaf = this->larger_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
443
444
    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best_per_thread);
    this->best_split_per_leaf_[leaf] = larger_best_per_thread[larger_best_idx];
Guolin Ke's avatar
Guolin Ke committed
445
  }
Guolin Ke's avatar
Guolin Ke committed
446
447

  // find local best
Guolin Ke's avatar
Guolin Ke committed
448
449
  SplitInfo smaller_best_split, larger_best_split;
  smaller_best_split = this->best_split_per_leaf_[this->smaller_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
450
  // find local best split for larger leaf
451
  if (this->larger_leaf_splits_->LeafIndex() >= 0) {
Guolin Ke's avatar
Guolin Ke committed
452
    larger_best_split = this->best_split_per_leaf_[this->larger_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
453
454
  }
  // sync global best info
Guolin Ke's avatar
Guolin Ke committed
455
  SyncUpGlobalBestSplit(input_buffer_.data(), input_buffer_.data(), &smaller_best_split, &larger_best_split, this->config_->max_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
456
457

  // copy back
Guolin Ke's avatar
Guolin Ke committed
458
459
460
  this->best_split_per_leaf_[smaller_leaf_splits_global_->LeafIndex()] = smaller_best_split;
  if (larger_best_split.feature >= 0 && larger_leaf_splits_global_->LeafIndex() >= 0) {
    this->best_split_per_leaf_[larger_leaf_splits_global_->LeafIndex()] = larger_best_split;
Guolin Ke's avatar
Guolin Ke committed
461
462
463
  }
}

464
465
466
467
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) {
  TREELEARNER_T::Split(tree, best_Leaf, left_leaf, right_leaf);
  const SplitInfo& best_split_info = this->best_split_per_leaf_[best_Leaf];
Guolin Ke's avatar
Guolin Ke committed
468
469
470
  // set the global number of data for leaves
  global_data_count_in_leaf_[*left_leaf] = best_split_info.left_count;
  global_data_count_in_leaf_[*right_leaf] = best_split_info.right_count;
Guolin Ke's avatar
Guolin Ke committed
471
472
  auto p_left = smaller_leaf_splits_global_.get();
  auto p_right = larger_leaf_splits_global_.get();
Guolin Ke's avatar
Guolin Ke committed
473
474
  // init the global sumup info
  if (best_split_info.left_count < best_split_info.right_count) {
475
    smaller_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
476
477
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
478
    larger_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
479
480
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
481
  } else {
482
    smaller_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
483
484
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
485
    larger_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
486
487
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
    p_left = larger_leaf_splits_global_.get();
    p_right = smaller_leaf_splits_global_.get();
  }
  const int inner_feature_index = this->train_data_->InnerFeatureIndex(best_split_info.feature);
  bool is_numerical_split = this->train_data_->FeatureBinMapper(inner_feature_index)->bin_type() == BinType::NumericalBin;
  p_left->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  p_right->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  if (is_numerical_split) {
    double mid = (best_split_info.left_output + best_split_info.right_output) / 2.0f;
    if (best_split_info.monotone_type < 0) {
      p_left->SetValueConstraint(mid, best_split_info.max_constraint);
      p_right->SetValueConstraint(best_split_info.min_constraint, mid);
    } else if (best_split_info.monotone_type > 0) {
      p_left->SetValueConstraint(best_split_info.min_constraint, mid);
      p_right->SetValueConstraint(mid, best_split_info.max_constraint);
    }
Guolin Ke's avatar
Guolin Ke committed
504
505
506
  }
}

507
508
509
// instantiate template classes, otherwise linker cannot find the code
template class VotingParallelTreeLearner<GPUTreeLearner>;
template class VotingParallelTreeLearner<SerialTreeLearner>;
Guolin Ke's avatar
Guolin Ke committed
510
}  // namespace FTLBoost