voting_parallel_tree_learner.cpp 23.9 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
8
9
10
#include <LightGBM/utils/common.h>

#include <cstring>
#include <tuple>
#include <vector>

11
12
#include "parallel_tree_learner.h"

Guolin Ke's avatar
Guolin Ke committed
13
14
namespace LightGBM {

15
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
16
17
18
VotingParallelTreeLearner<TREELEARNER_T>::VotingParallelTreeLearner(const Config* config)
  :TREELEARNER_T(config) {
  top_k_ = this->config_->top_k;
Guolin Ke's avatar
Guolin Ke committed
19
20
}

21
22
23
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, bool is_constant_hessian) {
  TREELEARNER_T::Init(train_data, is_constant_hessian);
Guolin Ke's avatar
Guolin Ke committed
24
25
26
27
  rank_ = Network::rank();
  num_machines_ = Network::num_machines();

  // limit top k
28
29
  if (top_k_ > this->num_features_) {
    top_k_ = this->num_features_;
Guolin Ke's avatar
Guolin Ke committed
30
31
32
  }
  // get max bin
  int max_bin = 0;
33
34
35
  for (int i = 0; i < this->num_features_; ++i) {
    if (max_bin < this->train_data_->FeatureNumBin(i)) {
      max_bin = this->train_data_->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
36
37
38
    }
  }
  // calculate buffer size
39
  size_t buffer_size = 2 * top_k_ * std::max(max_bin * sizeof(HistogramBinEntry), sizeof(LightSplitInfo) * num_machines_);
Guolin Ke's avatar
Guolin Ke committed
40
41
42
43
  // left and right on same time, so need double size
  input_buffer_.resize(buffer_size);
  output_buffer_.resize(buffer_size);

44
45
  smaller_is_feature_aggregated_.resize(this->num_features_);
  larger_is_feature_aggregated_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
46
47
48
49

  block_start_.resize(num_machines_);
  block_len_.resize(num_machines_);

50
51
  smaller_buffer_read_start_pos_.resize(this->num_features_);
  larger_buffer_read_start_pos_.resize(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
52
  global_data_count_in_leaf_.resize(this->config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
53

54
55
  smaller_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
  larger_leaf_splits_global_.reset(new LeafSplits(this->train_data_->num_data()));
Guolin Ke's avatar
Guolin Ke committed
56

Guolin Ke's avatar
Guolin Ke committed
57
58
59
  local_config_ = *this->config_;
  local_config_.min_data_in_leaf /= num_machines_;
  local_config_.min_sum_hessian_in_leaf /= num_machines_;
Guolin Ke's avatar
Guolin Ke committed
60

Guolin Ke's avatar
Guolin Ke committed
61
  this->histogram_pool_.ResetConfig(&local_config_);
Guolin Ke's avatar
Guolin Ke committed
62
63

  // initialize histograms for global
64
65
66
  smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
  auto num_total_bin = this->train_data_->NumTotalBin();
Guolin Ke's avatar
Guolin Ke committed
67
68
69
70
71
72
  smaller_leaf_histogram_data_.resize(num_total_bin);
  larger_leaf_histogram_data_.resize(num_total_bin);
  feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
  for (int i = 0; i < train_data->num_features(); ++i) {
    feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
73
    feature_metas_[i].default_bin = train_data->FeatureBinMapper(i)->GetDefaultBin();
Guolin Ke's avatar
Guolin Ke committed
74
    feature_metas_[i].missing_type = train_data->FeatureBinMapper(i)->missing_type();
Guolin Ke's avatar
Guolin Ke committed
75
    feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
Guolin Ke's avatar
Guolin Ke committed
76
    feature_metas_[i].penalty = train_data->FeaturePenalte(i);
Guolin Ke's avatar
Guolin Ke committed
77
78
79
80
81
    if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
      feature_metas_[i].bias = 1;
    } else {
      feature_metas_[i].bias = 0;
    }
Guolin Ke's avatar
Guolin Ke committed
82
    feature_metas_[i].config = this->config_;
83
    feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
Guolin Ke's avatar
Guolin Ke committed
84
85
86
87
  }
  uint64_t offset = 0;
  for (int j = 0; j < train_data->num_features(); ++j) {
    offset += static_cast<uint64_t>(train_data->SubFeatureBinOffset(j));
88
89
    smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
    larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
Guolin Ke's avatar
Guolin Ke committed
90
91
92
93
94
    auto num_bin = train_data->FeatureNumBin(j);
    if (train_data->FeatureBinMapper(j)->GetDefaultBin() == 0) {
      num_bin -= 1;
    }
    offset += static_cast<uint64_t>(num_bin);
Guolin Ke's avatar
Guolin Ke committed
95
96
97
  }
}

98
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
99
100
void VotingParallelTreeLearner<TREELEARNER_T>::ResetConfig(const Config* config) {
  TREELEARNER_T::ResetConfig(config);
Guolin Ke's avatar
Guolin Ke committed
101

Guolin Ke's avatar
Guolin Ke committed
102
103
104
  local_config_ = *this->config_;
  local_config_.min_data_in_leaf /= num_machines_;
  local_config_.min_sum_hessian_in_leaf /= num_machines_;
Guolin Ke's avatar
Guolin Ke committed
105

Guolin Ke's avatar
Guolin Ke committed
106
107
  this->histogram_pool_.ResetConfig(&local_config_);
  global_data_count_in_leaf_.resize(this->config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
108

Guolin Ke's avatar
Guolin Ke committed
109
  for (size_t i = 0; i < feature_metas_.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
110
    feature_metas_[i].config = this->config_;
Guolin Ke's avatar
Guolin Ke committed
111
112
  }
}
Guolin Ke's avatar
Guolin Ke committed
113

114
115
116
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
  TREELEARNER_T::BeforeTrain();
Guolin Ke's avatar
Guolin Ke committed
117
  // sync global data sumup info
118
  std::tuple<data_size_t, double, double> data(this->smaller_leaf_splits_->num_data_in_leaf(), this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians());
Guolin Ke's avatar
Guolin Ke committed
119
120
121
  int size = sizeof(std::tuple<data_size_t, double, double>);
  std::memcpy(input_buffer_.data(), &data, size);

Guolin Ke's avatar
Guolin Ke committed
122
123
  Network::Allreduce(input_buffer_.data(), size, sizeof(std::tuple<data_size_t, double, double>), output_buffer_.data(), [](const char *src, char *dst, int type_size, comm_size_t len) {
    comm_size_t used_size = 0;
Guolin Ke's avatar
Guolin Ke committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
    const std::tuple<data_size_t, double, double> *p1;
    std::tuple<data_size_t, double, double> *p2;
    while (used_size < len) {
      p1 = reinterpret_cast<const std::tuple<data_size_t, double, double> *>(src);
      p2 = reinterpret_cast<std::tuple<data_size_t, double, double> *>(dst);
      std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
      std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
      std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);
      src += type_size;
      dst += type_size;
      used_size += type_size;
    }
  });

Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
138
  std::memcpy((void*)&data, output_buffer_.data(), size);
Guolin Ke's avatar
Guolin Ke committed
139
140
141
142
143
144
145
146

  // set global sumup info
  smaller_leaf_splits_global_->Init(std::get<1>(data), std::get<2>(data));
  larger_leaf_splits_global_->Init();
  // init global data count in leaf
  global_data_count_in_leaf_[0] = std::get<0>(data);
}

147
148
149
template <typename TREELEARNER_T>
bool VotingParallelTreeLearner<TREELEARNER_T>::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
  if (TREELEARNER_T::BeforeFindBestSplit(tree, left_leaf, right_leaf)) {
Guolin Ke's avatar
Guolin Ke committed
150
151
152
153
154
155
    data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
    data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
    if (right_leaf < 0) {
      return true;
    } else if (num_data_in_left_child < num_data_in_right_child) {
      // get local sumup
156
157
      this->smaller_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
158
159
    } else {
      // get local sumup
160
161
      this->smaller_leaf_splits_->Init(right_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
      this->larger_leaf_splits_->Init(left_leaf, this->data_partition_.get(), this->gradients_, this->hessians_);
Guolin Ke's avatar
Guolin Ke committed
162
163
164
165
166
167
168
    }
    return true;
  } else {
    return false;
  }
}

169
template <typename TREELEARNER_T>
170
void VotingParallelTreeLearner<TREELEARNER_T>::GlobalVoting(int leaf_idx, const std::vector<LightSplitInfo>& splits, std::vector<int>* out) {
Guolin Ke's avatar
Guolin Ke committed
171
172
173
174
175
  out->clear();
  if (leaf_idx < 0) {
    return;
  }
  // get mean number on machines
176
  score_t mean_num_data = GetGlobalDataCountInLeaf(leaf_idx) / static_cast<score_t>(num_machines_);
177
  std::vector<LightSplitInfo> feature_best_split(this->train_data_->num_total_features() , LightSplitInfo());
Guolin Ke's avatar
Guolin Ke committed
178
179
180
181
182
183
184
185
186
187
188
189
190
  for (auto & split : splits) {
    int fid = split.feature;
    if (fid < 0) {
      continue;
    }
    // weighted gain
    double gain = split.gain * (split.left_count + split.right_count) / mean_num_data;
    if (gain > feature_best_split[fid].gain) {
      feature_best_split[fid] = split;
      feature_best_split[fid].gain = gain;
    }
  }
  // get top k
191
192
  std::vector<LightSplitInfo> top_k_splits;
  ArrayArgs<LightSplitInfo>::MaxK(feature_best_split, top_k_, &top_k_splits);
193
  std::stable_sort(top_k_splits.begin(), top_k_splits.end(), std::greater<LightSplitInfo>());
Guolin Ke's avatar
Guolin Ke committed
194
195
196
197
198
199
200
201
  for (auto& split : top_k_splits) {
    if (split.gain == kMinScore || split.feature == -1) {
      continue;
    }
    out->push_back(split.feature);
  }
}

202
203
204
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::CopyLocalHistogram(const std::vector<int>& smaller_top_features, const std::vector<int>& larger_top_features) {
  for (int i = 0; i < this->num_features_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
    smaller_is_feature_aggregated_[i] = false;
    larger_is_feature_aggregated_[i] = false;
  }
  size_t total_num_features = smaller_top_features.size() + larger_top_features.size();
  size_t average_feature = (total_num_features + num_machines_ - 1) / num_machines_;
  size_t used_num_features = 0, smaller_idx = 0, larger_idx = 0;
  block_start_[0] = 0;
  reduce_scatter_size_ = 0;
  // Copy histogram to buffer, and Get local aggregate features
  for (int i = 0; i < num_machines_; ++i) {
    size_t cur_size = 0, cur_used_features = 0;
    size_t cur_total_feature = std::min(average_feature, total_num_features - used_num_features);
    // copy histograms.
    while (cur_used_features < cur_total_feature) {
      // copy smaller leaf histograms first
      if (smaller_idx < smaller_top_features.size()) {
221
        int inner_feature_index = this->train_data_->InnerFeatureIndex(smaller_top_features[smaller_idx]);
Guolin Ke's avatar
Guolin Ke committed
222
223
224
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
225
226
          smaller_is_feature_aggregated_[inner_feature_index] = true;
          smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
227
228
        }
        // copy
229
230
231
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
232
233
234
235
236
237
238
        ++smaller_idx;
      }
      if (cur_used_features >= cur_total_feature) {
        break;
      }
      // then copy larger leaf histograms
      if (larger_idx < larger_top_features.size()) {
239
        int inner_feature_index = this->train_data_->InnerFeatureIndex(larger_top_features[larger_idx]);
Guolin Ke's avatar
Guolin Ke committed
240
241
242
        ++cur_used_features;
        // mark local aggregated feature
        if (i == rank_) {
Guolin Ke's avatar
Guolin Ke committed
243
244
          larger_is_feature_aggregated_[inner_feature_index] = true;
          larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
Guolin Ke's avatar
Guolin Ke committed
245
246
        }
        // copy
247
248
249
        std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
        cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
        reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
Guolin Ke's avatar
Guolin Ke committed
250
251
252
253
254
255
256
257
258
259
260
        ++larger_idx;
      }
    }
    used_num_features += cur_used_features;
    block_len_[i] = static_cast<int>(cur_size);
    if (i < num_machines_ - 1) {
      block_start_[i + 1] = block_start_[i] + block_len_[i];
    }
  }
}

261
template <typename TREELEARNER_T>
Guolin Ke's avatar
Guolin Ke committed
262
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
Guolin Ke's avatar
Guolin Ke committed
263
  // use local data to find local best splits
264
  std::vector<int8_t> is_feature_used(this->num_features_, 0);
Guolin Ke's avatar
Guolin Ke committed
265
#pragma omp parallel for schedule(static)
266
267
268
269
270
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
    if (!this->is_feature_used_[feature_index]) continue;
    if (this->parent_leaf_histogram_array_ != nullptr
      && !this->parent_leaf_histogram_array_[feature_index].is_splittable()) {
      this->smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
Guolin Ke's avatar
Guolin Ke committed
271
272
273
274
275
      continue;
    }
    is_feature_used[feature_index] = 1;
  }
  bool use_subtract = true;
276
  if (this->parent_leaf_histogram_array_ == nullptr) {
Guolin Ke's avatar
Guolin Ke committed
277
278
    use_subtract = false;
  }
Guolin Ke's avatar
Guolin Ke committed
279
  TREELEARNER_T::ConstructHistograms(is_feature_used, use_subtract);
Guolin Ke's avatar
Guolin Ke committed
280

281
282
  std::vector<SplitInfo> smaller_bestsplit_per_features(this->num_features_);
  std::vector<SplitInfo> larger_bestsplit_per_features(this->num_features_);
Guolin Ke's avatar
Guolin Ke committed
283

284
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
285
  // find splits
Guolin Ke's avatar
Guolin Ke committed
286
#pragma omp parallel for schedule(static)
287
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
288
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
289
    if (!is_feature_used[feature_index]) { continue; }
290
291
292
293
294
295
296
297
298
299
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
    this->train_data_->FixHistogram(feature_index,
      this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
      this->smaller_leaf_histogram_array_[feature_index].RawData());

    this->smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->smaller_leaf_splits_->sum_gradients(),
      this->smaller_leaf_splits_->sum_hessians(),
      this->smaller_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
300
301
      this->smaller_leaf_splits_->min_constraint(),
      this->smaller_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
302
      &smaller_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
303
    smaller_bestsplit_per_features[feature_index].feature = real_feature_index;
Guolin Ke's avatar
Guolin Ke committed
304
    // only has root leaf
305
    if (this->larger_leaf_splits_ == nullptr || this->larger_leaf_splits_->LeafIndex() < 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
306
307

    if (use_subtract) {
308
      this->larger_leaf_histogram_array_[feature_index].Subtract(this->smaller_leaf_histogram_array_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
309
    } else {
310
311
312
      this->train_data_->FixHistogram(feature_index, this->larger_leaf_splits_->sum_gradients(), this->larger_leaf_splits_->sum_hessians(),
        this->larger_leaf_splits_->num_data_in_leaf(),
        this->larger_leaf_histogram_array_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
313
314
    }
    // find best threshold for larger child
315
316
317
318
    this->larger_leaf_histogram_array_[feature_index].FindBestThreshold(
      this->larger_leaf_splits_->sum_gradients(),
      this->larger_leaf_splits_->sum_hessians(),
      this->larger_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
319
320
      this->larger_leaf_splits_->min_constraint(),
      this->larger_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
321
      &larger_bestsplit_per_features[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
322
    larger_bestsplit_per_features[feature_index].feature = real_feature_index;
323
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
324
  }
325
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
326

Guolin Ke's avatar
Guolin Ke committed
327
328
  std::vector<SplitInfo> smaller_top_k_splits, larger_top_k_splits;
  // local voting
Guolin Ke's avatar
Guolin Ke committed
329
330
  ArrayArgs<SplitInfo>::MaxK(smaller_bestsplit_per_features, top_k_, &smaller_top_k_splits);
  ArrayArgs<SplitInfo>::MaxK(larger_bestsplit_per_features, top_k_, &larger_top_k_splits);
331
332
333
334
335
336
337
338

  std::vector<LightSplitInfo> smaller_top_k_light_splits(top_k_);
  std::vector<LightSplitInfo> larger_top_k_light_splits(top_k_);
  for (int i = 0; i < top_k_; ++i) {
    smaller_top_k_light_splits[i].CopyFrom(smaller_top_k_splits[i]);
    larger_top_k_light_splits[i].CopyFrom(larger_top_k_splits[i]);
  }

Guolin Ke's avatar
Guolin Ke committed
339
340
341
  // gather
  int offset = 0;
  for (int i = 0; i < top_k_; ++i) {
342
343
344
345
    std::memcpy(input_buffer_.data() + offset, &smaller_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
    std::memcpy(input_buffer_.data() + offset, &larger_top_k_light_splits[i], sizeof(LightSplitInfo));
    offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
346
347
348
  }
  Network::Allgather(input_buffer_.data(), offset, output_buffer_.data());
  // get all top-k from all machines
349
350
  std::vector<LightSplitInfo> smaller_top_k_splits_global;
  std::vector<LightSplitInfo> larger_top_k_splits_global;
Guolin Ke's avatar
Guolin Ke committed
351
352
353
  offset = 0;
  for (int i = 0; i < num_machines_; ++i) {
    for (int j = 0; j < top_k_; ++j) {
354
355
356
357
358
359
      smaller_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&smaller_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
      larger_top_k_splits_global.push_back(LightSplitInfo());
      std::memcpy(&larger_top_k_splits_global.back(), output_buffer_.data() + offset, sizeof(LightSplitInfo));
      offset += sizeof(LightSplitInfo);
Guolin Ke's avatar
Guolin Ke committed
360
361
362
363
    }
  }
  // global voting
  std::vector<int> smaller_top_features, larger_top_features;
364
365
  GlobalVoting(this->smaller_leaf_splits_->LeafIndex(), smaller_top_k_splits_global, &smaller_top_features);
  GlobalVoting(this->larger_leaf_splits_->LeafIndex(), larger_top_k_splits_global, &larger_top_features);
Guolin Ke's avatar
Guolin Ke committed
366
367
368
369
  // copy local histgrams to buffer
  CopyLocalHistogram(smaller_top_features, larger_top_features);

  // Reduce scatter for histogram
Guolin Ke's avatar
Guolin Ke committed
370
371
  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(HistogramBinEntry), block_start_.data(), block_len_.data(),
                         output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramBinEntry::SumReducer);
Guolin Ke's avatar
Guolin Ke committed
372

Guolin Ke's avatar
Guolin Ke committed
373
374
375
376
377
378
379
  this->FindBestSplitsFromHistograms(is_feature_used, false);
}

template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {
  std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_);
  std::vector<SplitInfo> larger_best_per_thread(this->num_threads_);
380
381
  std::vector<int8_t> smaller_node_used_features(this->num_features_, 1);
  std::vector<int8_t> larger_node_used_features(this->num_features_, 1);
382
383
384
  if (this->config_->feature_fraction_bynode < 1.0f) {
    smaller_node_used_features = this->GetUsedFeatures(false);
    larger_node_used_features = this->GetUsedFeatures(false);
385
  }
Guolin Ke's avatar
Guolin Ke committed
386
  // find best split from local aggregated histograms
Guolin Ke's avatar
Guolin Ke committed
387
388
389

  OMP_INIT_EX();
  #pragma omp parallel for schedule(static)
390
  for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
391
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
392
    const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
393
    const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
Guolin Ke's avatar
Guolin Ke committed
394
    if (smaller_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
395
      SplitInfo smaller_split;
Guolin Ke's avatar
Guolin Ke committed
396
397
      // restore from buffer
      smaller_leaf_histogram_array_global_[feature_index].FromMemory(
Guolin Ke's avatar
Guolin Ke committed
398
        output_buffer_.data() + smaller_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
399

400
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
401
402
403
                                      smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
                                      smaller_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
404

Guolin Ke's avatar
Guolin Ke committed
405
406
      // find best threshold
      smaller_leaf_histogram_array_global_[feature_index].FindBestThreshold(
Guolin Ke's avatar
Guolin Ke committed
407
408
409
        smaller_leaf_splits_global_->sum_gradients(),
        smaller_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
410
411
        smaller_leaf_splits_global_->min_constraint(),
        smaller_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
412
        &smaller_split);
Guolin Ke's avatar
Guolin Ke committed
413
      smaller_split.feature = real_feature_index;
414
      if (smaller_split > smaller_bests_per_thread[tid] && smaller_node_used_features[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
415
        smaller_bests_per_thread[tid] = smaller_split;
Guolin Ke's avatar
Guolin Ke committed
416
      }
Guolin Ke's avatar
Guolin Ke committed
417
418
419
    }

    if (larger_is_feature_aggregated_[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
420
      SplitInfo larger_split;
Guolin Ke's avatar
Guolin Ke committed
421
422
      // restore from buffer
      larger_leaf_histogram_array_global_[feature_index].FromMemory(output_buffer_.data() + larger_buffer_read_start_pos_[feature_index]);
Guolin Ke's avatar
Guolin Ke committed
423

424
      this->train_data_->FixHistogram(feature_index,
Guolin Ke's avatar
Guolin Ke committed
425
426
427
                                      larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(),
                                      GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
                                      larger_leaf_histogram_array_global_[feature_index].RawData());
Guolin Ke's avatar
Guolin Ke committed
428

Guolin Ke's avatar
Guolin Ke committed
429
      // find best threshold
Guolin Ke's avatar
Guolin Ke committed
430
431
432
433
      larger_leaf_histogram_array_global_[feature_index].FindBestThreshold(
        larger_leaf_splits_global_->sum_gradients(),
        larger_leaf_splits_global_->sum_hessians(),
        GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
Guolin Ke's avatar
Guolin Ke committed
434
435
        larger_leaf_splits_global_->min_constraint(),
        larger_leaf_splits_global_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
436
        &larger_split);
Guolin Ke's avatar
Guolin Ke committed
437
      larger_split.feature = real_feature_index;
438
      if (larger_split > larger_best_per_thread[tid] && larger_node_used_features[feature_index]) {
Guolin Ke's avatar
Guolin Ke committed
439
        larger_best_per_thread[tid] = larger_split;
Guolin Ke's avatar
Guolin Ke committed
440
      }
Guolin Ke's avatar
Guolin Ke committed
441
    }
442
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
443
  }
444
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
445
446

  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_bests_per_thread);
447
  int leaf = this->smaller_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
448
  this->best_split_per_leaf_[leaf] = smaller_bests_per_thread[smaller_best_idx];
Guolin Ke's avatar
Guolin Ke committed
449

450
451
  if (this->larger_leaf_splits_ != nullptr && this->larger_leaf_splits_->LeafIndex() >= 0) {
    leaf = this->larger_leaf_splits_->LeafIndex();
Guolin Ke's avatar
Guolin Ke committed
452
453
    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best_per_thread);
    this->best_split_per_leaf_[leaf] = larger_best_per_thread[larger_best_idx];
Guolin Ke's avatar
Guolin Ke committed
454
  }
Guolin Ke's avatar
Guolin Ke committed
455
456

  // find local best
Guolin Ke's avatar
Guolin Ke committed
457
458
  SplitInfo smaller_best_split, larger_best_split;
  smaller_best_split = this->best_split_per_leaf_[this->smaller_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
459
  // find local best split for larger leaf
460
  if (this->larger_leaf_splits_->LeafIndex() >= 0) {
Guolin Ke's avatar
Guolin Ke committed
461
    larger_best_split = this->best_split_per_leaf_[this->larger_leaf_splits_->LeafIndex()];
Guolin Ke's avatar
Guolin Ke committed
462
463
  }
  // sync global best info
Guolin Ke's avatar
Guolin Ke committed
464
  SyncUpGlobalBestSplit(input_buffer_.data(), input_buffer_.data(), &smaller_best_split, &larger_best_split, this->config_->max_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
465
466

  // copy back
Guolin Ke's avatar
Guolin Ke committed
467
468
469
  this->best_split_per_leaf_[smaller_leaf_splits_global_->LeafIndex()] = smaller_best_split;
  if (larger_best_split.feature >= 0 && larger_leaf_splits_global_->LeafIndex() >= 0) {
    this->best_split_per_leaf_[larger_leaf_splits_global_->LeafIndex()] = larger_best_split;
Guolin Ke's avatar
Guolin Ke committed
470
471
472
  }
}

473
474
475
476
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) {
  TREELEARNER_T::Split(tree, best_Leaf, left_leaf, right_leaf);
  const SplitInfo& best_split_info = this->best_split_per_leaf_[best_Leaf];
Guolin Ke's avatar
Guolin Ke committed
477
478
479
  // set the global number of data for leaves
  global_data_count_in_leaf_[*left_leaf] = best_split_info.left_count;
  global_data_count_in_leaf_[*right_leaf] = best_split_info.right_count;
Guolin Ke's avatar
Guolin Ke committed
480
481
  auto p_left = smaller_leaf_splits_global_.get();
  auto p_right = larger_leaf_splits_global_.get();
Guolin Ke's avatar
Guolin Ke committed
482
483
  // init the global sumup info
  if (best_split_info.left_count < best_split_info.right_count) {
484
    smaller_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
485
486
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
487
    larger_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
488
489
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
490
  } else {
491
    smaller_leaf_splits_global_->Init(*right_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
492
493
      best_split_info.right_sum_gradient,
      best_split_info.right_sum_hessian);
494
    larger_leaf_splits_global_->Init(*left_leaf, this->data_partition_.get(),
Guolin Ke's avatar
Guolin Ke committed
495
496
      best_split_info.left_sum_gradient,
      best_split_info.left_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
    p_left = larger_leaf_splits_global_.get();
    p_right = smaller_leaf_splits_global_.get();
  }
  const int inner_feature_index = this->train_data_->InnerFeatureIndex(best_split_info.feature);
  bool is_numerical_split = this->train_data_->FeatureBinMapper(inner_feature_index)->bin_type() == BinType::NumericalBin;
  p_left->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  p_right->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  if (is_numerical_split) {
    double mid = (best_split_info.left_output + best_split_info.right_output) / 2.0f;
    if (best_split_info.monotone_type < 0) {
      p_left->SetValueConstraint(mid, best_split_info.max_constraint);
      p_right->SetValueConstraint(best_split_info.min_constraint, mid);
    } else if (best_split_info.monotone_type > 0) {
      p_left->SetValueConstraint(best_split_info.min_constraint, mid);
      p_right->SetValueConstraint(mid, best_split_info.max_constraint);
    }
Guolin Ke's avatar
Guolin Ke committed
513
514
515
  }
}

516
517
518
// instantiate template classes, otherwise linker cannot find the code
template class VotingParallelTreeLearner<GPUTreeLearner>;
template class VotingParallelTreeLearner<SerialTreeLearner>;
519
}  // namespace LightGBM