serial_tree_learner.cpp 27.2 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
#include "serial_tree_learner.h"

#include <LightGBM/utils/array_args.h>
4
5
#include <LightGBM/network.h>
#include <LightGBM/objective_function.h>
Guolin Ke's avatar
Guolin Ke committed
6
7
8
9
10
11

#include <algorithm>
#include <vector>

namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
12
13
14
15
16
17
18
19
20
#ifdef TIMETAG
std::chrono::duration<double, std::milli> init_train_time;
std::chrono::duration<double, std::milli> init_split_time;
std::chrono::duration<double, std::milli> hist_time;
std::chrono::duration<double, std::milli> find_split_time;
std::chrono::duration<double, std::milli> split_time;
std::chrono::duration<double, std::milli> ordered_bin_time;
#endif // TIMETAG

Guolin Ke's avatar
Guolin Ke committed
21
SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config)
Guolin Ke's avatar
Guolin Ke committed
22
  :tree_config_(tree_config) {
Guolin Ke's avatar
Guolin Ke committed
23
  random_ = Random(tree_config_->feature_fraction_seed);
24
25
  #pragma omp parallel
  #pragma omp master
Guolin Ke's avatar
Guolin Ke committed
26
27
28
  {
    num_threads_ = omp_get_num_threads();
  }
Guolin Ke's avatar
Guolin Ke committed
29
30
31
}

SerialTreeLearner::~SerialTreeLearner() {
32
  #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
33
34
35
36
37
38
  Log::Info("SerialTreeLearner::init_train costs %f", init_train_time * 1e-3);
  Log::Info("SerialTreeLearner::init_split costs %f", init_split_time * 1e-3);
  Log::Info("SerialTreeLearner::hist_build costs %f", hist_time * 1e-3);
  Log::Info("SerialTreeLearner::find_split costs %f", find_split_time * 1e-3);
  Log::Info("SerialTreeLearner::split costs %f", split_time * 1e-3);
  Log::Info("SerialTreeLearner::ordered_bin costs %f", ordered_bin_time * 1e-3);
39
  #endif
Guolin Ke's avatar
Guolin Ke committed
40
41
}

42
void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian) {
Guolin Ke's avatar
Guolin Ke committed
43
44
45
  train_data_ = train_data;
  num_data_ = train_data_->num_data();
  num_features_ = train_data_->num_features();
46
  is_constant_hessian_ = is_constant_hessian;
47
48
  int max_cache_size = 0;
  // Get the max size of pool
Guolin Ke's avatar
Guolin Ke committed
49
50
  if (tree_config_->histogram_pool_size <= 0) {
    max_cache_size = tree_config_->num_leaves;
51
52
53
  } else {
    size_t total_histogram_size = 0;
    for (int i = 0; i < train_data_->num_features(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
54
      total_histogram_size += sizeof(HistogramBinEntry) * train_data_->FeatureNumBin(i);
55
    }
Guolin Ke's avatar
Guolin Ke committed
56
    max_cache_size = static_cast<int>(tree_config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
57
58
  }
  // at least need 2 leaves
Guolin Ke's avatar
Guolin Ke committed
59
  max_cache_size = std::max(2, max_cache_size);
Guolin Ke's avatar
Guolin Ke committed
60
  max_cache_size = std::min(max_cache_size, tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
61

Guolin Ke's avatar
Guolin Ke committed
62
  histogram_pool_.DynamicChangeSize(train_data_, tree_config_, max_cache_size, tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
63
  // push split information for all leaves
Guolin Ke's avatar
Guolin Ke committed
64
  best_split_per_leaf_.resize(tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
65

Guolin Ke's avatar
Guolin Ke committed
66
  // get ordered bin
Guolin Ke's avatar
Guolin Ke committed
67
  train_data_->CreateOrderedBins(&ordered_bins_);
Guolin Ke's avatar
Guolin Ke committed
68
69

  // check existing for ordered bin
Guolin Ke's avatar
Guolin Ke committed
70
  for (int i = 0; i < static_cast<int>(ordered_bins_.size()); ++i) {
Guolin Ke's avatar
Guolin Ke committed
71
72
73
74
75
    if (ordered_bins_[i] != nullptr) {
      has_ordered_bin_ = true;
      break;
    }
  }
wxchan's avatar
wxchan committed
76
  // initialize splits for leaf
Guolin Ke's avatar
Guolin Ke committed
77
78
  smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data()));
  larger_leaf_splits_.reset(new LeafSplits(train_data_->num_data()));
Guolin Ke's avatar
Guolin Ke committed
79
80

  // initialize data partition
Guolin Ke's avatar
Guolin Ke committed
81
  data_partition_.reset(new DataPartition(num_data_, tree_config_->num_leaves));
Guolin Ke's avatar
Guolin Ke committed
82
  is_feature_used_.resize(num_features_);
83
  valid_feature_indices_ = train_data_->ValidFeatureIndices();
Guolin Ke's avatar
Guolin Ke committed
84
  // initialize ordered gradients and hessians
Guolin Ke's avatar
Guolin Ke committed
85
86
87
  ordered_gradients_.resize(num_data_);
  ordered_hessians_.resize(num_data_);
  // if has ordered bin, need to allocate a buffer to fast split
Guolin Ke's avatar
Guolin Ke committed
88
  if (has_ordered_bin_) {
Guolin Ke's avatar
Guolin Ke committed
89
    is_data_in_leaf_.resize(num_data_);
90
    std::fill(is_data_in_leaf_.begin(), is_data_in_leaf_.end(), static_cast<char>(0));
Guolin Ke's avatar
Guolin Ke committed
91
    ordered_bin_indices_.clear();
Guolin Ke's avatar
Guolin Ke committed
92
93
    for (int i = 0; i < static_cast<int>(ordered_bins_.size()); i++) {
      if (ordered_bins_[i] != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
94
        ordered_bin_indices_.push_back(i);
Guolin Ke's avatar
Guolin Ke committed
95
96
      }
    }
Guolin Ke's avatar
Guolin Ke committed
97
  }
Guolin Ke's avatar
Guolin Ke committed
98
  Log::Info("Number of data: %d, number of used features: %d", num_data_, num_features_);
Guolin Ke's avatar
Guolin Ke committed
99
100
}

Guolin Ke's avatar
Guolin Ke committed
101
102
103
void SerialTreeLearner::ResetTrainingData(const Dataset* train_data) {
  train_data_ = train_data;
  num_data_ = train_data_->num_data();
Guolin Ke's avatar
Guolin Ke committed
104
  CHECK(num_features_ == train_data_->num_features());
Guolin Ke's avatar
Guolin Ke committed
105
106

  // get ordered bin
Guolin Ke's avatar
Guolin Ke committed
107
108
  train_data_->CreateOrderedBins(&ordered_bins_);

Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
113
114
115
116
117
118
119
120
121
  // initialize splits for leaf
  smaller_leaf_splits_->ResetNumData(num_data_);
  larger_leaf_splits_->ResetNumData(num_data_);

  // initialize data partition
  data_partition_->ResetNumData(num_data_);

  // initialize ordered gradients and hessians
  ordered_gradients_.resize(num_data_);
  ordered_hessians_.resize(num_data_);
  // if has ordered bin, need to allocate a buffer to fast split
  if (has_ordered_bin_) {
    is_data_in_leaf_.resize(num_data_);
122
    std::fill(is_data_in_leaf_.begin(), is_data_in_leaf_.end(), static_cast<char>(0));
Guolin Ke's avatar
Guolin Ke committed
123
124
  }
}
Guolin Ke's avatar
Guolin Ke committed
125

Guolin Ke's avatar
Guolin Ke committed
126
127
128
129
130
131
132
133
134
135
void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) {
  if (tree_config_->num_leaves != tree_config->num_leaves) {
    tree_config_ = tree_config;
    int max_cache_size = 0;
    // Get the max size of pool
    if (tree_config->histogram_pool_size <= 0) {
      max_cache_size = tree_config_->num_leaves;
    } else {
      size_t total_histogram_size = 0;
      for (int i = 0; i < train_data_->num_features(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
136
        total_histogram_size += sizeof(HistogramBinEntry) * train_data_->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
137
138
139
140
141
142
      }
      max_cache_size = static_cast<int>(tree_config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
    }
    // at least need 2 leaves
    max_cache_size = std::max(2, max_cache_size);
    max_cache_size = std::min(max_cache_size, tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
143
    histogram_pool_.DynamicChangeSize(train_data_, tree_config_, max_cache_size, tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
144
145
146

    // push split information for all leaves
    best_split_per_leaf_.resize(tree_config_->num_leaves);
147
    data_partition_->ResetLeaves(tree_config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
148
149
150
151
  } else {
    tree_config_ = tree_config;
  }

Guolin Ke's avatar
Guolin Ke committed
152
  histogram_pool_.ResetConfig(tree_config_);
Guolin Ke's avatar
Guolin Ke committed
153
154
}

155
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) {
Guolin Ke's avatar
Guolin Ke committed
156
157
  gradients_ = gradients;
  hessians_ = hessians;
158
  is_constant_hessian_ = is_constant_hessian;
159
  #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
160
  auto start_time = std::chrono::steady_clock::now();
161
  #endif
Guolin Ke's avatar
Guolin Ke committed
162
163
  // some initial works before training
  BeforeTrain();
Guolin Ke's avatar
Guolin Ke committed
164

165
  #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
166
  init_train_time += std::chrono::steady_clock::now() - start_time;
167
  #endif
Guolin Ke's avatar
Guolin Ke committed
168

Guolin Ke's avatar
Guolin Ke committed
169
  auto tree = std::unique_ptr<Tree>(new Tree(tree_config_->num_leaves));
Guolin Ke's avatar
Guolin Ke committed
170
171
  // root leaf
  int left_leaf = 0;
172
  int cur_depth = 1;
Guolin Ke's avatar
Guolin Ke committed
173
174
  // only root leaf can be splitted on first time
  int right_leaf = -1;
Guolin Ke's avatar
Guolin Ke committed
175
  for (int split = 0; split < tree_config_->num_leaves - 1; ++split) {
176
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
177
    start_time = std::chrono::steady_clock::now();
178
    #endif
Guolin Ke's avatar
Guolin Ke committed
179
    // some initial works before finding best split
Guolin Ke's avatar
Guolin Ke committed
180
    if (BeforeFindBestSplit(tree.get(), left_leaf, right_leaf)) {
181
      #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
182
      init_split_time += std::chrono::steady_clock::now() - start_time;
183
      #endif
Guolin Ke's avatar
Guolin Ke committed
184
      // find best threshold for every feature
Guolin Ke's avatar
Guolin Ke committed
185
      FindBestSplits();
Guolin Ke's avatar
Guolin Ke committed
186
187
188
189
190
191
192
    }
    // Get a leaf with max split gain
    int best_leaf = static_cast<int>(ArrayArgs<SplitInfo>::ArgMax(best_split_per_leaf_));
    // Get split information for best leaf
    const SplitInfo& best_leaf_SplitInfo = best_split_per_leaf_[best_leaf];
    // cannot split, quit
    if (best_leaf_SplitInfo.gain <= 0.0) {
Guolin Ke's avatar
Guolin Ke committed
193
      Log::Warning("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
Guolin Ke's avatar
Guolin Ke committed
194
195
      break;
    }
196
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
197
    start_time = std::chrono::steady_clock::now();
198
    #endif
Guolin Ke's avatar
Guolin Ke committed
199
    // split tree with best leaf
Guolin Ke's avatar
Guolin Ke committed
200
    Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
201
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
202
    split_time += std::chrono::steady_clock::now() - start_time;
203
    #endif
204
    cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
Guolin Ke's avatar
Guolin Ke committed
205
  }
Guolin Ke's avatar
Guolin Ke committed
206
  Log::Debug("Trained a tree with leaves=%d and max_depth=%d", tree->num_leaves(), cur_depth);
Guolin Ke's avatar
Guolin Ke committed
207
  return tree.release();
Guolin Ke's avatar
Guolin Ke committed
208
209
}

210
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const {
Guolin Ke's avatar
Guolin Ke committed
211
212
  auto tree = std::unique_ptr<Tree>(new Tree(*old_tree));
  CHECK(data_partition_->num_leaves() >= tree->num_leaves());
213
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
214
  #pragma omp parallel for schedule(static)
215
  for (int i = 0; i < tree->num_leaves(); ++i) {
216
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
217
218
219
    data_size_t cnt_leaf_data = 0;
    auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
    double sum_grad = 0.0f;
220
    double sum_hess = kEpsilon;
Guolin Ke's avatar
Guolin Ke committed
221
222
223
224
225
226
    for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
      auto idx = tmp_idx[j];
      sum_grad += gradients[idx];
      sum_hess += hessians[idx];
    }
    double output = FeatureHistogram::CalculateSplittedLeafOutput(sum_grad, sum_hess,
227
                                                                  tree_config_->lambda_l1, tree_config_->lambda_l2, tree_config_->max_delta_step);
228
    tree->SetLeafOutput(i, output* tree->shrinkage());
229
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
230
  }
231
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
232
233
234
  return tree.release();
}

235
236
237
238
239
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred, const score_t* gradients, const score_t *hessians) {
  data_partition_->ResetByLeafPred(leaf_pred, old_tree->num_leaves());
  return FitByExistingTree(old_tree, gradients, hessians);
}

Guolin Ke's avatar
Guolin Ke committed
240
void SerialTreeLearner::BeforeTrain() {
Guolin Ke's avatar
Guolin Ke committed
241

242
243
  // reset histogram pool
  histogram_pool_.ResetMap();
Guolin Ke's avatar
Guolin Ke committed
244

Guolin Ke's avatar
Guolin Ke committed
245
  if (tree_config_->feature_fraction < 1) {
246
247
248
    int used_feature_cnt = static_cast<int>(valid_feature_indices_.size()*tree_config_->feature_fraction);
    // at least use one feature
    used_feature_cnt = std::max(used_feature_cnt, 1);
Guolin Ke's avatar
Guolin Ke committed
249
250
251
    // initialize used features
    std::memset(is_feature_used_.data(), 0, sizeof(int8_t) * num_features_);
    // Get used feature at current tree
Guolin Ke's avatar
Guolin Ke committed
252
    auto sampled_indices = random_.Sample(static_cast<int>(valid_feature_indices_.size()), used_feature_cnt);
253
    int omp_loop_size = static_cast<int>(sampled_indices.size());
Guolin Ke's avatar
Guolin Ke committed
254
255
    #pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
    for (int i = 0; i < omp_loop_size; ++i) {
256
257
258
      int used_feature = valid_feature_indices_[sampled_indices[i]];
      int inner_feature_index = train_data_->InnerFeatureIndex(used_feature);
      CHECK(inner_feature_index >= 0);
Guolin Ke's avatar
Guolin Ke committed
259
      is_feature_used_[inner_feature_index] = 1;
Guolin Ke's avatar
Guolin Ke committed
260
261
    }
  } else {
Guolin Ke's avatar
Guolin Ke committed
262
    #pragma omp parallel for schedule(static, 512) if (num_features_ >= 1024)
Guolin Ke's avatar
Guolin Ke committed
263
264
265
    for (int i = 0; i < num_features_; ++i) {
      is_feature_used_[i] = 1;
    }
Guolin Ke's avatar
Guolin Ke committed
266
  }
267

Guolin Ke's avatar
Guolin Ke committed
268
269
270
271
  // initialize data partition
  data_partition_->Init();

  // reset the splits for leaves
Guolin Ke's avatar
Guolin Ke committed
272
  for (int i = 0; i < tree_config_->num_leaves; ++i) {
Guolin Ke's avatar
Guolin Ke committed
273
274
275
276
277
278
279
    best_split_per_leaf_[i].Reset();
  }

  // Sumup for root
  if (data_partition_->leaf_count(0) == num_data_) {
    // use all data
    smaller_leaf_splits_->Init(gradients_, hessians_);
Guolin Ke's avatar
Guolin Ke committed
280

Guolin Ke's avatar
Guolin Ke committed
281
282
  } else {
    // use bagging, only use part of data
Guolin Ke's avatar
Guolin Ke committed
283
    smaller_leaf_splits_->Init(0, data_partition_.get(), gradients_, hessians_);
Guolin Ke's avatar
Guolin Ke committed
284
285
286
287
288
289
  }

  larger_leaf_splits_->Init();

  // if has ordered bin, need to initialize the ordered bin
  if (has_ordered_bin_) {
290
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
291
    auto start_time = std::chrono::steady_clock::now();
292
    #endif
Guolin Ke's avatar
Guolin Ke committed
293
294
    if (data_partition_->leaf_count(0) == num_data_) {
      // use all data, pass nullptr
295
296
      OMP_INIT_EX();
      #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
297
      for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
298
        OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
299
        ordered_bins_[ordered_bin_indices_[i]]->Init(nullptr, tree_config_->num_leaves);
300
        OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
301
      }
302
      OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
303
304
305
306
307
308
309
    } else {
      // bagging, only use part of data

      // mark used data
      const data_size_t* indices = data_partition_->indices();
      data_size_t begin = data_partition_->leaf_begin(0);
      data_size_t end = begin + data_partition_->leaf_count(0);
Guolin Ke's avatar
Guolin Ke committed
310
311
      data_size_t loop_size = end - begin;
      #pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
Guolin Ke's avatar
Guolin Ke committed
312
313
314
      for (data_size_t i = begin; i < end; ++i) {
        is_data_in_leaf_[indices[i]] = 1;
      }
315
      OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
316
      // initialize ordered bin
317
      #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
318
      for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
319
        OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
320
        ordered_bins_[ordered_bin_indices_[i]]->Init(is_data_in_leaf_.data(), tree_config_->num_leaves);
321
        OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
322
      }
323
      OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
324
      #pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
Guolin Ke's avatar
Guolin Ke committed
325
326
327
      for (data_size_t i = begin; i < end; ++i) {
        is_data_in_leaf_[indices[i]] = 0;
      }
Guolin Ke's avatar
Guolin Ke committed
328
    }
329
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
330
    ordered_bin_time += std::chrono::steady_clock::now() - start_time;
331
    #endif
Guolin Ke's avatar
Guolin Ke committed
332
333
334
  }
}

Guolin Ke's avatar
Guolin Ke committed
335
bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
Guolin Ke's avatar
Guolin Ke committed
336
  // check depth of current leaf
Guolin Ke's avatar
Guolin Ke committed
337
  if (tree_config_->max_depth > 0) {
Guolin Ke's avatar
Guolin Ke committed
338
    // only need to check left leaf, since right leaf is in same level of left leaf
Guolin Ke's avatar
Guolin Ke committed
339
    if (tree->leaf_depth(left_leaf) >= tree_config_->max_depth) {
Guolin Ke's avatar
Guolin Ke committed
340
341
342
343
344
345
346
      best_split_per_leaf_[left_leaf].gain = kMinScore;
      if (right_leaf >= 0) {
        best_split_per_leaf_[right_leaf].gain = kMinScore;
      }
      return false;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
347
348
349
  data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
  data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
  // no enough data to continue
Guolin Ke's avatar
Guolin Ke committed
350
  if (num_data_in_right_child < static_cast<data_size_t>(tree_config_->min_data_in_leaf * 2)
Guolin Ke's avatar
Guolin Ke committed
351
      && num_data_in_left_child < static_cast<data_size_t>(tree_config_->min_data_in_leaf * 2)) {
Guolin Ke's avatar
Guolin Ke committed
352
353
354
355
356
357
    best_split_per_leaf_[left_leaf].gain = kMinScore;
    if (right_leaf >= 0) {
      best_split_per_leaf_[right_leaf].gain = kMinScore;
    }
    return false;
  }
358
  parent_leaf_histogram_array_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
359
360
  // only have root
  if (right_leaf < 0) {
361
    histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
Guolin Ke's avatar
Guolin Ke committed
362
363
    larger_leaf_histogram_array_ = nullptr;
  } else if (num_data_in_left_child < num_data_in_right_child) {
Hui Xue's avatar
Hui Xue committed
364
    // put parent(left) leaf's histograms into larger leaf's histograms
365
366
367
    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
    histogram_pool_.Move(left_leaf, right_leaf);
    histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
Guolin Ke's avatar
Guolin Ke committed
368
  } else {
Hui Xue's avatar
Hui Xue committed
369
    // put parent(left) leaf's histograms to larger leaf's histograms
370
371
    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
    histogram_pool_.Get(right_leaf, &smaller_leaf_histogram_array_);
Guolin Ke's avatar
Guolin Ke committed
372
373
374
  }
  // split for the ordered bin
  if (has_ordered_bin_ && right_leaf >= 0) {
375
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
376
    auto start_time = std::chrono::steady_clock::now();
377
    #endif
Guolin Ke's avatar
Guolin Ke committed
378
379
    // mark data that at left-leaf
    const data_size_t* indices = data_partition_->indices();
Guolin Ke's avatar
Guolin Ke committed
380
381
382
    const auto left_cnt = data_partition_->leaf_count(left_leaf);
    const auto right_cnt = data_partition_->leaf_count(right_leaf);
    char mark = 1;
Guolin Ke's avatar
Guolin Ke committed
383
    data_size_t begin = data_partition_->leaf_begin(left_leaf);
Guolin Ke's avatar
Guolin Ke committed
384
    data_size_t end = begin + left_cnt;
Guolin Ke's avatar
Guolin Ke committed
385
    data_size_t loop_size = end - begin;
Guolin Ke's avatar
Guolin Ke committed
386
387
388
389
390
    if (left_cnt > right_cnt) {
      begin = data_partition_->leaf_begin(right_leaf);
      end = begin + right_cnt;
      mark = 0;
    }
Guolin Ke's avatar
Guolin Ke committed
391
    #pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
Guolin Ke's avatar
Guolin Ke committed
392
393
394
    for (data_size_t i = begin; i < end; ++i) {
      is_data_in_leaf_[indices[i]] = 1;
    }
395
    OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
396
    // split the ordered bin
397
    #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
398
    for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
399
      OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
400
      ordered_bins_[ordered_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark);
401
      OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
402
    }
403
    OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
404
    #pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
Guolin Ke's avatar
Guolin Ke committed
405
406
407
    for (data_size_t i = begin; i < end; ++i) {
      is_data_in_leaf_[indices[i]] = 0;
    }
408
    #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
409
    ordered_bin_time += std::chrono::steady_clock::now() - start_time;
410
    #endif
Guolin Ke's avatar
Guolin Ke committed
411
412
413
414
  }
  return true;
}

Guolin Ke's avatar
Guolin Ke committed
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
void SerialTreeLearner::FindBestSplits() {
  std::vector<int8_t> is_feature_used(num_features_, 0);
  #pragma omp parallel for schedule(static,1024) if (num_features_ >= 2048)
  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
    if (!is_feature_used_[feature_index]) continue;
    if (parent_leaf_histogram_array_ != nullptr
        && !parent_leaf_histogram_array_[feature_index].is_splittable()) {
      smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
      continue;
    }
    is_feature_used[feature_index] = 1;
  }
  bool use_subtract = parent_leaf_histogram_array_ != nullptr;
  ConstructHistograms(is_feature_used, use_subtract);
  FindBestSplitsFromHistograms(is_feature_used, use_subtract);
}

432
void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
Guolin Ke's avatar
Guolin Ke committed
433
  #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
434
  auto start_time = std::chrono::steady_clock::now();
Guolin Ke's avatar
Guolin Ke committed
435
  #endif
Guolin Ke's avatar
Guolin Ke committed
436
437
438
  // construct smaller leaf
  HistogramBinEntry* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - 1;
  train_data_->ConstructHistograms(is_feature_used,
Guolin Ke's avatar
Guolin Ke committed
439
440
441
                                   smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
                                   smaller_leaf_splits_->LeafIndex(),
                                   ordered_bins_, gradients_, hessians_,
442
                                   ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
Guolin Ke's avatar
Guolin Ke committed
443
                                   ptr_smaller_leaf_hist_data);
Guolin Ke's avatar
Guolin Ke committed
444
445
446
447
448

  if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
    // construct larger leaf
    HistogramBinEntry* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - 1;
    train_data_->ConstructHistograms(is_feature_used,
Guolin Ke's avatar
Guolin Ke committed
449
450
451
                                     larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
                                     larger_leaf_splits_->LeafIndex(),
                                     ordered_bins_, gradients_, hessians_,
452
                                     ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
Guolin Ke's avatar
Guolin Ke committed
453
                                     ptr_larger_leaf_hist_data);
Guolin Ke's avatar
Guolin Ke committed
454
  }
Guolin Ke's avatar
Guolin Ke committed
455
  #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
456
  hist_time += std::chrono::steady_clock::now() - start_time;
Guolin Ke's avatar
Guolin Ke committed
457
  #endif
458
459
}

Guolin Ke's avatar
Guolin Ke committed
460
void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
461
  #ifdef TIMETAG
462
  auto start_time = std::chrono::steady_clock::now();
463
  #endif
Guolin Ke's avatar
Guolin Ke committed
464
465
  std::vector<SplitInfo> smaller_best(num_threads_);
  std::vector<SplitInfo> larger_best(num_threads_);
466
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
467
  // find splits
468
  #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
469
  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
470
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
471
472
473
    if (!is_feature_used[feature_index]) { continue; }
    const int tid = omp_get_thread_num();
    SplitInfo smaller_split;
Guolin Ke's avatar
Guolin Ke committed
474
475
476
477
    train_data_->FixHistogram(feature_index,
                              smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
                              smaller_leaf_splits_->num_data_in_leaf(),
                              smaller_leaf_histogram_array_[feature_index].RawData());
478
    int real_fidx = train_data_->RealFeatureIndex(feature_index);
Guolin Ke's avatar
Guolin Ke committed
479
480
481
482
    smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
      smaller_leaf_splits_->sum_gradients(),
      smaller_leaf_splits_->sum_hessians(),
      smaller_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
483
484
      smaller_leaf_splits_->min_constraint(),
      smaller_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
485
      &smaller_split);
486
487
    smaller_split.feature = real_fidx;
    if (smaller_split > smaller_best[tid]) {
Guolin Ke's avatar
Guolin Ke committed
488
489
      smaller_best[tid] = smaller_split;
    }
Guolin Ke's avatar
Guolin Ke committed
490
    // only has root leaf
Guolin Ke's avatar
Guolin Ke committed
491
    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->LeafIndex() < 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
492

Guolin Ke's avatar
Guolin Ke committed
493
    if (use_subtract) {
494
495
      larger_leaf_histogram_array_[feature_index].Subtract(smaller_leaf_histogram_array_[feature_index]);
    } else {
Guolin Ke's avatar
Guolin Ke committed
496
      train_data_->FixHistogram(feature_index, larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(),
Guolin Ke's avatar
Guolin Ke committed
497
498
                                larger_leaf_splits_->num_data_in_leaf(),
                                larger_leaf_histogram_array_[feature_index].RawData());
499
    }
Guolin Ke's avatar
Guolin Ke committed
500
    SplitInfo larger_split;
Guolin Ke's avatar
Guolin Ke committed
501
    // find best threshold for larger child
Guolin Ke's avatar
Guolin Ke committed
502
503
504
505
    larger_leaf_histogram_array_[feature_index].FindBestThreshold(
      larger_leaf_splits_->sum_gradients(),
      larger_leaf_splits_->sum_hessians(),
      larger_leaf_splits_->num_data_in_leaf(),
Guolin Ke's avatar
Guolin Ke committed
506
507
      larger_leaf_splits_->min_constraint(),
      larger_leaf_splits_->max_constraint(),
Guolin Ke's avatar
Guolin Ke committed
508
      &larger_split);
509
510
    larger_split.feature = real_fidx;
    if (larger_split > larger_best[tid]) {
Guolin Ke's avatar
Guolin Ke committed
511
      larger_best[tid] = larger_split;
Guolin Ke's avatar
Guolin Ke committed
512
    }
513
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
514
  }
515
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
516
517
518
519
520
521
522
523
524
525

  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
  int leaf = smaller_leaf_splits_->LeafIndex();
  best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];

  if (larger_leaf_splits_ != nullptr && larger_leaf_splits_->LeafIndex() >= 0) {
    leaf = larger_leaf_splits_->LeafIndex();
    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
    best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
  }
526
  #ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
527
  find_split_time += std::chrono::steady_clock::now() - start_time;
528
  #endif
Guolin Ke's avatar
Guolin Ke committed
529
530
}

Guolin Ke's avatar
Guolin Ke committed
531

532
533
void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* right_leaf) {
  const SplitInfo& best_split_info = best_split_per_leaf_[best_leaf];
Guolin Ke's avatar
Guolin Ke committed
534
  const int inner_feature_index = train_data_->InnerFeatureIndex(best_split_info.feature);
Guolin Ke's avatar
Guolin Ke committed
535
  // left = parent
536
  *left_leaf = best_leaf;
Guolin Ke's avatar
Guolin Ke committed
537
538
  bool is_numerical_split = train_data_->FeatureBinMapper(inner_feature_index)->bin_type() == BinType::NumericalBin;
  if (is_numerical_split) {
539
    auto threshold_double = train_data_->RealThreshold(inner_feature_index, best_split_info.threshold);
540
541
542
543
544
    // split tree, will return right leaf
    *right_leaf = tree->Split(best_leaf,
                              inner_feature_index,
                              best_split_info.feature,
                              best_split_info.threshold,
545
                              threshold_double,
546
547
548
549
                              static_cast<double>(best_split_info.left_output),
                              static_cast<double>(best_split_info.right_output),
                              static_cast<data_size_t>(best_split_info.left_count),
                              static_cast<data_size_t>(best_split_info.right_count),
550
                              static_cast<float>(best_split_info.gain),
551
552
                              train_data_->FeatureBinMapper(inner_feature_index)->missing_type(),
                              best_split_info.default_left);
553
554
    data_partition_->Split(best_leaf, train_data_, inner_feature_index,
                           &best_split_info.threshold, 1, best_split_info.default_left, *right_leaf);
555
  } else {
556
557
558
559
560
561
    std::vector<uint32_t> cat_bitset_inner = Common::ConstructBitset(best_split_info.cat_threshold.data(), best_split_info.num_cat_threshold);
    std::vector<int> threshold_int(best_split_info.num_cat_threshold);
    for (int i = 0; i < best_split_info.num_cat_threshold; ++i) {
      threshold_int[i] = static_cast<int>(train_data_->RealThreshold(inner_feature_index, best_split_info.cat_threshold[i]));
    }
    std::vector<uint32_t> cat_bitset = Common::ConstructBitset(threshold_int.data(), best_split_info.num_cat_threshold);
562
563
564
    *right_leaf = tree->SplitCategorical(best_leaf,
                                         inner_feature_index,
                                         best_split_info.feature,
565
566
567
568
                                         cat_bitset_inner.data(),
                                         static_cast<int>(cat_bitset_inner.size()),
                                         cat_bitset.data(),
                                         static_cast<int>(cat_bitset.size()),
569
570
571
572
                                         static_cast<double>(best_split_info.left_output),
                                         static_cast<double>(best_split_info.right_output),
                                         static_cast<data_size_t>(best_split_info.left_count),
                                         static_cast<data_size_t>(best_split_info.right_count),
573
                                         static_cast<float>(best_split_info.gain),
574
                                         train_data_->FeatureBinMapper(inner_feature_index)->missing_type());
575
576
    data_partition_->Split(best_leaf, train_data_, inner_feature_index,
                           cat_bitset_inner.data(), static_cast<int>(cat_bitset_inner.size()), best_split_info.default_left, *right_leaf);
577
  }
578
579
580
581

  #ifdef DEBUG
  CHECK(best_split_info.left_count == data_partition_->leaf_count(best_leaf));
  #endif
Guolin Ke's avatar
Guolin Ke committed
582
583
  auto p_left = smaller_leaf_splits_.get();
  auto p_right = larger_leaf_splits_.get();
Guolin Ke's avatar
Guolin Ke committed
584
585
  // init the leaves that used on next iteration
  if (best_split_info.left_count < best_split_info.right_count) {
Guolin Ke's avatar
Guolin Ke committed
586
587
    smaller_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
    larger_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
588
  } else {
Guolin Ke's avatar
Guolin Ke committed
589
590
    smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
    larger_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
591
592
593
594
595
596
597
598
599
600
601
602
603
604
    p_right = smaller_leaf_splits_.get();
    p_left = larger_leaf_splits_.get();
  }
  p_left->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  p_right->SetValueConstraint(best_split_info.min_constraint, best_split_info.max_constraint);
  if (is_numerical_split) {
    double mid = (best_split_info.left_output + best_split_info.right_output) / 2.0f;
    if (best_split_info.monotone_type < 0) {
      p_left->SetValueConstraint(mid, best_split_info.max_constraint);
      p_right->SetValueConstraint(best_split_info.min_constraint, mid);
    } else if (best_split_info.monotone_type > 0) {
      p_left->SetValueConstraint(best_split_info.min_constraint, mid);
      p_right->SetValueConstraint(mid, best_split_info.max_constraint);
    }
Guolin Ke's avatar
Guolin Ke committed
605
606
607
  }
}

608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj, const double* prediction,
                                        data_size_t total_num_data, const data_size_t* bag_indices, data_size_t bag_cnt) const {
  if (obj != nullptr && obj->IsRenewTreeOutput()) {
    CHECK(tree->num_leaves() <= data_partition_->num_leaves());
    const data_size_t* bag_mapper = nullptr;
    if (total_num_data != num_data_) {
      CHECK(bag_cnt == num_data_);
      bag_mapper = bag_indices;
    }
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < tree->num_leaves(); ++i) {
      const double output = static_cast<double>(tree->LeafOutput(i));
      data_size_t cnt_leaf_data = 0;
      auto index_mapper = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
      CHECK(cnt_leaf_data > 0);
      // bag_mapper[index_mapper[i]]
      const double new_output = obj->RenewTreeOutput(output, prediction, index_mapper, bag_mapper, cnt_leaf_data);
      tree->SetLeafOutput(i, new_output);
    }
    if (Network::num_machines() > 1) {
      std::vector<double> outputs(tree->num_leaves());
      for (int i = 0; i < tree->num_leaves(); ++i) {
        outputs[i] = static_cast<double>(tree->LeafOutput(i));
      }
      Network::GlobalSum(outputs);
      for (int i = 0; i < tree->num_leaves(); ++i) {
        tree->SetLeafOutput(i, outputs[i] / Network::num_machines());
      }
    } 
  }
}

Guolin Ke's avatar
Guolin Ke committed
640
}  // namespace LightGBM