serial_tree_learner.cpp 34.3 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
#include "serial_tree_learner.h"

7
8
9
10
11
#include <LightGBM/network.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/common.h>

12
13
14
15
16
#include <algorithm>
#include <queue>
#include <unordered_map>
#include <utility>

17
18
#include "cost_effective_gradient_boosting.hpp"

Guolin Ke's avatar
Guolin Ke committed
19
20
namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
21
SerialTreeLearner::SerialTreeLearner(const Config* config)
22
    : config_(config), col_sampler_(config) {
Guolin Ke's avatar
Guolin Ke committed
23
24
25
26
27
}

SerialTreeLearner::~SerialTreeLearner() {
}

28
void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian) {
Guolin Ke's avatar
Guolin Ke committed
29
30
31
  train_data_ = train_data;
  num_data_ = train_data_->num_data();
  num_features_ = train_data_->num_features();
32
33
  int max_cache_size = 0;
  // Get the max size of pool
Guolin Ke's avatar
Guolin Ke committed
34
35
  if (config_->histogram_pool_size <= 0) {
    max_cache_size = config_->num_leaves;
36
37
38
  } else {
    size_t total_histogram_size = 0;
    for (int i = 0; i < train_data_->num_features(); ++i) {
39
      total_histogram_size += kHistEntrySize * train_data_->FeatureNumBin(i);
40
    }
Guolin Ke's avatar
Guolin Ke committed
41
    max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
42
43
  }
  // at least need 2 leaves
Guolin Ke's avatar
Guolin Ke committed
44
  max_cache_size = std::max(2, max_cache_size);
Guolin Ke's avatar
Guolin Ke committed
45
  max_cache_size = std::min(max_cache_size, config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
46

Guolin Ke's avatar
Guolin Ke committed
47
  // push split information for all leaves
Guolin Ke's avatar
Guolin Ke committed
48
  best_split_per_leaf_.resize(config_->num_leaves);
49
  constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves, train_data_->num_features()));
Guolin Ke's avatar
Guolin Ke committed
50

wxchan's avatar
wxchan committed
51
  // initialize splits for leaf
Guolin Ke's avatar
Guolin Ke committed
52
53
  smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data(), config_));
  larger_leaf_splits_.reset(new LeafSplits(train_data_->num_data(), config_));
Guolin Ke's avatar
Guolin Ke committed
54
55

  // initialize data partition
Guolin Ke's avatar
Guolin Ke committed
56
  data_partition_.reset(new DataPartition(num_data_, config_->num_leaves));
57
  col_sampler_.SetTrainingData(train_data_);
Guolin Ke's avatar
Guolin Ke committed
58
  // initialize ordered gradients and hessians
Guolin Ke's avatar
Guolin Ke committed
59
60
  ordered_gradients_.resize(num_data_);
  ordered_hessians_.resize(num_data_);
61

62
  GetShareStates(train_data_, is_constant_hessian, true);
63
64
65
66
  histogram_pool_.DynamicChangeSize(train_data_,
  share_state_->num_hist_total_bin(),
  share_state_->feature_hist_offsets(),
  config_, max_cache_size, config_->num_leaves);
67
  Log::Info("Number of data points in the train set: %d, number of used features: %d", num_data_, num_features_);
68
69
70
  if (CostEfficientGradientBoosting::IsEnable(config_)) {
    cegb_.reset(new CostEfficientGradientBoosting(this));
    cegb_->Init();
71
  }
Guolin Ke's avatar
Guolin Ke committed
72
73
}

74
75
76
void SerialTreeLearner::GetShareStates(const Dataset* dataset,
                                       bool is_constant_hessian,
                                       bool is_first_time) {
77
  if (is_first_time) {
78
    share_state_.reset(dataset->GetShareStates(
79
80
81
        ordered_gradients_.data(), ordered_hessians_.data(),
        col_sampler_.is_feature_used_bytree(), is_constant_hessian,
        config_->force_col_wise, config_->force_row_wise));
82
  } else {
Nikita Titov's avatar
Nikita Titov committed
83
    CHECK_NOTNULL(share_state_);
84
    // cannot change is_hist_col_wise during training
85
    share_state_.reset(dataset->GetShareStates(
86
        ordered_gradients_.data(), ordered_hessians_.data(), col_sampler_.is_feature_used_bytree(),
87
88
        is_constant_hessian, share_state_->is_col_wise,
        !share_state_->is_col_wise));
89
  }
Nikita Titov's avatar
Nikita Titov committed
90
  CHECK_NOTNULL(share_state_);
91
92
}

93
94
95
void SerialTreeLearner::ResetTrainingDataInner(const Dataset* train_data,
                                               bool is_constant_hessian,
                                               bool reset_multi_val_bin) {
Guolin Ke's avatar
Guolin Ke committed
96
97
  train_data_ = train_data;
  num_data_ = train_data_->num_data();
98
  CHECK_EQ(num_features_, train_data_->num_features());
Guolin Ke's avatar
Guolin Ke committed
99
100
101
102
103
104
105

  // initialize splits for leaf
  smaller_leaf_splits_->ResetNumData(num_data_);
  larger_leaf_splits_->ResetNumData(num_data_);

  // initialize data partition
  data_partition_->ResetNumData(num_data_);
106
  if (reset_multi_val_bin) {
107
    col_sampler_.SetTrainingData(train_data_);
108
109
    GetShareStates(train_data_, is_constant_hessian, false);
  }
110

Guolin Ke's avatar
Guolin Ke committed
111
112
113
  // initialize ordered gradients and hessians
  ordered_gradients_.resize(num_data_);
  ordered_hessians_.resize(num_data_);
114
115
116
  if (cegb_ != nullptr) {
    cegb_->Init();
  }
Guolin Ke's avatar
Guolin Ke committed
117
}
Guolin Ke's avatar
Guolin Ke committed
118

Guolin Ke's avatar
Guolin Ke committed
119
120
121
void SerialTreeLearner::ResetConfig(const Config* config) {
  if (config_->num_leaves != config->num_leaves) {
    config_ = config;
Guolin Ke's avatar
Guolin Ke committed
122
123
    int max_cache_size = 0;
    // Get the max size of pool
Guolin Ke's avatar
Guolin Ke committed
124
125
    if (config->histogram_pool_size <= 0) {
      max_cache_size = config_->num_leaves;
Guolin Ke's avatar
Guolin Ke committed
126
127
128
    } else {
      size_t total_histogram_size = 0;
      for (int i = 0; i < train_data_->num_features(); ++i) {
129
        total_histogram_size += kHistEntrySize * train_data_->FeatureNumBin(i);
Guolin Ke's avatar
Guolin Ke committed
130
      }
Guolin Ke's avatar
Guolin Ke committed
131
      max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
Guolin Ke's avatar
Guolin Ke committed
132
133
134
    }
    // at least need 2 leaves
    max_cache_size = std::max(2, max_cache_size);
Guolin Ke's avatar
Guolin Ke committed
135
    max_cache_size = std::min(max_cache_size, config_->num_leaves);
136
137
138
139
    histogram_pool_.DynamicChangeSize(train_data_,
    share_state_->num_hist_total_bin(),
    share_state_->feature_hist_offsets(),
    config_, max_cache_size, config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
140
141

    // push split information for all leaves
Guolin Ke's avatar
Guolin Ke committed
142
143
    best_split_per_leaf_.resize(config_->num_leaves);
    data_partition_->ResetLeaves(config_->num_leaves);
Guolin Ke's avatar
Guolin Ke committed
144
  } else {
Guolin Ke's avatar
Guolin Ke committed
145
    config_ = config;
Guolin Ke's avatar
Guolin Ke committed
146
  }
147
  col_sampler_.SetConfig(config_);
148
  histogram_pool_.ResetConfig(train_data_, config_);
149
  if (CostEfficientGradientBoosting::IsEnable(config_)) {
150
151
152
    if (cegb_ == nullptr) {
      cegb_.reset(new CostEfficientGradientBoosting(this));
    }
153
154
    cegb_->Init();
  }
155
  constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves, train_data_->num_features()));
Guolin Ke's avatar
Guolin Ke committed
156
157
}

158
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool /*is_first_tree*/) {
159
  Common::FunctionTimer fun_timer("SerialTreeLearner::Train", global_timer);
Guolin Ke's avatar
Guolin Ke committed
160
161
  gradients_ = gradients;
  hessians_ = hessians;
162
  int num_threads = OMP_NUM_THREADS();
Nikita Titov's avatar
Nikita Titov committed
163
  if (share_state_->num_threads != num_threads && share_state_->num_threads > 0) {
164
    Log::Warning(
Nikita Titov's avatar
Nikita Titov committed
165
166
        "Detected that num_threads changed during training (from %d to %d), "
        "it may cause unexpected errors.",
167
168
169
        share_state_->num_threads, num_threads);
  }
  share_state_->num_threads = num_threads;
Nikita Titov's avatar
Nikita Titov committed
170

Guolin Ke's avatar
Guolin Ke committed
171
172
  // some initial works before training
  BeforeTrain();
Guolin Ke's avatar
Guolin Ke committed
173

174
  bool track_branch_features = !(config_->interaction_constraints_vector.empty());
175
  auto tree = std::unique_ptr<Tree>(new Tree(config_->num_leaves, track_branch_features, false));
Guolin Ke's avatar
Guolin Ke committed
176
177
  auto tree_ptr = tree.get();
  constraints_->ShareTreePointer(tree_ptr);
178

Guolin Ke's avatar
Guolin Ke committed
179
180
  // root leaf
  int left_leaf = 0;
181
  int cur_depth = 1;
Guolin Ke's avatar
Guolin Ke committed
182
183
  // only root leaf can be splitted on first time
  int right_leaf = -1;
184

Guolin Ke's avatar
Guolin Ke committed
185
  int init_splits = ForceSplits(tree_ptr, &left_leaf, &right_leaf, &cur_depth);
186

Guolin Ke's avatar
Guolin Ke committed
187
  for (int split = init_splits; split < config_->num_leaves - 1; ++split) {
Guolin Ke's avatar
Guolin Ke committed
188
    // some initial works before finding best split
Guolin Ke's avatar
Guolin Ke committed
189
    if (BeforeFindBestSplit(tree_ptr, left_leaf, right_leaf)) {
Guolin Ke's avatar
Guolin Ke committed
190
      // find best threshold for every feature
Guolin Ke's avatar
Guolin Ke committed
191
      FindBestSplits(tree_ptr);
192
    }
Guolin Ke's avatar
Guolin Ke committed
193
194
195
196
197
198
    // Get a leaf with max split gain
    int best_leaf = static_cast<int>(ArrayArgs<SplitInfo>::ArgMax(best_split_per_leaf_));
    // Get split information for best leaf
    const SplitInfo& best_leaf_SplitInfo = best_split_per_leaf_[best_leaf];
    // cannot split, quit
    if (best_leaf_SplitInfo.gain <= 0.0) {
Guolin Ke's avatar
Guolin Ke committed
199
      Log::Warning("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
Guolin Ke's avatar
Guolin Ke committed
200
201
202
      break;
    }
    // split tree with best leaf
Guolin Ke's avatar
Guolin Ke committed
203
    Split(tree_ptr, best_leaf, &left_leaf, &right_leaf);
204
    cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
Guolin Ke's avatar
Guolin Ke committed
205
  }
206

207
  Log::Debug("Trained a tree with leaves = %d and max_depth = %d", tree->num_leaves(), cur_depth);
Guolin Ke's avatar
Guolin Ke committed
208
  return tree.release();
Guolin Ke's avatar
Guolin Ke committed
209
210
}

211
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const {
Guolin Ke's avatar
Guolin Ke committed
212
  auto tree = std::unique_ptr<Tree>(new Tree(*old_tree));
Nikita Titov's avatar
Nikita Titov committed
213
  CHECK_GE(data_partition_->num_leaves(), tree->num_leaves());
214
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
215
  #pragma omp parallel for schedule(static)
216
  for (int i = 0; i < tree->num_leaves(); ++i) {
217
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
218
219
220
    data_size_t cnt_leaf_data = 0;
    auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
    double sum_grad = 0.0f;
221
    double sum_hess = kEpsilon;
Guolin Ke's avatar
Guolin Ke committed
222
223
224
225
226
    for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
      auto idx = tmp_idx[j];
      sum_grad += gradients[idx];
      sum_hess += hessians[idx];
    }
Belinda Trotta's avatar
Belinda Trotta committed
227
228
229
230
231
232
233
234
235
236
    double output;
    if ((config_->path_smooth > kEpsilon) & (i > 0)) {
      output = FeatureHistogram::CalculateSplittedLeafOutput<true, true, true>(
          sum_grad, sum_hess, config_->lambda_l1, config_->lambda_l2,
          config_->max_delta_step, config_->path_smooth, cnt_leaf_data, tree->leaf_parent(i));
    } else {
      output = FeatureHistogram::CalculateSplittedLeafOutput<true, true, false>(
          sum_grad, sum_hess, config_->lambda_l1, config_->lambda_l2,
          config_->max_delta_step, config_->path_smooth, cnt_leaf_data, 0);
    }
Guolin Ke's avatar
Guolin Ke committed
237
238
239
    auto old_leaf_output = tree->LeafOutput(i);
    auto new_leaf_output = output * tree->shrinkage();
    tree->SetLeafOutput(i, config_->refit_decay_rate * old_leaf_output + (1.0 - config_->refit_decay_rate) * new_leaf_output);
240
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
241
  }
242
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
243
244
245
  return tree.release();
}

246
247
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
                                           const score_t* gradients, const score_t *hessians) const {
248
249
250
251
  data_partition_->ResetByLeafPred(leaf_pred, old_tree->num_leaves());
  return FitByExistingTree(old_tree, gradients, hessians);
}

Guolin Ke's avatar
Guolin Ke committed
252
void SerialTreeLearner::BeforeTrain() {
253
  Common::FunctionTimer fun_timer("SerialTreeLearner::BeforeTrain", global_timer);
254
255
  // reset histogram pool
  histogram_pool_.ResetMap();
Guolin Ke's avatar
Guolin Ke committed
256

257
258
  col_sampler_.ResetByTree();
  train_data_->InitTrain(col_sampler_.is_feature_used_bytree(), share_state_.get());
Guolin Ke's avatar
Guolin Ke committed
259
260
261
  // initialize data partition
  data_partition_->Init();

262
263
  constraints_->Reset();

Guolin Ke's avatar
Guolin Ke committed
264
  // reset the splits for leaves
Guolin Ke's avatar
Guolin Ke committed
265
  for (int i = 0; i < config_->num_leaves; ++i) {
Guolin Ke's avatar
Guolin Ke committed
266
267
268
269
270
271
272
    best_split_per_leaf_[i].Reset();
  }

  // Sumup for root
  if (data_partition_->leaf_count(0) == num_data_) {
    // use all data
    smaller_leaf_splits_->Init(gradients_, hessians_);
Guolin Ke's avatar
Guolin Ke committed
273

Guolin Ke's avatar
Guolin Ke committed
274
275
  } else {
    // use bagging, only use part of data
Guolin Ke's avatar
Guolin Ke committed
276
    smaller_leaf_splits_->Init(0, data_partition_.get(), gradients_, hessians_);
Guolin Ke's avatar
Guolin Ke committed
277
278
279
280
281
  }

  larger_leaf_splits_->Init();
}

Guolin Ke's avatar
Guolin Ke committed
282
bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
283
  Common::FunctionTimer fun_timer("SerialTreeLearner::BeforeFindBestSplit", global_timer);
Guolin Ke's avatar
Guolin Ke committed
284
  // check depth of current leaf
Guolin Ke's avatar
Guolin Ke committed
285
  if (config_->max_depth > 0) {
Guolin Ke's avatar
Guolin Ke committed
286
    // only need to check left leaf, since right leaf is in same level of left leaf
Guolin Ke's avatar
Guolin Ke committed
287
    if (tree->leaf_depth(left_leaf) >= config_->max_depth) {
Guolin Ke's avatar
Guolin Ke committed
288
289
290
291
292
293
294
      best_split_per_leaf_[left_leaf].gain = kMinScore;
      if (right_leaf >= 0) {
        best_split_per_leaf_[right_leaf].gain = kMinScore;
      }
      return false;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
295
296
297
  data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
  data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
  // no enough data to continue
Guolin Ke's avatar
Guolin Ke committed
298
299
  if (num_data_in_right_child < static_cast<data_size_t>(config_->min_data_in_leaf * 2)
      && num_data_in_left_child < static_cast<data_size_t>(config_->min_data_in_leaf * 2)) {
Guolin Ke's avatar
Guolin Ke committed
300
301
302
303
304
305
    best_split_per_leaf_[left_leaf].gain = kMinScore;
    if (right_leaf >= 0) {
      best_split_per_leaf_[right_leaf].gain = kMinScore;
    }
    return false;
  }
306
  parent_leaf_histogram_array_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
307
308
  // only have root
  if (right_leaf < 0) {
309
    histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
Guolin Ke's avatar
Guolin Ke committed
310
311
    larger_leaf_histogram_array_ = nullptr;
  } else if (num_data_in_left_child < num_data_in_right_child) {
Hui Xue's avatar
Hui Xue committed
312
    // put parent(left) leaf's histograms into larger leaf's histograms
313
314
315
    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
    histogram_pool_.Move(left_leaf, right_leaf);
    histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
Guolin Ke's avatar
Guolin Ke committed
316
  } else {
Hui Xue's avatar
Hui Xue committed
317
    // put parent(left) leaf's histograms to larger leaf's histograms
318
319
    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
    histogram_pool_.Get(right_leaf, &smaller_leaf_histogram_array_);
Guolin Ke's avatar
Guolin Ke committed
320
321
322
323
  }
  return true;
}

324
void SerialTreeLearner::FindBestSplits(const Tree* tree) {
Guolin Ke's avatar
Guolin Ke committed
325
  std::vector<int8_t> is_feature_used(num_features_, 0);
326
  #pragma omp parallel for schedule(static, 256) if (num_features_ >= 512)
Guolin Ke's avatar
Guolin Ke committed
327
  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
328
    if (!col_sampler_.is_feature_used_bytree()[feature_index]) continue;
Guolin Ke's avatar
Guolin Ke committed
329
330
331
332
333
334
335
336
    if (parent_leaf_histogram_array_ != nullptr
        && !parent_leaf_histogram_array_[feature_index].is_splittable()) {
      smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
      continue;
    }
    is_feature_used[feature_index] = 1;
  }
  bool use_subtract = parent_leaf_histogram_array_ != nullptr;
337
338
339
340
341
342
343
344

#ifdef USE_CUDA
  if (LGBM_config_::current_learner == use_cpu_learner) {
    SerialTreeLearner::ConstructHistograms(is_feature_used, use_subtract);
  } else {
    ConstructHistograms(is_feature_used, use_subtract);
  }
#else
Guolin Ke's avatar
Guolin Ke committed
345
  ConstructHistograms(is_feature_used, use_subtract);
346
#endif
347
  FindBestSplitsFromHistograms(is_feature_used, use_subtract, tree);
Guolin Ke's avatar
Guolin Ke committed
348
349
}

350
351
352
353
void SerialTreeLearner::ConstructHistograms(
    const std::vector<int8_t>& is_feature_used, bool use_subtract) {
  Common::FunctionTimer fun_timer("SerialTreeLearner::ConstructHistograms",
                                  global_timer);
Guolin Ke's avatar
Guolin Ke committed
354
  // construct smaller leaf
355
356
  hist_t* ptr_smaller_leaf_hist_data =
      smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
357
358
359
  train_data_->ConstructHistograms(
      is_feature_used, smaller_leaf_splits_->data_indices(),
      smaller_leaf_splits_->num_data_in_leaf(), gradients_, hessians_,
360
361
      ordered_gradients_.data(), ordered_hessians_.data(), share_state_.get(),
      ptr_smaller_leaf_hist_data);
Guolin Ke's avatar
Guolin Ke committed
362
363
  if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
    // construct larger leaf
364
365
    hist_t* ptr_larger_leaf_hist_data =
        larger_leaf_histogram_array_[0].RawData() - kHistOffset;
366
367
368
    train_data_->ConstructHistograms(
        is_feature_used, larger_leaf_splits_->data_indices(),
        larger_leaf_splits_->num_data_in_leaf(), gradients_, hessians_,
369
        ordered_gradients_.data(), ordered_hessians_.data(), share_state_.get(),
370
        ptr_larger_leaf_hist_data);
Guolin Ke's avatar
Guolin Ke committed
371
  }
372
373
}

Guolin Ke's avatar
Guolin Ke committed
374
void SerialTreeLearner::FindBestSplitsFromHistograms(
375
    const std::vector<int8_t>& is_feature_used, bool use_subtract, const Tree* tree) {
Guolin Ke's avatar
Guolin Ke committed
376
377
  Common::FunctionTimer fun_timer(
      "SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
378
379
  std::vector<SplitInfo> smaller_best(share_state_->num_threads);
  std::vector<SplitInfo> larger_best(share_state_->num_threads);
380
381
  std::vector<int8_t> smaller_node_used_features = col_sampler_.GetByNode(tree, smaller_leaf_splits_->leaf_index());
  std::vector<int8_t> larger_node_used_features;
382
383
384
385
386
  double smaller_leaf_parent_output = GetParentOutput(tree, smaller_leaf_splits_.get());
  double larger_leaf_parent_output = 0;
  if (larger_leaf_splits_ != nullptr && larger_leaf_splits_->leaf_index() >= 0) {
    larger_leaf_parent_output = GetParentOutput(tree, larger_leaf_splits_.get());
  }
387
388
389
  if (larger_leaf_splits_->leaf_index() >= 0) {
    larger_node_used_features = col_sampler_.GetByNode(tree, larger_leaf_splits_->leaf_index());
  }
390
  OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
391
// find splits
392
#pragma omp parallel for schedule(static) num_threads(share_state_->num_threads)
Guolin Ke's avatar
Guolin Ke committed
393
  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
394
    OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
395
396
397
    if (!is_feature_used[feature_index]) {
      continue;
    }
Guolin Ke's avatar
Guolin Ke committed
398
    const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
399
400
401
402
    train_data_->FixHistogram(
        feature_index, smaller_leaf_splits_->sum_gradients(),
        smaller_leaf_splits_->sum_hessians(),
        smaller_leaf_histogram_array_[feature_index].RawData());
403
    int real_fidx = train_data_->RealFeatureIndex(feature_index);
404
405
406
407
408

    ComputeBestSplitForFeature(smaller_leaf_histogram_array_, feature_index,
                               real_fidx,
                               smaller_node_used_features[feature_index],
                               smaller_leaf_splits_->num_data_in_leaf(),
409
410
                               smaller_leaf_splits_.get(), &smaller_best[tid],
                               smaller_leaf_parent_output);
411

Guolin Ke's avatar
Guolin Ke committed
412
    // only has root leaf
Guolin Ke's avatar
Guolin Ke committed
413
414
415
416
    if (larger_leaf_splits_ == nullptr ||
        larger_leaf_splits_->leaf_index() < 0) {
      continue;
    }
Guolin Ke's avatar
Guolin Ke committed
417

Guolin Ke's avatar
Guolin Ke committed
418
    if (use_subtract) {
Guolin Ke's avatar
Guolin Ke committed
419
420
      larger_leaf_histogram_array_[feature_index].Subtract(
          smaller_leaf_histogram_array_[feature_index]);
421
    } else {
Guolin Ke's avatar
Guolin Ke committed
422
423
424
425
      train_data_->FixHistogram(
          feature_index, larger_leaf_splits_->sum_gradients(),
          larger_leaf_splits_->sum_hessians(),
          larger_leaf_histogram_array_[feature_index].RawData());
426
    }
427
428
429
430
431

    ComputeBestSplitForFeature(larger_leaf_histogram_array_, feature_index,
                               real_fidx,
                               larger_node_used_features[feature_index],
                               larger_leaf_splits_->num_data_in_leaf(),
432
433
                               larger_leaf_splits_.get(), &larger_best[tid],
                               larger_leaf_parent_output);
434

435
    OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
436
  }
437
  OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
438
  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
439
  int leaf = smaller_leaf_splits_->leaf_index();
Guolin Ke's avatar
Guolin Ke committed
440
441
  best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];

Guolin Ke's avatar
Guolin Ke committed
442
443
  if (larger_leaf_splits_ != nullptr &&
      larger_leaf_splits_->leaf_index() >= 0) {
444
    leaf = larger_leaf_splits_->leaf_index();
Guolin Ke's avatar
Guolin Ke committed
445
446
447
448
449
    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
    best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
  }
}

450
451
452
453
454
455
int32_t SerialTreeLearner::ForceSplits(Tree* tree, int* left_leaf,
                                       int* right_leaf, int *cur_depth) {
  bool abort_last_forced_split = false;
  if (forced_split_json_ == nullptr) {
    return 0;
  }
456
457
458
459
  int32_t result_count = 0;
  // start at root leaf
  *left_leaf = 0;
  std::queue<std::pair<Json, int>> q;
460
  Json left = *forced_split_json_;
461
462
463
  Json right;
  bool left_smaller = true;
  std::unordered_map<int, SplitInfo> forceSplitMap;
464
  q.push(std::make_pair(left, *left_leaf));
465
  while (!q.empty()) {
466
467
468
    // before processing next node from queue, store info for current left/right leaf
    // store "best split" for left and right, even if they might be overwritten by forced split
    if (BeforeFindBestSplit(tree, *left_leaf, *right_leaf)) {
469
      FindBestSplits(tree);
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
    }
    // then, compute own splits
    SplitInfo left_split;
    SplitInfo right_split;

    if (!left.is_null()) {
      const int left_feature = left["feature"].int_value();
      const double left_threshold_double = left["threshold"].number_value();
      const int left_inner_feature_index = train_data_->InnerFeatureIndex(left_feature);
      const uint32_t left_threshold = train_data_->BinThreshold(
              left_inner_feature_index, left_threshold_double);
      auto leaf_histogram_array = (left_smaller) ? smaller_leaf_histogram_array_ : larger_leaf_histogram_array_;
      auto left_leaf_splits = (left_smaller) ? smaller_leaf_splits_.get() : larger_leaf_splits_.get();
      leaf_histogram_array[left_inner_feature_index].GatherInfoForThreshold(
              left_leaf_splits->sum_gradients(),
              left_leaf_splits->sum_hessians(),
              left_threshold,
              left_leaf_splits->num_data_in_leaf(),
Belinda Trotta's avatar
Belinda Trotta committed
488
              left_leaf_splits->weight(),
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
              &left_split);
      left_split.feature = left_feature;
      forceSplitMap[*left_leaf] = left_split;
      if (left_split.gain < 0) {
        forceSplitMap.erase(*left_leaf);
      }
    }

    if (!right.is_null()) {
      const int right_feature = right["feature"].int_value();
      const double right_threshold_double = right["threshold"].number_value();
      const int right_inner_feature_index = train_data_->InnerFeatureIndex(right_feature);
      const uint32_t right_threshold = train_data_->BinThreshold(
              right_inner_feature_index, right_threshold_double);
      auto leaf_histogram_array = (left_smaller) ? larger_leaf_histogram_array_ : smaller_leaf_histogram_array_;
      auto right_leaf_splits = (left_smaller) ? larger_leaf_splits_.get() : smaller_leaf_splits_.get();
      leaf_histogram_array[right_inner_feature_index].GatherInfoForThreshold(
        right_leaf_splits->sum_gradients(),
        right_leaf_splits->sum_hessians(),
        right_threshold,
        right_leaf_splits->num_data_in_leaf(),
Belinda Trotta's avatar
Belinda Trotta committed
510
        right_leaf_splits->weight(),
511
512
513
514
515
516
517
518
519
520
521
522
523
        &right_split);
      right_split.feature = right_feature;
      forceSplitMap[*right_leaf] = right_split;
      if (right_split.gain < 0) {
        forceSplitMap.erase(*right_leaf);
      }
    }

    std::pair<Json, int> pair = q.front();
    q.pop();
    int current_leaf = pair.second;
    // split info should exist because searching in bfs fashion - should have added from parent
    if (forceSplitMap.find(current_leaf) == forceSplitMap.end()) {
524
        abort_last_forced_split = true;
525
526
        break;
    }
527
528
529
530
    best_split_per_leaf_[current_leaf] = forceSplitMap[current_leaf];
    Split(tree, current_leaf, left_leaf, right_leaf);
    left_smaller = best_split_per_leaf_[current_leaf].left_count <
                   best_split_per_leaf_[current_leaf].right_count;
531
532
533
534
    left = Json();
    right = Json();
    if ((pair.first).object_items().count("left") > 0) {
      left = (pair.first)["left"];
535
536
537
      if (left.object_items().count("feature") > 0 && left.object_items().count("threshold") > 0) {
        q.push(std::make_pair(left, *left_leaf));
      }
538
539
540
    }
    if ((pair.first).object_items().count("right") > 0) {
      right = (pair.first)["right"];
541
542
543
      if (right.object_items().count("feature") > 0 && right.object_items().count("threshold") > 0) {
        q.push(std::make_pair(right, *right_leaf));
      }
544
545
546
547
    }
    result_count++;
    *(cur_depth) = std::max(*(cur_depth), tree->leaf_depth(*left_leaf));
  }
548
549
550
551
552
553
554
555
556
557
558
  if (abort_last_forced_split) {
    int best_leaf =
        static_cast<int>(ArrayArgs<SplitInfo>::ArgMax(best_split_per_leaf_));
    const SplitInfo& best_leaf_SplitInfo = best_split_per_leaf_[best_leaf];
    if (best_leaf_SplitInfo.gain <= 0.0) {
      Log::Warning("No further splits with positive gain, best gain: %f",
                   best_leaf_SplitInfo.gain);
      return config_->num_leaves;
    }
    Split(tree, best_leaf, left_leaf, right_leaf);
    *(cur_depth) = std::max(*(cur_depth), tree->leaf_depth(*left_leaf));
559
    ++result_count;
560
  }
561
562
  return result_count;
}
Guolin Ke's avatar
Guolin Ke committed
563

564
565
566
void SerialTreeLearner::SplitInner(Tree* tree, int best_leaf, int* left_leaf,
                                   int* right_leaf, bool update_cnt) {
  Common::FunctionTimer fun_timer("SerialTreeLearner::SplitInner", global_timer);
567
  SplitInfo& best_split_info = best_split_per_leaf_[best_leaf];
568
569
  const int inner_feature_index =
      train_data_->InnerFeatureIndex(best_split_info.feature);
570
  if (cegb_ != nullptr) {
571
572
    cegb_->UpdateLeafBestSplits(tree, best_leaf, &best_split_info,
                                &best_split_per_leaf_);
573
  }
574
  *left_leaf = best_leaf;
575
576
  auto next_leaf_id = tree->NextLeafId();

577
  // update before tree split
578
  constraints_->BeforeSplit(best_leaf, next_leaf_id,
579
580
                            best_split_info.monotone_type);

581
582
583
  bool is_numerical_split =
      train_data_->FeatureBinMapper(inner_feature_index)->bin_type() ==
      BinType::NumericalBin;
Guolin Ke's avatar
Guolin Ke committed
584
  if (is_numerical_split) {
585
586
    auto threshold_double = train_data_->RealThreshold(
        inner_feature_index, best_split_info.threshold);
587
    data_partition_->Split(best_leaf, train_data_, inner_feature_index,
588
589
590
591
592
593
594
                           &best_split_info.threshold, 1,
                           best_split_info.default_left, next_leaf_id);
    if (update_cnt) {
      // don't need to update this in data-based parallel model
      best_split_info.left_count = data_partition_->leaf_count(*left_leaf);
      best_split_info.right_count = data_partition_->leaf_count(next_leaf_id);
    }
595
    // split tree, will return right leaf
596
597
598
599
600
601
602
603
604
    *right_leaf = tree->Split(
        best_leaf, inner_feature_index, best_split_info.feature,
        best_split_info.threshold, threshold_double,
        static_cast<double>(best_split_info.left_output),
        static_cast<double>(best_split_info.right_output),
        static_cast<data_size_t>(best_split_info.left_count),
        static_cast<data_size_t>(best_split_info.right_count),
        static_cast<double>(best_split_info.left_sum_hessian),
        static_cast<double>(best_split_info.right_sum_hessian),
605
606
        // store the true split gain in tree model
        static_cast<float>(best_split_info.gain + config_->min_gain_to_split),
607
608
        train_data_->FeatureBinMapper(inner_feature_index)->missing_type(),
        best_split_info.default_left);
609
  } else {
610
611
612
    std::vector<uint32_t> cat_bitset_inner =
        Common::ConstructBitset(best_split_info.cat_threshold.data(),
                                best_split_info.num_cat_threshold);
613
614
    std::vector<int> threshold_int(best_split_info.num_cat_threshold);
    for (int i = 0; i < best_split_info.num_cat_threshold; ++i) {
615
616
      threshold_int[i] = static_cast<int>(train_data_->RealThreshold(
          inner_feature_index, best_split_info.cat_threshold[i]));
617
    }
618
619
    std::vector<uint32_t> cat_bitset = Common::ConstructBitset(
        threshold_int.data(), best_split_info.num_cat_threshold);
620

621
    data_partition_->Split(best_leaf, train_data_, inner_feature_index,
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
                           cat_bitset_inner.data(),
                           static_cast<int>(cat_bitset_inner.size()),
                           best_split_info.default_left, next_leaf_id);

    if (update_cnt) {
      // don't need to update this in data-based parallel model
      best_split_info.left_count = data_partition_->leaf_count(*left_leaf);
      best_split_info.right_count = data_partition_->leaf_count(next_leaf_id);
    }

    *right_leaf = tree->SplitCategorical(
        best_leaf, inner_feature_index, best_split_info.feature,
        cat_bitset_inner.data(), static_cast<int>(cat_bitset_inner.size()),
        cat_bitset.data(), static_cast<int>(cat_bitset.size()),
        static_cast<double>(best_split_info.left_output),
        static_cast<double>(best_split_info.right_output),
        static_cast<data_size_t>(best_split_info.left_count),
        static_cast<data_size_t>(best_split_info.right_count),
        static_cast<double>(best_split_info.left_sum_hessian),
        static_cast<double>(best_split_info.right_sum_hessian),
642
643
        // store the true split gain in tree model
        static_cast<float>(best_split_info.gain + config_->min_gain_to_split),
644
        train_data_->FeatureBinMapper(inner_feature_index)->missing_type());
645
  }
646

647
#ifdef DEBUG
648
  CHECK(*right_leaf == next_leaf_id);
649
#endif
650

Guolin Ke's avatar
Guolin Ke committed
651
652
  // init the leaves that used on next iteration
  if (best_split_info.left_count < best_split_info.right_count) {
653
    CHECK_GT(best_split_info.left_count, 0);
654
655
    smaller_leaf_splits_->Init(*left_leaf, data_partition_.get(),
                               best_split_info.left_sum_gradient,
Belinda Trotta's avatar
Belinda Trotta committed
656
657
                               best_split_info.left_sum_hessian,
                               best_split_info.left_output);
658
659
    larger_leaf_splits_->Init(*right_leaf, data_partition_.get(),
                              best_split_info.right_sum_gradient,
Belinda Trotta's avatar
Belinda Trotta committed
660
661
                              best_split_info.right_sum_hessian,
                              best_split_info.right_output);
Guolin Ke's avatar
Guolin Ke committed
662
  } else {
663
    CHECK_GT(best_split_info.right_count, 0);
664
665
    smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(),
                               best_split_info.right_sum_gradient,
Belinda Trotta's avatar
Belinda Trotta committed
666
667
                               best_split_info.right_sum_hessian,
                               best_split_info.right_output);
668
669
    larger_leaf_splits_->Init(*left_leaf, data_partition_.get(),
                              best_split_info.left_sum_gradient,
Belinda Trotta's avatar
Belinda Trotta committed
670
671
                              best_split_info.left_sum_hessian,
                              best_split_info.left_output);
Guolin Ke's avatar
Guolin Ke committed
672
  }
673
  auto leaves_need_update = constraints_->Update(
674
      is_numerical_split, *left_leaf, *right_leaf,
675
676
677
678
679
680
681
      best_split_info.monotone_type, best_split_info.right_output,
      best_split_info.left_output, inner_feature_index, best_split_info,
      best_split_per_leaf_);
  // update leave outputs if needed
  for (auto leaf : leaves_need_update) {
    RecomputeBestSplitForLeaf(leaf, &best_split_per_leaf_[leaf]);
  }
682
}
Guolin Ke's avatar
Guolin Ke committed
683

684
void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj, std::function<double(const label_t*, int)> residual_getter,
685
686
                                        data_size_t total_num_data, const data_size_t* bag_indices, data_size_t bag_cnt) const {
  if (obj != nullptr && obj->IsRenewTreeOutput()) {
Nikita Titov's avatar
Nikita Titov committed
687
    CHECK_LE(tree->num_leaves(), data_partition_->num_leaves());
688
689
    const data_size_t* bag_mapper = nullptr;
    if (total_num_data != num_data_) {
690
      CHECK_EQ(bag_cnt, num_data_);
691
692
      bag_mapper = bag_indices;
    }
Guolin Ke's avatar
Guolin Ke committed
693
    std::vector<int> n_nozeroworker_perleaf(tree->num_leaves(), 1);
694
    int num_machines = Network::num_machines();
695
696
697
698
699
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < tree->num_leaves(); ++i) {
      const double output = static_cast<double>(tree->LeafOutput(i));
      data_size_t cnt_leaf_data = 0;
      auto index_mapper = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
Guolin Ke's avatar
Guolin Ke committed
700
701
      if (cnt_leaf_data > 0) {
        // bag_mapper[index_mapper[i]]
702
        const double new_output = obj->RenewTreeOutput(output, residual_getter, index_mapper, bag_mapper, cnt_leaf_data);
Guolin Ke's avatar
Guolin Ke committed
703
704
        tree->SetLeafOutput(i, new_output);
      } else {
705
        CHECK_GT(num_machines, 1);
Guolin Ke's avatar
Guolin Ke committed
706
707
708
709
710
711
712
713
714
        tree->SetLeafOutput(i, 0.0);
        n_nozeroworker_perleaf[i] = 0;
      }
    }
    if (num_machines > 1) {
      std::vector<double> outputs(tree->num_leaves());
      for (int i = 0; i < tree->num_leaves(); ++i) {
        outputs[i] = static_cast<double>(tree->LeafOutput(i));
      }
Guolin Ke's avatar
Guolin Ke committed
715
716
      outputs = Network::GlobalSum(&outputs);
      n_nozeroworker_perleaf = Network::GlobalSum(&n_nozeroworker_perleaf);
Guolin Ke's avatar
Guolin Ke committed
717
718
719
720
721
722
723
      for (int i = 0; i < tree->num_leaves(); ++i) {
        tree->SetLeafOutput(i, outputs[i] / n_nozeroworker_perleaf[i]);
      }
    }
  }
}

724
725
void SerialTreeLearner::ComputeBestSplitForFeature(
    FeatureHistogram* histogram_array_, int feature_index, int real_fidx,
Guolin Ke's avatar
Guolin Ke committed
726
    int8_t is_feature_used, int num_data, const LeafSplits* leaf_splits,
727
    SplitInfo* best_split, double parent_output) {
728
729
730
731
732
733
734
  bool is_feature_numerical = train_data_->FeatureBinMapper(feature_index)
                                  ->bin_type() == BinType::NumericalBin;
  if (is_feature_numerical & !config_->monotone_constraints.empty()) {
    constraints_->RecomputeConstraintsIfNeeded(
        constraints_.get(), feature_index, ~(leaf_splits->leaf_index()),
        train_data_->FeatureNumBin(feature_index));
  }
735
736
737
  SplitInfo new_split;
  histogram_array_[feature_index].FindBestThreshold(
      leaf_splits->sum_gradients(), leaf_splits->sum_hessians(), num_data,
738
      constraints_->GetFeatureConstraint(leaf_splits->leaf_index(), feature_index), parent_output, &new_split);
739
740
741
742
743
744
  new_split.feature = real_fidx;
  if (cegb_ != nullptr) {
    new_split.gain -=
        cegb_->DetlaGain(feature_index, real_fidx, leaf_splits->leaf_index(),
                         num_data, new_split);
  }
745
746
747
748
749
  if (new_split.monotone_type != 0) {
    double penalty = constraints_->ComputeMonotoneSplitGainPenalty(
        leaf_splits->leaf_index(), config_->monotone_penalty);
    new_split.gain *= penalty;
  }
Guolin Ke's avatar
Guolin Ke committed
750
751
752
  // it is needed to filter the features after the above code.
  // Otherwise, the `is_splittable` in `FeatureHistogram` will be wrong, and cause some features being accidentally filtered in the later nodes.
  if (new_split > *best_split && is_feature_used) {
753
754
755
756
    *best_split = new_split;
  }
}

757
758
759
760
761
762
763
764
765
766
767
768
769
770
double SerialTreeLearner::GetParentOutput(const Tree* tree, const LeafSplits* leaf_splits) const {
  double parent_output;
  if (tree->num_leaves() == 1) {
    // for root leaf the "parent" output is its own output because we don't apply any smoothing to the root
    parent_output = FeatureHistogram::CalculateSplittedLeafOutput<true, true, true, false>(
      leaf_splits->sum_gradients(), leaf_splits->sum_hessians(), config_->lambda_l1,
      config_->lambda_l2, config_->max_delta_step, BasicConstraint(),
      config_->path_smooth, static_cast<data_size_t>(leaf_splits->num_data_in_leaf()), 0);
  } else {
    parent_output = leaf_splits->weight();
  }
  return parent_output;
}

771
772
773
774
775
776
777
778
779
780
781
782
783
784
void SerialTreeLearner::RecomputeBestSplitForLeaf(int leaf, SplitInfo* split) {
  FeatureHistogram* histogram_array_;
  if (!histogram_pool_.Get(leaf, &histogram_array_)) {
    Log::Warning(
        "Get historical Histogram for leaf %d failed, will skip the "
        "``RecomputeBestSplitForLeaf``",
        leaf);
    return;
  }
  double sum_gradients = split->left_sum_gradient + split->right_sum_gradient;
  double sum_hessians = split->left_sum_hessian + split->right_sum_hessian;
  int num_data = split->left_count + split->right_count;

  std::vector<SplitInfo> bests(share_state_->num_threads);
Guolin Ke's avatar
Guolin Ke committed
785
  LeafSplits leaf_splits(num_data, config_);
786
787
  leaf_splits.Init(leaf, sum_gradients, sum_hessians);

788
789
790
791
792
793
794
795
  // can't use GetParentOutput because leaf_splits doesn't have weight property set
  double parent_output = 0;
  if (config_->path_smooth > kEpsilon) {
    parent_output = FeatureHistogram::CalculateSplittedLeafOutput<true, true, true, false>(
      sum_gradients, sum_hessians, config_->lambda_l1, config_->lambda_l2, config_->max_delta_step,
      BasicConstraint(), config_->path_smooth, static_cast<data_size_t>(num_data), 0);
  }

796
797
798
799
800
801
802
803
804
805
806
  OMP_INIT_EX();
// find splits
#pragma omp parallel for schedule(static) num_threads(share_state_->num_threads)
  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
    OMP_LOOP_EX_BEGIN();
    if (!col_sampler_.is_feature_used_bytree()[feature_index] ||
        !histogram_array_[feature_index].is_splittable()) {
      continue;
    }
    const int tid = omp_get_thread_num();
    int real_fidx = train_data_->RealFeatureIndex(feature_index);
807
808
    ComputeBestSplitForFeature(histogram_array_, feature_index, real_fidx, true,
                               num_data, &leaf_splits, &bests[tid], parent_output);
809
810
811
812
813
814
815
816

    OMP_LOOP_EX_END();
  }
  OMP_THROW_EX();
  auto best_idx = ArrayArgs<SplitInfo>::ArgMax(bests);
  *split = bests[best_idx];
}

Guolin Ke's avatar
Guolin Ke committed
817
}  // namespace LightGBM