gbdt.cpp 31.5 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#include "gbdt.h"

#include <LightGBM/metric.h>
Guolin Ke's avatar
Guolin Ke committed
8
#include <LightGBM/network.h>
9
10
11
12
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>
Guolin Ke's avatar
Guolin Ke committed
13

14
15
16
17
#include <chrono>
#include <ctime>
#include <sstream>

Guolin Ke's avatar
Guolin Ke committed
18
19
namespace LightGBM {

20
21
Common::Timer global_timer;

22
23
24
int LGBM_config_::current_device = lgbm_device_cpu;
int LGBM_config_::current_learner = use_cpu_learner;

25
26
27
GBDT::GBDT()
    : iter_(0),
      train_data_(nullptr),
28
      config_(nullptr),
29
30
31
32
33
34
35
36
37
38
39
40
      objective_function_(nullptr),
      early_stopping_round_(0),
      es_first_metric_only_(false),
      max_feature_idx_(0),
      num_tree_per_iteration_(1),
      num_class_(1),
      num_iteration_for_pred_(0),
      shrinkage_rate_(0.1f),
      num_init_iteration_(0),
      need_re_bagging_(false),
      balanced_bagging_(false),
      bagging_runner_(0, bagging_rand_block_) {
Guolin Ke's avatar
Guolin Ke committed
41
  average_output_ = false;
Guolin Ke's avatar
Guolin Ke committed
42
  tree_learner_ = nullptr;
43
  linear_tree_ = false;
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
}

GBDT::~GBDT() {
}

Guolin Ke's avatar
Guolin Ke committed
49
void GBDT::Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
50
                const std::vector<const Metric*>& training_metrics) {
Nikita Titov's avatar
Nikita Titov committed
51
  CHECK_NOTNULL(train_data);
52
  train_data_ = train_data;
53
  if (!config->monotone_constraints.empty()) {
Nikita Titov's avatar
Nikita Titov committed
54
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->monotone_constraints.size());
Nikita Titov's avatar
Nikita Titov committed
55
  }
56
  if (!config->feature_contri.empty()) {
Nikita Titov's avatar
Nikita Titov committed
57
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->feature_contri.size());
58
  }
59
  iter_ = 0;
wxchan's avatar
wxchan committed
60
  num_iteration_for_pred_ = 0;
61
  max_feature_idx_ = 0;
wxchan's avatar
wxchan committed
62
  num_class_ = config->num_class;
Guolin Ke's avatar
Guolin Ke committed
63
64
  config_ = std::unique_ptr<Config>(new Config(*config));
  early_stopping_round_ = config_->early_stopping_round;
65
  es_first_metric_only_ = config_->first_metric_only;
Guolin Ke's avatar
Guolin Ke committed
66
  shrinkage_rate_ = config_->learning_rate;
67

68
69
70
71
  if (config_->device_type == std::string("cuda")) {
    LGBM_config_::current_learner = use_cuda_learner;
  }

72
  // load forced_splits file
73
74
75
76
77
  if (!config->forcedsplits_filename.empty()) {
    std::ifstream forced_splits_file(config->forcedsplits_filename.c_str());
    std::stringstream buffer;
    buffer << forced_splits_file.rdbuf();
    std::string err;
Guolin Ke's avatar
Guolin Ke committed
78
    forced_splits_json_ = Json::parse(buffer.str(), &err);
79
80
  }

81
82
83
  objective_function_ = objective_function;
  num_tree_per_iteration_ = num_class_;
  if (objective_function_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
84
    num_tree_per_iteration_ = objective_function_->NumModelPerIteration();
85
86
87
    if (objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) {
      Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
    }
88
89
  }

90
91
  is_constant_hessian_ = GetIsConstHessian(objective_function);

92
93
  tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
                                                                              config_.get()));
94
95
96

  // init tree learner
  tree_learner_->Init(train_data_, is_constant_hessian_);
97
  tree_learner_->SetForcedSplit(&forced_splits_json_);
98
99
100
101
102
103
104
105
106
107
108

  // push training metrics
  training_metrics_.clear();
  for (const auto& metric : training_metrics) {
    training_metrics_.push_back(metric);
  }
  training_metrics_.shrink_to_fit();

  train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));

  num_data_ = train_data_->num_data();
Andrew Ziem's avatar
Andrew Ziem committed
109
  // create buffer for gradients and Hessians
110
111
112
113
114
115
116
117
118
119
120
121
  if (objective_function_ != nullptr) {
    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
    gradients_.resize(total_size);
    hessians_.resize(total_size);
  }
  // get max feature index
  max_feature_idx_ = train_data_->num_total_features() - 1;
  // get label index
  label_idx_ = train_data_->label_idx();
  // get feature names
  feature_names_ = train_data_->feature_names();
  feature_infos_ = train_data_->feature_infos();
122
  monotone_constraints_ = config->monotone_constraints;
123
124
  // get parser config file content
  parser_config_str_ = train_data_->parser_config_str();
125
126

  // if need bagging, create buffer
Guolin Ke's avatar
Guolin Ke committed
127
  ResetBaggingConfig(config_.get(), true);
128
129
130

  class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
  if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {
Nikita Titov's avatar
Nikita Titov committed
131
    CHECK_EQ(num_tree_per_iteration_, num_class_);
132
133
    for (int i = 0; i < num_class_; ++i) {
      class_need_train_[i] = objective_function_->ClassNeedTrain(i);
134
135
    }
  }
136
137
138
139

  if (config_->linear_tree) {
    linear_tree_ = true;
  }
wxchan's avatar
wxchan committed
140
141
142
}

void GBDT::AddValidDataset(const Dataset* valid_data,
143
                           const std::vector<const Metric*>& valid_metrics) {
wxchan's avatar
wxchan committed
144
  if (!train_data_->CheckAlign(*valid_data)) {
145
    Log::Fatal("Cannot add validation data, since it has different bin mappers with training data");
146
  }
Guolin Ke's avatar
Guolin Ke committed
147
  // for a validation dataset, we need its score and metric
148
  auto new_score_updater = std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
wxchan's avatar
wxchan committed
149
150
  // update score
  for (int i = 0; i < iter_; ++i) {
151
152
153
    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
      auto curr_tree = (i + num_init_iteration_) * num_tree_per_iteration_ + cur_tree_id;
      new_score_updater->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
154
155
    }
  }
Guolin Ke's avatar
Guolin Ke committed
156
  valid_score_updater_.push_back(std::move(new_score_updater));
Guolin Ke's avatar
Guolin Ke committed
157
158
159
160
  valid_metrics_.emplace_back();
  for (const auto& metric : valid_metrics) {
    valid_metrics_.back().push_back(metric);
  }
Guolin Ke's avatar
Guolin Ke committed
161
  valid_metrics_.back().shrink_to_fit();
162

163
164
165
166
167
168
169
  if (early_stopping_round_ > 0) {
    auto num_metrics = valid_metrics.size();
    if (es_first_metric_only_) { num_metrics = 1; }
    best_iter_.emplace_back(num_metrics, 0);
    best_score_.emplace_back(num_metrics, kMinScore);
    best_msg_.emplace_back(num_metrics);
  }
Guolin Ke's avatar
Guolin Ke committed
170
171
}

Guolin Ke's avatar
Guolin Ke committed
172
void GBDT::Boosting() {
173
  Common::FunctionTimer fun_timer("GBDT::Boosting", global_timer);
Guolin Ke's avatar
Guolin Ke committed
174
175
176
177
178
179
180
181
182
  if (objective_function_ == nullptr) {
    Log::Fatal("No object function provided");
  }
  // objective function will calculate gradients and hessians
  int64_t num_score = 0;
  objective_function_->
    GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
}

183
data_size_t GBDT::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
184
185
186
  if (cnt <= 0) {
    return 0;
  }
187
  data_size_t cur_left_cnt = 0;
188
  data_size_t cur_right_pos = cnt;
189
190
  // random bagging, minimal unit is one record
  for (data_size_t i = 0; i < cnt; ++i) {
191
192
193
    auto cur_idx = start + i;
    if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < config_->bagging_fraction) {
      buffer[cur_left_cnt++] = cur_idx;
194
    } else {
195
      buffer[--cur_right_pos] = cur_idx;
196
197
198
199
    }
  }
  return cur_left_cnt;
}
Guolin Ke's avatar
Guolin Ke committed
200

201
202
data_size_t GBDT::BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                        data_size_t* buffer) {
Guolin Ke's avatar
Guolin Ke committed
203
204
205
206
207
  if (cnt <= 0) {
    return 0;
  }
  auto label_ptr = train_data_->metadata().label();
  data_size_t cur_left_cnt = 0;
208
  data_size_t cur_right_pos = cnt;
Guolin Ke's avatar
Guolin Ke committed
209
210
  // random bagging, minimal unit is one record
  for (data_size_t i = 0; i < cnt; ++i) {
211
    auto cur_idx = start + i;
Guolin Ke's avatar
Guolin Ke committed
212
213
214
    bool is_pos = label_ptr[start + i] > 0;
    bool is_in_bag = false;
    if (is_pos) {
215
216
      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
                  config_->pos_bagging_fraction;
Guolin Ke's avatar
Guolin Ke committed
217
    } else {
218
219
      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
                  config_->neg_bagging_fraction;
Guolin Ke's avatar
Guolin Ke committed
220
221
    }
    if (is_in_bag) {
222
      buffer[cur_left_cnt++] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
223
    } else {
224
      buffer[--cur_right_pos] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
225
226
227
228
229
    }
  }
  return cur_left_cnt;
}

230
void GBDT::Bagging(int iter) {
231
  Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
Guolin Ke's avatar
Guolin Ke committed
232
  // if need bagging
Guolin Ke's avatar
Guolin Ke committed
233
234
  if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
      need_re_bagging_) {
Guolin Ke's avatar
Guolin Ke committed
235
    need_re_bagging_ = false;
236
237
238
239
240
241
242
243
    auto left_cnt = bagging_runner_.Run<true>(
        num_data_,
        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
            data_size_t*) {
          data_size_t cur_left_count = 0;
          if (balanced_bagging_) {
            cur_left_count =
                BalancedBaggingHelper(cur_start, cur_cnt, left);
Guolin Ke's avatar
Guolin Ke committed
244
          } else {
245
            cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
Guolin Ke's avatar
Guolin Ke committed
246
          }
247
248
249
          return cur_left_count;
        },
        bag_data_indices_.data());
Guolin Ke's avatar
Guolin Ke committed
250
    bag_data_cnt_ = left_cnt;
Guolin Ke's avatar
Guolin Ke committed
251
    Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
252
    // set bagging data to tree learner
Guolin Ke's avatar
Guolin Ke committed
253
    if (!is_use_subset_) {
254
      tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
255
256
    } else {
      // get subset
Guolin Ke's avatar
Guolin Ke committed
257
      tmp_subset_->ReSize(bag_data_cnt_);
258
      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
Guolin Ke's avatar
Guolin Ke committed
259
                              bag_data_cnt_, false);
260
261
      tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
                                    bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
262
    }
Guolin Ke's avatar
Guolin Ke committed
263
264
265
  }
}

Guolin Ke's avatar
Guolin Ke committed
266
void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
267
  Common::FunctionTimer fun_timer("GBDT::Train", global_timer);
Guolin Ke's avatar
Guolin Ke committed
268
269
  bool is_finished = false;
  auto start_time = std::chrono::steady_clock::now();
Guolin Ke's avatar
Guolin Ke committed
270
  for (int iter = 0; iter < config_->num_iterations && !is_finished; ++iter) {
Guolin Ke's avatar
Guolin Ke committed
271
272
273
274
    is_finished = TrainOneIter(nullptr, nullptr);
    if (!is_finished) {
      is_finished = EvalAndCheckEarlyStopping();
    }
Guolin Ke's avatar
Guolin Ke committed
275
276
277
278
279
280
281
    auto end_time = std::chrono::steady_clock::now();
    // output used time per iteration
    Log::Info("%f seconds elapsed, finished iteration %d", std::chrono::duration<double,
              std::milli>(end_time - start_time) * 1e-3, iter + 1);
    if (snapshot_freq > 0
        && (iter + 1) % snapshot_freq == 0) {
      std::string snapshot_out = model_output_path + ".snapshot_iter_" + std::to_string(iter + 1);
282
      SaveModelToFile(0, -1, config_->saved_feature_importance_type, snapshot_out.c_str());
Guolin Ke's avatar
Guolin Ke committed
283
284
285
286
    }
  }
}

287
void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) {
288
289
290
  CHECK_GT(tree_leaf_prediction.size(), 0);
  CHECK_EQ(static_cast<size_t>(num_data_), tree_leaf_prediction.size());
  CHECK_EQ(static_cast<size_t>(models_.size()), tree_leaf_prediction[0].size());
291
292
  int num_iterations = static_cast<int>(models_.size() / num_tree_per_iteration_);
  std::vector<int> leaf_pred(num_data_);
293
294
295
296
297
298
299
300
301
302
303
304
305
  if (linear_tree_) {
    std::vector<int> max_leaves_by_thread = std::vector<int>(OMP_NUM_THREADS(), 0);
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < static_cast<int>(tree_leaf_prediction.size()); ++i) {
      int tid = omp_get_thread_num();
      for (size_t j = 0; j < tree_leaf_prediction[i].size(); ++j) {
        max_leaves_by_thread[tid] = std::max(max_leaves_by_thread[tid], tree_leaf_prediction[i][j]);
      }
    }
    int max_leaves = *std::max_element(max_leaves_by_thread.begin(), max_leaves_by_thread.end());
    max_leaves += 1;
    tree_learner_->InitLinear(train_data_, max_leaves);
  }
306
307
308
309
310
311
312
  for (int iter = 0; iter < num_iterations; ++iter) {
    Boosting();
    for (int tree_id = 0; tree_id < num_tree_per_iteration_; ++tree_id) {
      int model_index = iter * num_tree_per_iteration_ + tree_id;
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < num_data_; ++i) {
        leaf_pred[i] = tree_leaf_prediction[i][model_index];
Nikita Titov's avatar
Nikita Titov committed
313
        CHECK_LT(leaf_pred[i], models_[model_index]->num_leaves());
314
      }
315
316
317
      size_t offset = static_cast<size_t>(tree_id) * num_data_;
      auto grad = gradients_.data() + offset;
      auto hess = hessians_.data() + offset;
318
319
320
321
322
323
324
      auto new_tree = tree_learner_->FitByExistingTree(models_[model_index].get(), leaf_pred, grad, hess);
      train_score_updater_->AddScore(tree_learner_.get(), new_tree, tree_id);
      models_[model_index].reset(new_tree);
    }
  }
}

Andrew Ziem's avatar
Andrew Ziem committed
325
/* If the custom "average" is implemented it will be used in place of the label average (if enabled)
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
*
* An improvement to this is to have options to explicitly choose
* (i) standard average
* (ii) custom average if available
* (iii) any user defined scalar bias (e.g. using a new option "init_score" that overrides (i) and (ii) )
*
* (i) and (ii) could be selected as say "auto_init_score" = 0 or 1 etc..
*
*/
double ObtainAutomaticInitialScore(const ObjectiveFunction* fobj, int class_id) {
  double init_score = 0.0;
  if (fobj != nullptr) {
    init_score = fobj->BoostFromScore(class_id);
  }
  if (Network::num_machines() > 1) {
    init_score = Network::GlobalSyncUpByMean(init_score);
  }
  return init_score;
}

Guolin Ke's avatar
Guolin Ke committed
346
double GBDT::BoostFromAverage(int class_id, bool update_scorer) {
347
  Common::FunctionTimer fun_timer("GBDT::BoostFromAverage", global_timer);
348
  // boosting from average label; or customized "average" if implemented for the current objective
349
350
351
  if (models_.empty() && !train_score_updater_->has_init_score() && objective_function_ != nullptr) {
    if (config_->boost_from_average || (train_data_ != nullptr && train_data_->num_features() == 0)) {
      double init_score = ObtainAutomaticInitialScore(objective_function_, class_id);
352
      if (std::fabs(init_score) > kEpsilon) {
Guolin Ke's avatar
Guolin Ke committed
353
354
355
356
357
        if (update_scorer) {
          train_score_updater_->AddScore(init_score, class_id);
          for (auto& score_updater : valid_score_updater_) {
            score_updater->AddScore(init_score, class_id);
          }
358
359
360
        }
        Log::Info("Start training from score %lf", init_score);
        return init_score;
Guolin Ke's avatar
Guolin Ke committed
361
      }
362
363
364
    } else if (std::string(objective_function_->GetName()) == std::string("regression_l1")
               || std::string(objective_function_->GetName()) == std::string("quantile")
               || std::string(objective_function_->GetName()) == std::string("mape")) {
365
      Log::Warning("Disabling boost_from_average in %s may cause the slow convergence", objective_function_->GetName());
366
    }
367
  }
Guolin Ke's avatar
Guolin Ke committed
368
369
  return 0.0f;
}
Guolin Ke's avatar
Guolin Ke committed
370

Guolin Ke's avatar
Guolin Ke committed
371
bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
372
  Common::FunctionTimer fun_timer("GBDT::TrainOneIter", global_timer);
373
  std::vector<double> init_scores(num_tree_per_iteration_, 0.0);
Guolin Ke's avatar
Guolin Ke committed
374
  // boosting first
Guolin Ke's avatar
Guolin Ke committed
375
  if (gradients == nullptr || hessians == nullptr) {
376
    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
377
      init_scores[cur_tree_id] = BoostFromAverage(cur_tree_id, true);
378
    }
Guolin Ke's avatar
Guolin Ke committed
379
    Boosting();
Guolin Ke's avatar
Guolin Ke committed
380
381
    gradients = gradients_.data();
    hessians = hessians_.data();
Guolin Ke's avatar
Guolin Ke committed
382
  }
383
384
  // bagging logic
  Bagging(iter_);
Guolin Ke's avatar
Guolin Ke committed
385

Guolin Ke's avatar
Guolin Ke committed
386
  bool should_continue = false;
387
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
388
    const size_t offset = static_cast<size_t>(cur_tree_id) * num_data_;
389
    std::unique_ptr<Tree> new_tree(new Tree(2, false, false));
390
    if (class_need_train_[cur_tree_id] && train_data_->num_features() > 0) {
391
392
      auto grad = gradients + offset;
      auto hess = hessians + offset;
Guolin Ke's avatar
Guolin Ke committed
393
394
395
      // need to copy gradients for bagging subset.
      if (is_use_subset_ && bag_data_cnt_ < num_data_) {
        for (int i = 0; i < bag_data_cnt_; ++i) {
396
397
          gradients_[offset + i] = grad[bag_data_indices_[i]];
          hessians_[offset + i] = hess[bag_data_indices_[i]];
Guolin Ke's avatar
Guolin Ke committed
398
        }
399
400
        grad = gradients_.data() + offset;
        hess = hessians_.data() + offset;
Guolin Ke's avatar
Guolin Ke committed
401
      }
402
403
      bool is_first_tree = models_.size() < static_cast<size_t>(num_tree_per_iteration_);
      new_tree.reset(tree_learner_->Train(grad, hess, is_first_tree));
404
    }
Guolin Ke's avatar
Guolin Ke committed
405

Guolin Ke's avatar
Guolin Ke committed
406
    if (new_tree->num_leaves() > 1) {
Guolin Ke's avatar
Guolin Ke committed
407
      should_continue = true;
408
      auto score_ptr = train_score_updater_->score() + offset;
409
410
      auto residual_getter = [score_ptr](const label_t* label, int i) {return static_cast<double>(label[i]) - score_ptr[i]; };
      tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter,
411
                                     num_data_, bag_data_indices_.data(), bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
412
413
414
      // shrinkage by learning rate
      new_tree->Shrinkage(shrinkage_rate_);
      // update score
415
      UpdateScore(new_tree.get(), cur_tree_id);
416
417
      if (std::fabs(init_scores[cur_tree_id]) > kEpsilon) {
        new_tree->AddBias(init_scores[cur_tree_id]);
Guolin Ke's avatar
Guolin Ke committed
418
      }
419
420
    } else {
      // only add default score one-time
421
422
423
424
425
426
427
428
429
      if (models_.size() < static_cast<size_t>(num_tree_per_iteration_)) {
        double output = 0.0;
        if (!class_need_train_[cur_tree_id]) {
          if (objective_function_ != nullptr) {
            output = objective_function_->BoostFromScore(cur_tree_id);
          }
        } else {
          output = init_scores[cur_tree_id];
        }
430
        new_tree->AsConstantTree(output);
Guolin Ke's avatar
Guolin Ke committed
431
        // updates scores
432
        train_score_updater_->AddScore(output, cur_tree_id);
433
        for (auto& score_updater : valid_score_updater_) {
434
          score_updater->AddScore(output, cur_tree_id);
435
436
437
        }
      }
    }
Guolin Ke's avatar
Guolin Ke committed
438
439
440
    // add model
    models_.push_back(std::move(new_tree));
  }
Guolin Ke's avatar
Guolin Ke committed
441

Guolin Ke's avatar
Guolin Ke committed
442
  if (!should_continue) {
443
    Log::Warning("Stopped training because there are no more leaves that meet the split requirements");
444
445
446
447
    if (models_.size() > static_cast<size_t>(num_tree_per_iteration_)) {
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
        models_.pop_back();
      }
Guolin Ke's avatar
Guolin Ke committed
448
449
450
    }
    return true;
  }
451

Guolin Ke's avatar
Guolin Ke committed
452
453
  ++iter_;
  return false;
Guolin Ke's avatar
Guolin Ke committed
454
}
455

wxchan's avatar
wxchan committed
456
void GBDT::RollbackOneIter() {
457
  if (iter_ <= 0) { return; }
wxchan's avatar
wxchan committed
458
  // reset score
459
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
460
    auto curr_tree = models_.size() - num_tree_per_iteration_ + cur_tree_id;
wxchan's avatar
wxchan committed
461
    models_[curr_tree]->Shrinkage(-1.0);
462
    train_score_updater_->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
463
    for (auto& score_updater : valid_score_updater_) {
464
      score_updater->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
465
466
467
    }
  }
  // remove model
468
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
wxchan's avatar
wxchan committed
469
470
471
472
473
    models_.pop_back();
  }
  --iter_;
}

Guolin Ke's avatar
Guolin Ke committed
474
bool GBDT::EvalAndCheckEarlyStopping() {
475
476
  bool is_met_early_stopping = false;
  // print message for metric
Guolin Ke's avatar
Guolin Ke committed
477
  auto best_msg = OutputMetric(iter_);
Guolin Ke's avatar
Guolin Ke committed
478
479


Guolin Ke's avatar
Guolin Ke committed
480
  is_met_early_stopping = !best_msg.empty();
481
482
  if (is_met_early_stopping) {
    Log::Info("Early stopping at iteration %d, the best iteration round is %d",
483
              iter_, iter_ - early_stopping_round_);
Guolin Ke's avatar
Guolin Ke committed
484
    Log::Info("Output of best iteration round:\n%s", best_msg.c_str());
485
    // pop last early_stopping_round_ models
486
    for (int i = 0; i < early_stopping_round_ * num_tree_per_iteration_; ++i) {
487
488
489
490
      models_.pop_back();
    }
  }
  return is_met_early_stopping;
Guolin Ke's avatar
Guolin Ke committed
491
492
}

493
void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
494
  Common::FunctionTimer fun_timer("GBDT::UpdateScore", global_timer);
Guolin Ke's avatar
Guolin Ke committed
495
  // update training score
Guolin Ke's avatar
Guolin Ke committed
496
  if (!is_use_subset_) {
497
    train_score_updater_->AddScore(tree_learner_.get(), tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
498
499
500
501
502
503

    // we need to predict out-of-bag scores of data for boosting
    if (num_data_ - bag_data_cnt_ > 0) {
      train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, cur_tree_id);
    }

Guolin Ke's avatar
Guolin Ke committed
504
  } else {
505
    train_score_updater_->AddScore(tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
506
  }
Guolin Ke's avatar
Guolin Ke committed
507
508


Guolin Ke's avatar
Guolin Ke committed
509
  // update validation score
Guolin Ke's avatar
Guolin Ke committed
510
  for (auto& score_updater : valid_score_updater_) {
511
    score_updater->AddScore(tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
512
513
514
  }
}

Guolin Ke's avatar
Guolin Ke committed
515
516
517
518
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score) const {
  return metric->Eval(score, objective_function_);
}

Guolin Ke's avatar
Guolin Ke committed
519
std::string GBDT::OutputMetric(int iter) {
Guolin Ke's avatar
Guolin Ke committed
520
  bool need_output = (iter % config_->metric_freq) == 0;
Guolin Ke's avatar
Guolin Ke committed
521
522
  std::string ret = "";
  std::stringstream msg_buf;
523
  std::vector<std::pair<size_t, size_t>> meet_early_stopping_pairs;
Guolin Ke's avatar
Guolin Ke committed
524
  // print training metric
Guolin Ke's avatar
Guolin Ke committed
525
  if (need_output) {
526
527
    for (auto& sub_metric : training_metrics_) {
      auto name = sub_metric->GetName();
Guolin Ke's avatar
Guolin Ke committed
528
      auto scores = EvalOneMetric(sub_metric, train_score_updater_->score());
Guolin Ke's avatar
Guolin Ke committed
529
      for (size_t k = 0; k < name.size(); ++k) {
Guolin Ke's avatar
Guolin Ke committed
530
531
532
533
534
535
        std::stringstream tmp_buf;
        tmp_buf << "Iteration:" << iter
          << ", training " << name[k]
          << " : " << scores[k];
        Log::Info(tmp_buf.str().c_str());
        if (early_stopping_round_ > 0) {
536
          msg_buf << tmp_buf.str() << '\n';
Guolin Ke's avatar
Guolin Ke committed
537
        }
538
      }
539
    }
Guolin Ke's avatar
Guolin Ke committed
540
541
  }
  // print validation metric
Guolin Ke's avatar
Guolin Ke committed
542
  if (need_output || early_stopping_round_ > 0) {
543
544
    for (size_t i = 0; i < valid_metrics_.size(); ++i) {
      for (size_t j = 0; j < valid_metrics_[i].size(); ++j) {
Guolin Ke's avatar
Guolin Ke committed
545
        auto test_scores = EvalOneMetric(valid_metrics_[i][j], valid_score_updater_[i]->score());
Guolin Ke's avatar
Guolin Ke committed
546
547
548
549
550
551
552
553
554
555
        auto name = valid_metrics_[i][j]->GetName();
        for (size_t k = 0; k < name.size(); ++k) {
          std::stringstream tmp_buf;
          tmp_buf << "Iteration:" << iter
            << ", valid_" << i + 1 << " " << name[k]
            << " : " << test_scores[k];
          if (need_output) {
            Log::Info(tmp_buf.str().c_str());
          }
          if (early_stopping_round_ > 0) {
556
            msg_buf << tmp_buf.str() << '\n';
557
          }
wxchan's avatar
wxchan committed
558
        }
559
        if (es_first_metric_only_ && j > 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
560
        if (ret.empty() && early_stopping_round_ > 0) {
561
562
563
          auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
          if (cur_score > best_score_[i][j]) {
            best_score_[i][j] = cur_score;
564
            best_iter_[i][j] = iter;
Guolin Ke's avatar
Guolin Ke committed
565
            meet_early_stopping_pairs.emplace_back(i, j);
566
          } else {
Guolin Ke's avatar
Guolin Ke committed
567
            if (iter - best_iter_[i][j] >= early_stopping_round_) { ret = best_msg_[i][j]; }
568
          }
wxchan's avatar
wxchan committed
569
570
        }
      }
Guolin Ke's avatar
Guolin Ke committed
571
572
    }
  }
Guolin Ke's avatar
Guolin Ke committed
573
574
575
  for (auto& pair : meet_early_stopping_pairs) {
    best_msg_[pair.first][pair.second] = msg_buf.str();
  }
wxchan's avatar
wxchan committed
576
  return ret;
Guolin Ke's avatar
Guolin Ke committed
577
578
}

579
/*! \brief Get eval result */
580
std::vector<double> GBDT::GetEvalAt(int data_idx) const {
Guolin Ke's avatar
Guolin Ke committed
581
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
582
583
  std::vector<double> ret;
  if (data_idx == 0) {
584
    for (auto& sub_metric : training_metrics_) {
Guolin Ke's avatar
Guolin Ke committed
585
      auto scores = EvalOneMetric(sub_metric, train_score_updater_->score());
586
587
588
      for (auto score : scores) {
        ret.push_back(score);
      }
589
    }
590
  } else {
591
592
    auto used_idx = data_idx - 1;
    for (size_t j = 0; j < valid_metrics_[used_idx].size(); ++j) {
Guolin Ke's avatar
Guolin Ke committed
593
      auto test_scores = EvalOneMetric(valid_metrics_[used_idx][j], valid_score_updater_[used_idx]->score());
594
595
596
      for (auto score : test_scores) {
        ret.push_back(score);
      }
597
598
599
600
601
    }
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
602
/*! \brief Get training scores result */
603
const double* GBDT::GetTrainingScore(int64_t* out_len) {
604
  *out_len = static_cast<int64_t>(train_score_updater_->num_data()) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
605
  return train_score_updater_->score();
606
607
}

608
void GBDT::PredictContrib(const double* features, double* output) const {
609
  // set zero
Guolin Ke's avatar
Guolin Ke committed
610
611
  const int num_features = max_feature_idx_ + 1;
  std::memset(output, 0, sizeof(double) * num_tree_per_iteration_ * (num_features + 1));
612
613
  const int end_iteration_for_pred = start_iteration_for_pred_ + num_iteration_for_pred_;
  for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) {
614
615
    // predict all the trees for one iteration
    for (int k = 0; k < num_tree_per_iteration_; ++k) {
Guolin Ke's avatar
Guolin Ke committed
616
      models_[i * num_tree_per_iteration_ + k]->PredictContrib(features, num_features, output + k*(num_features + 1));
617
    }
618
619
620
621
622
623
  }
}

void GBDT::PredictContribByMap(const std::unordered_map<int, double>& features,
                               std::vector<std::unordered_map<int, double>>* output) const {
  const int num_features = max_feature_idx_ + 1;
624
625
  const int end_iteration_for_pred = start_iteration_for_pred_ + num_iteration_for_pred_;
  for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) {
626
627
628
    // predict all the trees for one iteration
    for (int k = 0; k < num_tree_per_iteration_; ++k) {
      models_[i * num_tree_per_iteration_ + k]->PredictContribByMap(features, num_features, &((*output)[k]));
629
630
631
632
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
633
634
void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
Guolin Ke's avatar
Guolin Ke committed
635

636
  const double* raw_scores = nullptr;
Guolin Ke's avatar
Guolin Ke committed
637
638
  data_size_t num_data = 0;
  if (data_idx == 0) {
wxchan's avatar
wxchan committed
639
    raw_scores = GetTrainingScore(out_len);
Guolin Ke's avatar
Guolin Ke committed
640
641
642
643
644
    num_data = train_score_updater_->num_data();
  } else {
    auto used_idx = data_idx - 1;
    raw_scores = valid_score_updater_[used_idx]->score();
    num_data = valid_score_updater_[used_idx]->num_data();
645
    *out_len = static_cast<int64_t>(num_data) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
646
  }
Guolin Ke's avatar
Guolin Ke committed
647
  if (objective_function_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
648
649
    #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data; ++i) {
Guolin Ke's avatar
Guolin Ke committed
650
      std::vector<double> tree_pred(num_tree_per_iteration_);
651
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
652
        tree_pred[j] = raw_scores[j * num_data + i];
653
      }
Guolin Ke's avatar
Guolin Ke committed
654
655
      std::vector<double> tmp_result(num_class_);
      objective_function_->ConvertOutput(tree_pred.data(), tmp_result.data());
Guolin Ke's avatar
Guolin Ke committed
656
      for (int j = 0; j < num_class_; ++j) {
657
        out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
Guolin Ke's avatar
Guolin Ke committed
658
659
      }
    }
660
  } else {
Guolin Ke's avatar
Guolin Ke committed
661
    #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
662
    for (data_size_t i = 0; i < num_data; ++i) {
663
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
664
        out_result[j * num_data + i] = static_cast<double>(raw_scores[j * num_data + i]);
Guolin Ke's avatar
Guolin Ke committed
665
666
667
668
669
      }
    }
  }
}

670
671
double GBDT::GetUpperBoundValue() const {
  double max_value = 0.0;
Nikita Titov's avatar
Nikita Titov committed
672
  for (const auto &tree : models_) {
673
674
675
676
677
678
679
    max_value += tree->GetUpperBoundValue();
  }
  return max_value;
}

double GBDT::GetLowerBoundValue() const {
  double min_value = 0.0;
Nikita Titov's avatar
Nikita Titov committed
680
  for (const auto &tree : models_) {
681
682
683
684
685
    min_value += tree->GetLowerBoundValue();
  }
  return min_value;
}

Guolin Ke's avatar
Guolin Ke committed
686
687
688
void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                             const std::vector<const Metric*>& training_metrics) {
  if (train_data != train_data_ && !train_data_->CheckAlign(*train_data)) {
689
    Log::Fatal("Cannot reset training data, since new training data has different bin mappers");
wxchan's avatar
wxchan committed
690
691
  }

Guolin Ke's avatar
Guolin Ke committed
692
693
  objective_function_ = objective_function;
  if (objective_function_ != nullptr) {
Nikita Titov's avatar
Nikita Titov committed
694
    CHECK_EQ(num_tree_per_iteration_, objective_function_->NumModelPerIteration());
695
696
697
    if (objective_function_->IsRenewTreeOutput() && !config_->monotone_constraints.empty()) {
      Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
    }
698
  }
699
  is_constant_hessian_ = GetIsConstHessian(objective_function);
700

Guolin Ke's avatar
Guolin Ke committed
701
702
703
704
  // push training metrics
  training_metrics_.clear();
  for (const auto& metric : training_metrics) {
    training_metrics_.push_back(metric);
705
  }
Guolin Ke's avatar
Guolin Ke committed
706
  training_metrics_.shrink_to_fit();
707

Guolin Ke's avatar
Guolin Ke committed
708
709
710
711
712
  if (train_data != train_data_) {
    train_data_ = train_data;
    // not same training data, need reset score and others
    // create score tracker
    train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
713

Guolin Ke's avatar
Guolin Ke committed
714
715
716
717
718
719
    // update score
    for (int i = 0; i < iter_; ++i) {
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
        auto curr_tree = (i + num_init_iteration_) * num_tree_per_iteration_ + cur_tree_id;
        train_score_updater_->AddScore(models_[curr_tree].get(), cur_tree_id);
      }
720
721
    }

Guolin Ke's avatar
Guolin Ke committed
722
    num_data_ = train_data_->num_data();
723

Guolin Ke's avatar
Guolin Ke committed
724
725
726
727
728
729
    // create buffer for gradients and hessians
    if (objective_function_ != nullptr) {
      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
      gradients_.resize(total_size);
      hessians_.resize(total_size);
    }
730

Guolin Ke's avatar
Guolin Ke committed
731
732
733
734
    max_feature_idx_ = train_data_->num_total_features() - 1;
    label_idx_ = train_data_->label_idx();
    feature_names_ = train_data_->feature_names();
    feature_infos_ = train_data_->feature_infos();
735
    parser_config_str_ = train_data_->parser_config_str();
736

737
    tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
Guolin Ke's avatar
Guolin Ke committed
738
    ResetBaggingConfig(config_.get(), true);
739
740
  } else {
    tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
741
  }
742
743
}

Guolin Ke's avatar
Guolin Ke committed
744
745
void GBDT::ResetConfig(const Config* config) {
  auto new_config = std::unique_ptr<Config>(new Config(*config));
746
  if (!config->monotone_constraints.empty()) {
Nikita Titov's avatar
Nikita Titov committed
747
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->monotone_constraints.size());
748
749
  }
  if (!config->feature_contri.empty()) {
Nikita Titov's avatar
Nikita Titov committed
750
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->feature_contri.size());
751
  }
752
753
754
  if (objective_function_ != nullptr && objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) {
    Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
  }
Guolin Ke's avatar
Guolin Ke committed
755
756
757
  early_stopping_round_ = new_config->early_stopping_round;
  shrinkage_rate_ = new_config->learning_rate;
  if (tree_learner_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
758
    tree_learner_->ResetConfig(new_config.get());
759
  }
Guolin Ke's avatar
Guolin Ke committed
760
761
  if (train_data_ != nullptr) {
    ResetBaggingConfig(new_config.get(), false);
762
  }
763
  if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
764
765
766
767
768
769
770
    // load forced_splits file
    if (!new_config->forcedsplits_filename.empty()) {
      std::ifstream forced_splits_file(
          new_config->forcedsplits_filename.c_str());
      std::stringstream buffer;
      buffer << forced_splits_file.rdbuf();
      std::string err;
Guolin Ke's avatar
Guolin Ke committed
771
      forced_splits_json_ = Json::parse(buffer.str(), &err);
772
773
774
775
776
777
      tree_learner_->SetForcedSplit(&forced_splits_json_);
    } else {
      forced_splits_json_ = Json();
      tree_learner_->SetForcedSplit(nullptr);
    }
  }
Guolin Ke's avatar
Guolin Ke committed
778
  config_.reset(new_config.release());
Guolin Ke's avatar
Guolin Ke committed
779
780
}

Guolin Ke's avatar
Guolin Ke committed
781
void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
Guolin Ke's avatar
Guolin Ke committed
782
  // if need bagging, create buffer
Guolin Ke's avatar
Guolin Ke committed
783
784
785
786
787
788
  data_size_t num_pos_data = 0;
  if (objective_function_ != nullptr) {
    num_pos_data = objective_function_->NumPositiveData();
  }
  bool balance_bagging_cond = (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
  if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) {
789
790
    need_re_bagging_ = false;
    if (!is_change_dataset &&
Guolin Ke's avatar
Guolin Ke committed
791
792
      config_.get() != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq
      && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
793
794
      return;
    }
Guolin Ke's avatar
Guolin Ke committed
795
796
    if (balance_bagging_cond) {
      balanced_bagging_ = true;
797
      bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
Guolin Ke's avatar
Guolin Ke committed
798
799
800
801
                      + static_cast<data_size_t>((num_data_ - num_pos_data) * config->neg_bagging_fraction);
    } else {
      bag_data_cnt_ = static_cast<data_size_t>(config->bagging_fraction * num_data_);
    }
Guolin Ke's avatar
Guolin Ke committed
802
    bag_data_indices_.resize(num_data_);
803
804
805
806
807
808
    bagging_runner_.ReSize(num_data_);
    bagging_rands_.clear();
    for (int i = 0;
         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
      bagging_rands_.emplace_back(config_->bagging_seed + i);
    }
809

810
811
    double average_bag_rate =
        (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
Guolin Ke's avatar
Guolin Ke committed
812
813
    is_use_subset_ = false;
    const int group_threshold_usesubset = 100;
814
    if (average_bag_rate <= 0.5
815
        && (train_data_->num_feature_groups() < group_threshold_usesubset)) {
Guolin Ke's avatar
Guolin Ke committed
816
817
818
819
820
      if (tmp_subset_ == nullptr || is_change_dataset) {
        tmp_subset_.reset(new Dataset(bag_data_cnt_));
        tmp_subset_->CopyFeatureMapperFrom(train_data_);
      }
      is_use_subset_ = true;
821
      Log::Debug("Use subset for bagging");
Guolin Ke's avatar
Guolin Ke committed
822
823
    }

824
    need_re_bagging_ = true;
825

Guolin Ke's avatar
Guolin Ke committed
826
827
828
829
830
    if (is_use_subset_ && bag_data_cnt_ < num_data_) {
      if (objective_function_ == nullptr) {
        size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
        gradients_.resize(total_size);
        hessians_.resize(total_size);
831
      }
832
    }
833
  } else {
Guolin Ke's avatar
Guolin Ke committed
834
835
    bag_data_cnt_ = num_data_;
    bag_data_indices_.clear();
836
    bagging_runner_.ReSize(0);
Guolin Ke's avatar
Guolin Ke committed
837
    is_use_subset_ = false;
838
  }
wxchan's avatar
wxchan committed
839
840
}

Guolin Ke's avatar
Guolin Ke committed
841
}  // namespace LightGBM