gbdt.cpp 31.7 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#include "gbdt.h"

#include <LightGBM/metric.h>
Guolin Ke's avatar
Guolin Ke committed
8
#include <LightGBM/network.h>
9
10
11
12
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>
Guolin Ke's avatar
Guolin Ke committed
13

14
15
16
17
#include <chrono>
#include <ctime>
#include <sstream>

Guolin Ke's avatar
Guolin Ke committed
18
19
namespace LightGBM {

20
21
Common::Timer global_timer;

22
23
24
int LGBM_config_::current_device = lgbm_device_cpu;
int LGBM_config_::current_learner = use_cpu_learner;

25
26
27
GBDT::GBDT()
    : iter_(0),
      train_data_(nullptr),
28
      config_(nullptr),
29
30
31
32
33
34
35
36
37
38
39
40
      objective_function_(nullptr),
      early_stopping_round_(0),
      es_first_metric_only_(false),
      max_feature_idx_(0),
      num_tree_per_iteration_(1),
      num_class_(1),
      num_iteration_for_pred_(0),
      shrinkage_rate_(0.1f),
      num_init_iteration_(0),
      need_re_bagging_(false),
      balanced_bagging_(false),
      bagging_runner_(0, bagging_rand_block_) {
Guolin Ke's avatar
Guolin Ke committed
41
  average_output_ = false;
Guolin Ke's avatar
Guolin Ke committed
42
  tree_learner_ = nullptr;
43
  linear_tree_ = false;
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
}

GBDT::~GBDT() {
}

Guolin Ke's avatar
Guolin Ke committed
49
void GBDT::Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
50
                const std::vector<const Metric*>& training_metrics) {
Nikita Titov's avatar
Nikita Titov committed
51
  CHECK_NOTNULL(train_data);
52
  train_data_ = train_data;
53
  if (!config->monotone_constraints.empty()) {
Nikita Titov's avatar
Nikita Titov committed
54
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->monotone_constraints.size());
Nikita Titov's avatar
Nikita Titov committed
55
  }
56
  if (!config->feature_contri.empty()) {
Nikita Titov's avatar
Nikita Titov committed
57
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->feature_contri.size());
58
  }
59
  iter_ = 0;
wxchan's avatar
wxchan committed
60
  num_iteration_for_pred_ = 0;
61
  max_feature_idx_ = 0;
wxchan's avatar
wxchan committed
62
  num_class_ = config->num_class;
Guolin Ke's avatar
Guolin Ke committed
63
64
  config_ = std::unique_ptr<Config>(new Config(*config));
  early_stopping_round_ = config_->early_stopping_round;
65
  es_first_metric_only_ = config_->first_metric_only;
Guolin Ke's avatar
Guolin Ke committed
66
  shrinkage_rate_ = config_->learning_rate;
67

68
  if (config_->device_type == std::string("cuda") || config_->device_type == std::string("cuda_exp")) {
69
70
71
    LGBM_config_::current_learner = use_cuda_learner;
  }

72
  // load forced_splits file
73
74
75
76
77
  if (!config->forcedsplits_filename.empty()) {
    std::ifstream forced_splits_file(config->forcedsplits_filename.c_str());
    std::stringstream buffer;
    buffer << forced_splits_file.rdbuf();
    std::string err;
Guolin Ke's avatar
Guolin Ke committed
78
    forced_splits_json_ = Json::parse(buffer.str(), &err);
79
80
  }

81
82
83
  objective_function_ = objective_function;
  num_tree_per_iteration_ = num_class_;
  if (objective_function_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
84
    num_tree_per_iteration_ = objective_function_->NumModelPerIteration();
85
86
87
    if (objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) {
      Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
    }
88
89
  }

90
91
  is_constant_hessian_ = GetIsConstHessian(objective_function);

92
93
  tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
                                                                              config_.get()));
94
95
96

  // init tree learner
  tree_learner_->Init(train_data_, is_constant_hessian_);
97
  tree_learner_->SetForcedSplit(&forced_splits_json_);
98
99
100
101
102
103
104
105
106
107
108

  // push training metrics
  training_metrics_.clear();
  for (const auto& metric : training_metrics) {
    training_metrics_.push_back(metric);
  }
  training_metrics_.shrink_to_fit();

  train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));

  num_data_ = train_data_->num_data();
Andrew Ziem's avatar
Andrew Ziem committed
109
  // create buffer for gradients and Hessians
110
111
112
113
114
115
116
117
118
119
120
121
  if (objective_function_ != nullptr) {
    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
    gradients_.resize(total_size);
    hessians_.resize(total_size);
  }
  // get max feature index
  max_feature_idx_ = train_data_->num_total_features() - 1;
  // get label index
  label_idx_ = train_data_->label_idx();
  // get feature names
  feature_names_ = train_data_->feature_names();
  feature_infos_ = train_data_->feature_infos();
122
  monotone_constraints_ = config->monotone_constraints;
123
124
  // get parser config file content
  parser_config_str_ = train_data_->parser_config_str();
125
126

  // if need bagging, create buffer
Guolin Ke's avatar
Guolin Ke committed
127
  ResetBaggingConfig(config_.get(), true);
128
129
130

  class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
  if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {
Nikita Titov's avatar
Nikita Titov committed
131
    CHECK_EQ(num_tree_per_iteration_, num_class_);
132
133
    for (int i = 0; i < num_class_; ++i) {
      class_need_train_[i] = objective_function_->ClassNeedTrain(i);
134
135
    }
  }
136
137
138
139

  if (config_->linear_tree) {
    linear_tree_ = true;
  }
wxchan's avatar
wxchan committed
140
141
142
}

void GBDT::AddValidDataset(const Dataset* valid_data,
143
                           const std::vector<const Metric*>& valid_metrics) {
wxchan's avatar
wxchan committed
144
  if (!train_data_->CheckAlign(*valid_data)) {
145
    Log::Fatal("Cannot add validation data, since it has different bin mappers with training data");
146
  }
Guolin Ke's avatar
Guolin Ke committed
147
  // for a validation dataset, we need its score and metric
148
  auto new_score_updater = std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
wxchan's avatar
wxchan committed
149
150
  // update score
  for (int i = 0; i < iter_; ++i) {
151
152
153
    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
      auto curr_tree = (i + num_init_iteration_) * num_tree_per_iteration_ + cur_tree_id;
      new_score_updater->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
154
155
    }
  }
Guolin Ke's avatar
Guolin Ke committed
156
  valid_score_updater_.push_back(std::move(new_score_updater));
Guolin Ke's avatar
Guolin Ke committed
157
158
159
160
  valid_metrics_.emplace_back();
  for (const auto& metric : valid_metrics) {
    valid_metrics_.back().push_back(metric);
  }
Guolin Ke's avatar
Guolin Ke committed
161
  valid_metrics_.back().shrink_to_fit();
162

163
164
165
166
167
168
169
  if (early_stopping_round_ > 0) {
    auto num_metrics = valid_metrics.size();
    if (es_first_metric_only_) { num_metrics = 1; }
    best_iter_.emplace_back(num_metrics, 0);
    best_score_.emplace_back(num_metrics, kMinScore);
    best_msg_.emplace_back(num_metrics);
  }
Guolin Ke's avatar
Guolin Ke committed
170
171
}

Guolin Ke's avatar
Guolin Ke committed
172
void GBDT::Boosting() {
173
  Common::FunctionTimer fun_timer("GBDT::Boosting", global_timer);
Guolin Ke's avatar
Guolin Ke committed
174
175
176
177
178
179
180
181
182
  if (objective_function_ == nullptr) {
    Log::Fatal("No object function provided");
  }
  // objective function will calculate gradients and hessians
  int64_t num_score = 0;
  objective_function_->
    GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
}

183
data_size_t GBDT::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
184
185
186
  if (cnt <= 0) {
    return 0;
  }
187
  data_size_t cur_left_cnt = 0;
188
  data_size_t cur_right_pos = cnt;
189
190
  // random bagging, minimal unit is one record
  for (data_size_t i = 0; i < cnt; ++i) {
191
192
193
    auto cur_idx = start + i;
    if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < config_->bagging_fraction) {
      buffer[cur_left_cnt++] = cur_idx;
194
    } else {
195
      buffer[--cur_right_pos] = cur_idx;
196
197
198
199
    }
  }
  return cur_left_cnt;
}
Guolin Ke's avatar
Guolin Ke committed
200

201
202
data_size_t GBDT::BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                        data_size_t* buffer) {
Guolin Ke's avatar
Guolin Ke committed
203
204
205
206
207
  if (cnt <= 0) {
    return 0;
  }
  auto label_ptr = train_data_->metadata().label();
  data_size_t cur_left_cnt = 0;
208
  data_size_t cur_right_pos = cnt;
Guolin Ke's avatar
Guolin Ke committed
209
210
  // random bagging, minimal unit is one record
  for (data_size_t i = 0; i < cnt; ++i) {
211
    auto cur_idx = start + i;
Guolin Ke's avatar
Guolin Ke committed
212
213
214
    bool is_pos = label_ptr[start + i] > 0;
    bool is_in_bag = false;
    if (is_pos) {
215
216
      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
                  config_->pos_bagging_fraction;
Guolin Ke's avatar
Guolin Ke committed
217
    } else {
218
219
      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
                  config_->neg_bagging_fraction;
Guolin Ke's avatar
Guolin Ke committed
220
221
    }
    if (is_in_bag) {
222
      buffer[cur_left_cnt++] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
223
    } else {
224
      buffer[--cur_right_pos] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
225
226
227
228
229
    }
  }
  return cur_left_cnt;
}

230
void GBDT::Bagging(int iter) {
231
  Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
Guolin Ke's avatar
Guolin Ke committed
232
  // if need bagging
Guolin Ke's avatar
Guolin Ke committed
233
234
  if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
      need_re_bagging_) {
Guolin Ke's avatar
Guolin Ke committed
235
    need_re_bagging_ = false;
236
237
238
239
240
241
242
243
    auto left_cnt = bagging_runner_.Run<true>(
        num_data_,
        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
            data_size_t*) {
          data_size_t cur_left_count = 0;
          if (balanced_bagging_) {
            cur_left_count =
                BalancedBaggingHelper(cur_start, cur_cnt, left);
Guolin Ke's avatar
Guolin Ke committed
244
          } else {
245
            cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
Guolin Ke's avatar
Guolin Ke committed
246
          }
247
248
249
          return cur_left_count;
        },
        bag_data_indices_.data());
Guolin Ke's avatar
Guolin Ke committed
250
    bag_data_cnt_ = left_cnt;
Guolin Ke's avatar
Guolin Ke committed
251
    Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
252
    // set bagging data to tree learner
Guolin Ke's avatar
Guolin Ke committed
253
    if (!is_use_subset_) {
254
      tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
255
256
    } else {
      // get subset
Guolin Ke's avatar
Guolin Ke committed
257
      tmp_subset_->ReSize(bag_data_cnt_);
258
      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
Guolin Ke's avatar
Guolin Ke committed
259
                              bag_data_cnt_, false);
260
261
      tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
                                    bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
262
    }
Guolin Ke's avatar
Guolin Ke committed
263
264
265
  }
}

Guolin Ke's avatar
Guolin Ke committed
266
void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
267
  Common::FunctionTimer fun_timer("GBDT::Train", global_timer);
Guolin Ke's avatar
Guolin Ke committed
268
269
  bool is_finished = false;
  auto start_time = std::chrono::steady_clock::now();
Guolin Ke's avatar
Guolin Ke committed
270
  for (int iter = 0; iter < config_->num_iterations && !is_finished; ++iter) {
Guolin Ke's avatar
Guolin Ke committed
271
272
273
274
    is_finished = TrainOneIter(nullptr, nullptr);
    if (!is_finished) {
      is_finished = EvalAndCheckEarlyStopping();
    }
Guolin Ke's avatar
Guolin Ke committed
275
276
277
278
279
280
281
    auto end_time = std::chrono::steady_clock::now();
    // output used time per iteration
    Log::Info("%f seconds elapsed, finished iteration %d", std::chrono::duration<double,
              std::milli>(end_time - start_time) * 1e-3, iter + 1);
    if (snapshot_freq > 0
        && (iter + 1) % snapshot_freq == 0) {
      std::string snapshot_out = model_output_path + ".snapshot_iter_" + std::to_string(iter + 1);
282
      SaveModelToFile(0, -1, config_->saved_feature_importance_type, snapshot_out.c_str());
Guolin Ke's avatar
Guolin Ke committed
283
284
285
286
    }
  }
}

287
void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) {
288
289
290
  CHECK_GT(tree_leaf_prediction.size(), 0);
  CHECK_EQ(static_cast<size_t>(num_data_), tree_leaf_prediction.size());
  CHECK_EQ(static_cast<size_t>(models_.size()), tree_leaf_prediction[0].size());
291
292
  int num_iterations = static_cast<int>(models_.size() / num_tree_per_iteration_);
  std::vector<int> leaf_pred(num_data_);
293
294
295
296
297
298
299
300
301
302
303
304
305
  if (linear_tree_) {
    std::vector<int> max_leaves_by_thread = std::vector<int>(OMP_NUM_THREADS(), 0);
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < static_cast<int>(tree_leaf_prediction.size()); ++i) {
      int tid = omp_get_thread_num();
      for (size_t j = 0; j < tree_leaf_prediction[i].size(); ++j) {
        max_leaves_by_thread[tid] = std::max(max_leaves_by_thread[tid], tree_leaf_prediction[i][j]);
      }
    }
    int max_leaves = *std::max_element(max_leaves_by_thread.begin(), max_leaves_by_thread.end());
    max_leaves += 1;
    tree_learner_->InitLinear(train_data_, max_leaves);
  }
306
307
308
309
310
311
312
  for (int iter = 0; iter < num_iterations; ++iter) {
    Boosting();
    for (int tree_id = 0; tree_id < num_tree_per_iteration_; ++tree_id) {
      int model_index = iter * num_tree_per_iteration_ + tree_id;
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < num_data_; ++i) {
        leaf_pred[i] = tree_leaf_prediction[i][model_index];
Nikita Titov's avatar
Nikita Titov committed
313
        CHECK_LT(leaf_pred[i], models_[model_index]->num_leaves());
314
      }
315
316
317
      size_t offset = static_cast<size_t>(tree_id) * num_data_;
      auto grad = gradients_.data() + offset;
      auto hess = hessians_.data() + offset;
318
319
320
321
322
323
324
      auto new_tree = tree_learner_->FitByExistingTree(models_[model_index].get(), leaf_pred, grad, hess);
      train_score_updater_->AddScore(tree_learner_.get(), new_tree, tree_id);
      models_[model_index].reset(new_tree);
    }
  }
}

Andrew Ziem's avatar
Andrew Ziem committed
325
/* If the custom "average" is implemented it will be used in place of the label average (if enabled)
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
*
* An improvement to this is to have options to explicitly choose
* (i) standard average
* (ii) custom average if available
* (iii) any user defined scalar bias (e.g. using a new option "init_score" that overrides (i) and (ii) )
*
* (i) and (ii) could be selected as say "auto_init_score" = 0 or 1 etc..
*
*/
double ObtainAutomaticInitialScore(const ObjectiveFunction* fobj, int class_id) {
  double init_score = 0.0;
  if (fobj != nullptr) {
    init_score = fobj->BoostFromScore(class_id);
  }
  if (Network::num_machines() > 1) {
    init_score = Network::GlobalSyncUpByMean(init_score);
  }
  return init_score;
}

Guolin Ke's avatar
Guolin Ke committed
346
double GBDT::BoostFromAverage(int class_id, bool update_scorer) {
347
  Common::FunctionTimer fun_timer("GBDT::BoostFromAverage", global_timer);
348
  // boosting from average label; or customized "average" if implemented for the current objective
349
350
351
  if (models_.empty() && !train_score_updater_->has_init_score() && objective_function_ != nullptr) {
    if (config_->boost_from_average || (train_data_ != nullptr && train_data_->num_features() == 0)) {
      double init_score = ObtainAutomaticInitialScore(objective_function_, class_id);
352
      if (std::fabs(init_score) > kEpsilon) {
Guolin Ke's avatar
Guolin Ke committed
353
354
355
356
357
        if (update_scorer) {
          train_score_updater_->AddScore(init_score, class_id);
          for (auto& score_updater : valid_score_updater_) {
            score_updater->AddScore(init_score, class_id);
          }
358
359
360
        }
        Log::Info("Start training from score %lf", init_score);
        return init_score;
Guolin Ke's avatar
Guolin Ke committed
361
      }
362
363
364
    } else if (std::string(objective_function_->GetName()) == std::string("regression_l1")
               || std::string(objective_function_->GetName()) == std::string("quantile")
               || std::string(objective_function_->GetName()) == std::string("mape")) {
365
      Log::Warning("Disabling boost_from_average in %s may cause the slow convergence", objective_function_->GetName());
366
    }
367
  }
Guolin Ke's avatar
Guolin Ke committed
368
369
  return 0.0f;
}
Guolin Ke's avatar
Guolin Ke committed
370

Guolin Ke's avatar
Guolin Ke committed
371
bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
372
  Common::FunctionTimer fun_timer("GBDT::TrainOneIter", global_timer);
373
  std::vector<double> init_scores(num_tree_per_iteration_, 0.0);
Guolin Ke's avatar
Guolin Ke committed
374
  // boosting first
Guolin Ke's avatar
Guolin Ke committed
375
  if (gradients == nullptr || hessians == nullptr) {
376
    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
377
      init_scores[cur_tree_id] = BoostFromAverage(cur_tree_id, true);
378
    }
Guolin Ke's avatar
Guolin Ke committed
379
    Boosting();
Guolin Ke's avatar
Guolin Ke committed
380
381
    gradients = gradients_.data();
    hessians = hessians_.data();
Guolin Ke's avatar
Guolin Ke committed
382
  }
383
384
  // bagging logic
  Bagging(iter_);
Guolin Ke's avatar
Guolin Ke committed
385

Guolin Ke's avatar
Guolin Ke committed
386
  bool should_continue = false;
387
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
388
    const size_t offset = static_cast<size_t>(cur_tree_id) * num_data_;
389
    std::unique_ptr<Tree> new_tree(new Tree(2, false, false));
390
    if (class_need_train_[cur_tree_id] && train_data_->num_features() > 0) {
391
392
      auto grad = gradients + offset;
      auto hess = hessians + offset;
Guolin Ke's avatar
Guolin Ke committed
393
      // need to copy gradients for bagging subset.
394
      if (is_use_subset_ && bag_data_cnt_ < num_data_ && config_->device_type != std::string("cuda_exp")) {
Guolin Ke's avatar
Guolin Ke committed
395
        for (int i = 0; i < bag_data_cnt_; ++i) {
396
397
          gradients_[offset + i] = grad[bag_data_indices_[i]];
          hessians_[offset + i] = hess[bag_data_indices_[i]];
Guolin Ke's avatar
Guolin Ke committed
398
        }
399
400
        grad = gradients_.data() + offset;
        hess = hessians_.data() + offset;
Guolin Ke's avatar
Guolin Ke committed
401
      }
402
403
      bool is_first_tree = models_.size() < static_cast<size_t>(num_tree_per_iteration_);
      new_tree.reset(tree_learner_->Train(grad, hess, is_first_tree));
404
    }
Guolin Ke's avatar
Guolin Ke committed
405

Guolin Ke's avatar
Guolin Ke committed
406
    if (new_tree->num_leaves() > 1) {
Guolin Ke's avatar
Guolin Ke committed
407
      should_continue = true;
408
      auto score_ptr = train_score_updater_->score() + offset;
409
410
      auto residual_getter = [score_ptr](const label_t* label, int i) {return static_cast<double>(label[i]) - score_ptr[i]; };
      tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter,
411
                                     num_data_, bag_data_indices_.data(), bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
412
413
414
      // shrinkage by learning rate
      new_tree->Shrinkage(shrinkage_rate_);
      // update score
415
      UpdateScore(new_tree.get(), cur_tree_id);
416
417
      if (std::fabs(init_scores[cur_tree_id]) > kEpsilon) {
        new_tree->AddBias(init_scores[cur_tree_id]);
Guolin Ke's avatar
Guolin Ke committed
418
      }
419
420
    } else {
      // only add default score one-time
421
      if (models_.size() < static_cast<size_t>(num_tree_per_iteration_)) {
422
423
424
425
426
427
        if (objective_function_ != nullptr && !config_->boost_from_average && !train_score_updater_->has_init_score()) {
          init_scores[cur_tree_id] = ObtainAutomaticInitialScore(objective_function_, cur_tree_id);
          // updates scores
          train_score_updater_->AddScore(init_scores[cur_tree_id], cur_tree_id);
          for (auto& score_updater : valid_score_updater_) {
            score_updater->AddScore(init_scores[cur_tree_id], cur_tree_id);
428
          }
429
        }
430
        new_tree->AsConstantTree(init_scores[cur_tree_id]);
431
432
      }
    }
Guolin Ke's avatar
Guolin Ke committed
433
434
435
    // add model
    models_.push_back(std::move(new_tree));
  }
Guolin Ke's avatar
Guolin Ke committed
436

Guolin Ke's avatar
Guolin Ke committed
437
  if (!should_continue) {
438
    Log::Warning("Stopped training because there are no more leaves that meet the split requirements");
439
440
441
442
    if (models_.size() > static_cast<size_t>(num_tree_per_iteration_)) {
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
        models_.pop_back();
      }
Guolin Ke's avatar
Guolin Ke committed
443
444
445
    }
    return true;
  }
446

Guolin Ke's avatar
Guolin Ke committed
447
448
  ++iter_;
  return false;
Guolin Ke's avatar
Guolin Ke committed
449
}
450

wxchan's avatar
wxchan committed
451
void GBDT::RollbackOneIter() {
452
  if (iter_ <= 0) { return; }
wxchan's avatar
wxchan committed
453
  // reset score
454
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
455
    auto curr_tree = models_.size() - num_tree_per_iteration_ + cur_tree_id;
wxchan's avatar
wxchan committed
456
    models_[curr_tree]->Shrinkage(-1.0);
457
    train_score_updater_->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
458
    for (auto& score_updater : valid_score_updater_) {
459
      score_updater->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
460
461
462
    }
  }
  // remove model
463
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
wxchan's avatar
wxchan committed
464
465
466
467
468
    models_.pop_back();
  }
  --iter_;
}

Guolin Ke's avatar
Guolin Ke committed
469
bool GBDT::EvalAndCheckEarlyStopping() {
470
471
  bool is_met_early_stopping = false;
  // print message for metric
Guolin Ke's avatar
Guolin Ke committed
472
  auto best_msg = OutputMetric(iter_);
Guolin Ke's avatar
Guolin Ke committed
473
474


Guolin Ke's avatar
Guolin Ke committed
475
  is_met_early_stopping = !best_msg.empty();
476
477
  if (is_met_early_stopping) {
    Log::Info("Early stopping at iteration %d, the best iteration round is %d",
478
              iter_, iter_ - early_stopping_round_);
Guolin Ke's avatar
Guolin Ke committed
479
    Log::Info("Output of best iteration round:\n%s", best_msg.c_str());
480
    // pop last early_stopping_round_ models
481
    for (int i = 0; i < early_stopping_round_ * num_tree_per_iteration_; ++i) {
482
483
484
485
      models_.pop_back();
    }
  }
  return is_met_early_stopping;
Guolin Ke's avatar
Guolin Ke committed
486
487
}

488
void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
489
  Common::FunctionTimer fun_timer("GBDT::UpdateScore", global_timer);
Guolin Ke's avatar
Guolin Ke committed
490
  // update training score
Guolin Ke's avatar
Guolin Ke committed
491
  if (!is_use_subset_) {
492
    train_score_updater_->AddScore(tree_learner_.get(), tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
493
494
495
496
497
498

    // we need to predict out-of-bag scores of data for boosting
    if (num_data_ - bag_data_cnt_ > 0) {
      train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, cur_tree_id);
    }

Guolin Ke's avatar
Guolin Ke committed
499
  } else {
500
    train_score_updater_->AddScore(tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
501
  }
Guolin Ke's avatar
Guolin Ke committed
502
503


Guolin Ke's avatar
Guolin Ke committed
504
  // update validation score
Guolin Ke's avatar
Guolin Ke committed
505
  for (auto& score_updater : valid_score_updater_) {
506
    score_updater->AddScore(tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
507
508
509
  }
}

Guolin Ke's avatar
Guolin Ke committed
510
511
512
513
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score) const {
  return metric->Eval(score, objective_function_);
}

Guolin Ke's avatar
Guolin Ke committed
514
std::string GBDT::OutputMetric(int iter) {
Guolin Ke's avatar
Guolin Ke committed
515
  bool need_output = (iter % config_->metric_freq) == 0;
Guolin Ke's avatar
Guolin Ke committed
516
517
  std::string ret = "";
  std::stringstream msg_buf;
518
  std::vector<std::pair<size_t, size_t>> meet_early_stopping_pairs;
Guolin Ke's avatar
Guolin Ke committed
519
  // print training metric
Guolin Ke's avatar
Guolin Ke committed
520
  if (need_output) {
521
522
    for (auto& sub_metric : training_metrics_) {
      auto name = sub_metric->GetName();
Guolin Ke's avatar
Guolin Ke committed
523
      auto scores = EvalOneMetric(sub_metric, train_score_updater_->score());
Guolin Ke's avatar
Guolin Ke committed
524
      for (size_t k = 0; k < name.size(); ++k) {
Guolin Ke's avatar
Guolin Ke committed
525
526
527
528
529
530
        std::stringstream tmp_buf;
        tmp_buf << "Iteration:" << iter
          << ", training " << name[k]
          << " : " << scores[k];
        Log::Info(tmp_buf.str().c_str());
        if (early_stopping_round_ > 0) {
531
          msg_buf << tmp_buf.str() << '\n';
Guolin Ke's avatar
Guolin Ke committed
532
        }
533
      }
534
    }
Guolin Ke's avatar
Guolin Ke committed
535
536
  }
  // print validation metric
Guolin Ke's avatar
Guolin Ke committed
537
  if (need_output || early_stopping_round_ > 0) {
538
539
    for (size_t i = 0; i < valid_metrics_.size(); ++i) {
      for (size_t j = 0; j < valid_metrics_[i].size(); ++j) {
Guolin Ke's avatar
Guolin Ke committed
540
        auto test_scores = EvalOneMetric(valid_metrics_[i][j], valid_score_updater_[i]->score());
Guolin Ke's avatar
Guolin Ke committed
541
542
543
544
545
546
547
548
549
550
        auto name = valid_metrics_[i][j]->GetName();
        for (size_t k = 0; k < name.size(); ++k) {
          std::stringstream tmp_buf;
          tmp_buf << "Iteration:" << iter
            << ", valid_" << i + 1 << " " << name[k]
            << " : " << test_scores[k];
          if (need_output) {
            Log::Info(tmp_buf.str().c_str());
          }
          if (early_stopping_round_ > 0) {
551
            msg_buf << tmp_buf.str() << '\n';
552
          }
wxchan's avatar
wxchan committed
553
        }
554
        if (es_first_metric_only_ && j > 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
555
        if (ret.empty() && early_stopping_round_ > 0) {
556
557
558
          auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
          if (cur_score > best_score_[i][j]) {
            best_score_[i][j] = cur_score;
559
            best_iter_[i][j] = iter;
Guolin Ke's avatar
Guolin Ke committed
560
            meet_early_stopping_pairs.emplace_back(i, j);
561
          } else {
Guolin Ke's avatar
Guolin Ke committed
562
            if (iter - best_iter_[i][j] >= early_stopping_round_) { ret = best_msg_[i][j]; }
563
          }
wxchan's avatar
wxchan committed
564
565
        }
      }
Guolin Ke's avatar
Guolin Ke committed
566
567
    }
  }
Guolin Ke's avatar
Guolin Ke committed
568
569
570
  for (auto& pair : meet_early_stopping_pairs) {
    best_msg_[pair.first][pair.second] = msg_buf.str();
  }
wxchan's avatar
wxchan committed
571
  return ret;
Guolin Ke's avatar
Guolin Ke committed
572
573
}

574
/*! \brief Get eval result */
575
std::vector<double> GBDT::GetEvalAt(int data_idx) const {
Guolin Ke's avatar
Guolin Ke committed
576
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
577
578
  std::vector<double> ret;
  if (data_idx == 0) {
579
    for (auto& sub_metric : training_metrics_) {
Guolin Ke's avatar
Guolin Ke committed
580
      auto scores = EvalOneMetric(sub_metric, train_score_updater_->score());
581
582
583
      for (auto score : scores) {
        ret.push_back(score);
      }
584
    }
585
  } else {
586
587
    auto used_idx = data_idx - 1;
    for (size_t j = 0; j < valid_metrics_[used_idx].size(); ++j) {
Guolin Ke's avatar
Guolin Ke committed
588
      auto test_scores = EvalOneMetric(valid_metrics_[used_idx][j], valid_score_updater_[used_idx]->score());
589
590
591
      for (auto score : test_scores) {
        ret.push_back(score);
      }
592
593
594
595
596
    }
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
597
/*! \brief Get training scores result */
598
const double* GBDT::GetTrainingScore(int64_t* out_len) {
599
  *out_len = static_cast<int64_t>(train_score_updater_->num_data()) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
600
  return train_score_updater_->score();
601
602
}

603
void GBDT::PredictContrib(const double* features, double* output) const {
604
  // set zero
Guolin Ke's avatar
Guolin Ke committed
605
606
  const int num_features = max_feature_idx_ + 1;
  std::memset(output, 0, sizeof(double) * num_tree_per_iteration_ * (num_features + 1));
607
608
  const int end_iteration_for_pred = start_iteration_for_pred_ + num_iteration_for_pred_;
  for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) {
609
610
    // predict all the trees for one iteration
    for (int k = 0; k < num_tree_per_iteration_; ++k) {
Guolin Ke's avatar
Guolin Ke committed
611
      models_[i * num_tree_per_iteration_ + k]->PredictContrib(features, num_features, output + k*(num_features + 1));
612
    }
613
614
615
616
617
618
  }
}

void GBDT::PredictContribByMap(const std::unordered_map<int, double>& features,
                               std::vector<std::unordered_map<int, double>>* output) const {
  const int num_features = max_feature_idx_ + 1;
619
620
  const int end_iteration_for_pred = start_iteration_for_pred_ + num_iteration_for_pred_;
  for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) {
621
622
623
    // predict all the trees for one iteration
    for (int k = 0; k < num_tree_per_iteration_; ++k) {
      models_[i * num_tree_per_iteration_ + k]->PredictContribByMap(features, num_features, &((*output)[k]));
624
625
626
627
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
628
629
void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
Guolin Ke's avatar
Guolin Ke committed
630

631
  const double* raw_scores = nullptr;
Guolin Ke's avatar
Guolin Ke committed
632
633
  data_size_t num_data = 0;
  if (data_idx == 0) {
wxchan's avatar
wxchan committed
634
    raw_scores = GetTrainingScore(out_len);
Guolin Ke's avatar
Guolin Ke committed
635
636
637
638
639
    num_data = train_score_updater_->num_data();
  } else {
    auto used_idx = data_idx - 1;
    raw_scores = valid_score_updater_[used_idx]->score();
    num_data = valid_score_updater_[used_idx]->num_data();
640
    *out_len = static_cast<int64_t>(num_data) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
641
  }
Guolin Ke's avatar
Guolin Ke committed
642
  if (objective_function_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
643
644
    #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data; ++i) {
Guolin Ke's avatar
Guolin Ke committed
645
      std::vector<double> tree_pred(num_tree_per_iteration_);
646
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
647
        tree_pred[j] = raw_scores[j * num_data + i];
648
      }
Guolin Ke's avatar
Guolin Ke committed
649
650
      std::vector<double> tmp_result(num_class_);
      objective_function_->ConvertOutput(tree_pred.data(), tmp_result.data());
Guolin Ke's avatar
Guolin Ke committed
651
      for (int j = 0; j < num_class_; ++j) {
652
        out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
Guolin Ke's avatar
Guolin Ke committed
653
654
      }
    }
655
  } else {
Guolin Ke's avatar
Guolin Ke committed
656
    #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
657
    for (data_size_t i = 0; i < num_data; ++i) {
658
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
659
        out_result[j * num_data + i] = static_cast<double>(raw_scores[j * num_data + i]);
Guolin Ke's avatar
Guolin Ke committed
660
661
662
663
664
      }
    }
  }
}

665
666
double GBDT::GetUpperBoundValue() const {
  double max_value = 0.0;
Nikita Titov's avatar
Nikita Titov committed
667
  for (const auto &tree : models_) {
668
669
670
671
672
673
674
    max_value += tree->GetUpperBoundValue();
  }
  return max_value;
}

double GBDT::GetLowerBoundValue() const {
  double min_value = 0.0;
Nikita Titov's avatar
Nikita Titov committed
675
  for (const auto &tree : models_) {
676
677
678
679
680
    min_value += tree->GetLowerBoundValue();
  }
  return min_value;
}

Guolin Ke's avatar
Guolin Ke committed
681
682
683
void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                             const std::vector<const Metric*>& training_metrics) {
  if (train_data != train_data_ && !train_data_->CheckAlign(*train_data)) {
684
    Log::Fatal("Cannot reset training data, since new training data has different bin mappers");
wxchan's avatar
wxchan committed
685
686
  }

Guolin Ke's avatar
Guolin Ke committed
687
688
  objective_function_ = objective_function;
  if (objective_function_ != nullptr) {
Nikita Titov's avatar
Nikita Titov committed
689
    CHECK_EQ(num_tree_per_iteration_, objective_function_->NumModelPerIteration());
690
691
692
    if (objective_function_->IsRenewTreeOutput() && !config_->monotone_constraints.empty()) {
      Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
    }
693
  }
694
  is_constant_hessian_ = GetIsConstHessian(objective_function);
695

Guolin Ke's avatar
Guolin Ke committed
696
697
698
699
  // push training metrics
  training_metrics_.clear();
  for (const auto& metric : training_metrics) {
    training_metrics_.push_back(metric);
700
  }
Guolin Ke's avatar
Guolin Ke committed
701
  training_metrics_.shrink_to_fit();
702

Guolin Ke's avatar
Guolin Ke committed
703
704
705
706
707
  if (train_data != train_data_) {
    train_data_ = train_data;
    // not same training data, need reset score and others
    // create score tracker
    train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
708

Guolin Ke's avatar
Guolin Ke committed
709
710
711
712
713
714
    // update score
    for (int i = 0; i < iter_; ++i) {
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
        auto curr_tree = (i + num_init_iteration_) * num_tree_per_iteration_ + cur_tree_id;
        train_score_updater_->AddScore(models_[curr_tree].get(), cur_tree_id);
      }
715
716
    }

Guolin Ke's avatar
Guolin Ke committed
717
    num_data_ = train_data_->num_data();
718

Guolin Ke's avatar
Guolin Ke committed
719
720
721
722
723
724
    // create buffer for gradients and hessians
    if (objective_function_ != nullptr) {
      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
      gradients_.resize(total_size);
      hessians_.resize(total_size);
    }
725

Guolin Ke's avatar
Guolin Ke committed
726
727
728
729
    max_feature_idx_ = train_data_->num_total_features() - 1;
    label_idx_ = train_data_->label_idx();
    feature_names_ = train_data_->feature_names();
    feature_infos_ = train_data_->feature_infos();
730
    parser_config_str_ = train_data_->parser_config_str();
731

732
    tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
Guolin Ke's avatar
Guolin Ke committed
733
    ResetBaggingConfig(config_.get(), true);
734
735
  } else {
    tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
736
  }
737
738
}

Guolin Ke's avatar
Guolin Ke committed
739
740
void GBDT::ResetConfig(const Config* config) {
  auto new_config = std::unique_ptr<Config>(new Config(*config));
741
  if (!config->monotone_constraints.empty()) {
Nikita Titov's avatar
Nikita Titov committed
742
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->monotone_constraints.size());
743
744
  }
  if (!config->feature_contri.empty()) {
Nikita Titov's avatar
Nikita Titov committed
745
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->feature_contri.size());
746
  }
747
748
749
  if (objective_function_ != nullptr && objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) {
    Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
  }
Guolin Ke's avatar
Guolin Ke committed
750
751
752
  early_stopping_round_ = new_config->early_stopping_round;
  shrinkage_rate_ = new_config->learning_rate;
  if (tree_learner_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
753
    tree_learner_->ResetConfig(new_config.get());
754
  }
Guolin Ke's avatar
Guolin Ke committed
755
756
  if (train_data_ != nullptr) {
    ResetBaggingConfig(new_config.get(), false);
757
  }
758
  if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
759
760
761
762
763
764
765
    // load forced_splits file
    if (!new_config->forcedsplits_filename.empty()) {
      std::ifstream forced_splits_file(
          new_config->forcedsplits_filename.c_str());
      std::stringstream buffer;
      buffer << forced_splits_file.rdbuf();
      std::string err;
Guolin Ke's avatar
Guolin Ke committed
766
      forced_splits_json_ = Json::parse(buffer.str(), &err);
767
768
769
770
771
772
      tree_learner_->SetForcedSplit(&forced_splits_json_);
    } else {
      forced_splits_json_ = Json();
      tree_learner_->SetForcedSplit(nullptr);
    }
  }
Guolin Ke's avatar
Guolin Ke committed
773
  config_.reset(new_config.release());
Guolin Ke's avatar
Guolin Ke committed
774
775
}

Guolin Ke's avatar
Guolin Ke committed
776
void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
Guolin Ke's avatar
Guolin Ke committed
777
  // if need bagging, create buffer
Guolin Ke's avatar
Guolin Ke committed
778
779
780
781
782
783
  data_size_t num_pos_data = 0;
  if (objective_function_ != nullptr) {
    num_pos_data = objective_function_->NumPositiveData();
  }
  bool balance_bagging_cond = (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
  if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) {
784
785
    need_re_bagging_ = false;
    if (!is_change_dataset &&
Guolin Ke's avatar
Guolin Ke committed
786
787
      config_.get() != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq
      && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
788
789
      return;
    }
Guolin Ke's avatar
Guolin Ke committed
790
791
    if (balance_bagging_cond) {
      balanced_bagging_ = true;
792
      bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
Guolin Ke's avatar
Guolin Ke committed
793
794
795
796
                      + static_cast<data_size_t>((num_data_ - num_pos_data) * config->neg_bagging_fraction);
    } else {
      bag_data_cnt_ = static_cast<data_size_t>(config->bagging_fraction * num_data_);
    }
Guolin Ke's avatar
Guolin Ke committed
797
    bag_data_indices_.resize(num_data_);
798
799
800
801
802
803
    bagging_runner_.ReSize(num_data_);
    bagging_rands_.clear();
    for (int i = 0;
         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
      bagging_rands_.emplace_back(config_->bagging_seed + i);
    }
804

805
806
    double average_bag_rate =
        (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
Guolin Ke's avatar
Guolin Ke committed
807
    is_use_subset_ = false;
808
809
810
811
812
813
814
815
816
817
    if (config_->device_type != std::string("cuda_exp")) {
      const int group_threshold_usesubset = 100;
      if (average_bag_rate <= 0.5
          && (train_data_->num_feature_groups() < group_threshold_usesubset)) {
        if (tmp_subset_ == nullptr || is_change_dataset) {
          tmp_subset_.reset(new Dataset(bag_data_cnt_));
          tmp_subset_->CopyFeatureMapperFrom(train_data_);
        }
        is_use_subset_ = true;
        Log::Debug("Use subset for bagging");
Guolin Ke's avatar
Guolin Ke committed
818
      }
Guolin Ke's avatar
Guolin Ke committed
819
820
    }

821
    need_re_bagging_ = true;
822

Guolin Ke's avatar
Guolin Ke committed
823
824
825
826
827
    if (is_use_subset_ && bag_data_cnt_ < num_data_) {
      if (objective_function_ == nullptr) {
        size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
        gradients_.resize(total_size);
        hessians_.resize(total_size);
828
      }
829
    }
830
  } else {
Guolin Ke's avatar
Guolin Ke committed
831
832
    bag_data_cnt_ = num_data_;
    bag_data_indices_.clear();
833
    bagging_runner_.ReSize(0);
Guolin Ke's avatar
Guolin Ke committed
834
    is_use_subset_ = false;
835
  }
wxchan's avatar
wxchan committed
836
837
}

Guolin Ke's avatar
Guolin Ke committed
838
}  // namespace LightGBM