gbdt.cpp 31.4 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#include "gbdt.h"

#include <LightGBM/metric.h>
Guolin Ke's avatar
Guolin Ke committed
8
#include <LightGBM/network.h>
9
10
11
12
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>
Guolin Ke's avatar
Guolin Ke committed
13

14
15
16
17
#include <chrono>
#include <ctime>
#include <sstream>

Guolin Ke's avatar
Guolin Ke committed
18
19
namespace LightGBM {

20
21
Common::Timer global_timer;

22
23
24
int LGBM_config_::current_device = lgbm_device_cpu;
int LGBM_config_::current_learner = use_cpu_learner;

25
26
27
GBDT::GBDT()
    : iter_(0),
      train_data_(nullptr),
28
      config_(nullptr),
29
30
31
32
33
34
35
36
37
38
39
40
      objective_function_(nullptr),
      early_stopping_round_(0),
      es_first_metric_only_(false),
      max_feature_idx_(0),
      num_tree_per_iteration_(1),
      num_class_(1),
      num_iteration_for_pred_(0),
      shrinkage_rate_(0.1f),
      num_init_iteration_(0),
      need_re_bagging_(false),
      balanced_bagging_(false),
      bagging_runner_(0, bagging_rand_block_) {
Guolin Ke's avatar
Guolin Ke committed
41
  average_output_ = false;
Guolin Ke's avatar
Guolin Ke committed
42
  tree_learner_ = nullptr;
43
  linear_tree_ = false;
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
}

GBDT::~GBDT() {
}

Guolin Ke's avatar
Guolin Ke committed
49
void GBDT::Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
50
                const std::vector<const Metric*>& training_metrics) {
Nikita Titov's avatar
Nikita Titov committed
51
  CHECK_NOTNULL(train_data);
52
  train_data_ = train_data;
53
  if (!config->monotone_constraints.empty()) {
Nikita Titov's avatar
Nikita Titov committed
54
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->monotone_constraints.size());
Nikita Titov's avatar
Nikita Titov committed
55
  }
56
  if (!config->feature_contri.empty()) {
Nikita Titov's avatar
Nikita Titov committed
57
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->feature_contri.size());
58
  }
59
  iter_ = 0;
wxchan's avatar
wxchan committed
60
  num_iteration_for_pred_ = 0;
61
  max_feature_idx_ = 0;
wxchan's avatar
wxchan committed
62
  num_class_ = config->num_class;
Guolin Ke's avatar
Guolin Ke committed
63
64
  config_ = std::unique_ptr<Config>(new Config(*config));
  early_stopping_round_ = config_->early_stopping_round;
65
  es_first_metric_only_ = config_->first_metric_only;
Guolin Ke's avatar
Guolin Ke committed
66
  shrinkage_rate_ = config_->learning_rate;
67

68
69
70
71
  if (config_->device_type == std::string("cuda")) {
    LGBM_config_::current_learner = use_cuda_learner;
  }

72
  // load forced_splits file
73
74
75
76
77
  if (!config->forcedsplits_filename.empty()) {
    std::ifstream forced_splits_file(config->forcedsplits_filename.c_str());
    std::stringstream buffer;
    buffer << forced_splits_file.rdbuf();
    std::string err;
Guolin Ke's avatar
Guolin Ke committed
78
    forced_splits_json_ = Json::parse(buffer.str(), &err);
79
80
  }

81
82
83
  objective_function_ = objective_function;
  num_tree_per_iteration_ = num_class_;
  if (objective_function_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
84
    num_tree_per_iteration_ = objective_function_->NumModelPerIteration();
85
86
87
    if (objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) {
      Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
    }
88
89
  }

90
91
  is_constant_hessian_ = GetIsConstHessian(objective_function);

92
93
  tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
                                                                              config_.get()));
94
95
96

  // init tree learner
  tree_learner_->Init(train_data_, is_constant_hessian_);
97
  tree_learner_->SetForcedSplit(&forced_splits_json_);
98
99
100
101
102
103
104
105
106
107
108

  // push training metrics
  training_metrics_.clear();
  for (const auto& metric : training_metrics) {
    training_metrics_.push_back(metric);
  }
  training_metrics_.shrink_to_fit();

  train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));

  num_data_ = train_data_->num_data();
Andrew Ziem's avatar
Andrew Ziem committed
109
  // create buffer for gradients and Hessians
110
111
112
113
114
115
116
117
118
119
120
121
  if (objective_function_ != nullptr) {
    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
    gradients_.resize(total_size);
    hessians_.resize(total_size);
  }
  // get max feature index
  max_feature_idx_ = train_data_->num_total_features() - 1;
  // get label index
  label_idx_ = train_data_->label_idx();
  // get feature names
  feature_names_ = train_data_->feature_names();
  feature_infos_ = train_data_->feature_infos();
122
  monotone_constraints_ = config->monotone_constraints;
123
124

  // if need bagging, create buffer
Guolin Ke's avatar
Guolin Ke committed
125
  ResetBaggingConfig(config_.get(), true);
126
127
128

  class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
  if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {
Nikita Titov's avatar
Nikita Titov committed
129
    CHECK_EQ(num_tree_per_iteration_, num_class_);
130
131
    for (int i = 0; i < num_class_; ++i) {
      class_need_train_[i] = objective_function_->ClassNeedTrain(i);
132
133
    }
  }
134
135
136
137

  if (config_->linear_tree) {
    linear_tree_ = true;
  }
wxchan's avatar
wxchan committed
138
139
140
}

void GBDT::AddValidDataset(const Dataset* valid_data,
141
                           const std::vector<const Metric*>& valid_metrics) {
wxchan's avatar
wxchan committed
142
  if (!train_data_->CheckAlign(*valid_data)) {
143
    Log::Fatal("Cannot add validation data, since it has different bin mappers with training data");
144
  }
Guolin Ke's avatar
Guolin Ke committed
145
  // for a validation dataset, we need its score and metric
146
  auto new_score_updater = std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
wxchan's avatar
wxchan committed
147
148
  // update score
  for (int i = 0; i < iter_; ++i) {
149
150
151
    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
      auto curr_tree = (i + num_init_iteration_) * num_tree_per_iteration_ + cur_tree_id;
      new_score_updater->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
152
153
    }
  }
Guolin Ke's avatar
Guolin Ke committed
154
  valid_score_updater_.push_back(std::move(new_score_updater));
Guolin Ke's avatar
Guolin Ke committed
155
156
157
158
  valid_metrics_.emplace_back();
  for (const auto& metric : valid_metrics) {
    valid_metrics_.back().push_back(metric);
  }
Guolin Ke's avatar
Guolin Ke committed
159
  valid_metrics_.back().shrink_to_fit();
160

161
162
163
164
165
166
167
  if (early_stopping_round_ > 0) {
    auto num_metrics = valid_metrics.size();
    if (es_first_metric_only_) { num_metrics = 1; }
    best_iter_.emplace_back(num_metrics, 0);
    best_score_.emplace_back(num_metrics, kMinScore);
    best_msg_.emplace_back(num_metrics);
  }
Guolin Ke's avatar
Guolin Ke committed
168
169
}

Guolin Ke's avatar
Guolin Ke committed
170
void GBDT::Boosting() {
171
  Common::FunctionTimer fun_timer("GBDT::Boosting", global_timer);
Guolin Ke's avatar
Guolin Ke committed
172
173
174
175
176
177
178
179
180
  if (objective_function_ == nullptr) {
    Log::Fatal("No object function provided");
  }
  // objective function will calculate gradients and hessians
  int64_t num_score = 0;
  objective_function_->
    GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
}

181
data_size_t GBDT::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
182
183
184
  if (cnt <= 0) {
    return 0;
  }
185
  data_size_t cur_left_cnt = 0;
186
  data_size_t cur_right_pos = cnt;
187
188
  // random bagging, minimal unit is one record
  for (data_size_t i = 0; i < cnt; ++i) {
189
190
191
    auto cur_idx = start + i;
    if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < config_->bagging_fraction) {
      buffer[cur_left_cnt++] = cur_idx;
192
    } else {
193
      buffer[--cur_right_pos] = cur_idx;
194
195
196
197
    }
  }
  return cur_left_cnt;
}
Guolin Ke's avatar
Guolin Ke committed
198

199
200
data_size_t GBDT::BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                        data_size_t* buffer) {
Guolin Ke's avatar
Guolin Ke committed
201
202
203
204
205
  if (cnt <= 0) {
    return 0;
  }
  auto label_ptr = train_data_->metadata().label();
  data_size_t cur_left_cnt = 0;
206
  data_size_t cur_right_pos = cnt;
Guolin Ke's avatar
Guolin Ke committed
207
208
  // random bagging, minimal unit is one record
  for (data_size_t i = 0; i < cnt; ++i) {
209
    auto cur_idx = start + i;
Guolin Ke's avatar
Guolin Ke committed
210
211
212
    bool is_pos = label_ptr[start + i] > 0;
    bool is_in_bag = false;
    if (is_pos) {
213
214
      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
                  config_->pos_bagging_fraction;
Guolin Ke's avatar
Guolin Ke committed
215
    } else {
216
217
      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
                  config_->neg_bagging_fraction;
Guolin Ke's avatar
Guolin Ke committed
218
219
    }
    if (is_in_bag) {
220
      buffer[cur_left_cnt++] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
221
    } else {
222
      buffer[--cur_right_pos] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
223
224
225
226
227
    }
  }
  return cur_left_cnt;
}

228
void GBDT::Bagging(int iter) {
229
  Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
Guolin Ke's avatar
Guolin Ke committed
230
  // if need bagging
Guolin Ke's avatar
Guolin Ke committed
231
232
  if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
      need_re_bagging_) {
Guolin Ke's avatar
Guolin Ke committed
233
    need_re_bagging_ = false;
234
235
236
237
238
239
240
241
    auto left_cnt = bagging_runner_.Run<true>(
        num_data_,
        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
            data_size_t*) {
          data_size_t cur_left_count = 0;
          if (balanced_bagging_) {
            cur_left_count =
                BalancedBaggingHelper(cur_start, cur_cnt, left);
Guolin Ke's avatar
Guolin Ke committed
242
          } else {
243
            cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
Guolin Ke's avatar
Guolin Ke committed
244
          }
245
246
247
          return cur_left_count;
        },
        bag_data_indices_.data());
Guolin Ke's avatar
Guolin Ke committed
248
    bag_data_cnt_ = left_cnt;
Guolin Ke's avatar
Guolin Ke committed
249
    Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
250
    // set bagging data to tree learner
Guolin Ke's avatar
Guolin Ke committed
251
    if (!is_use_subset_) {
252
      tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
253
254
    } else {
      // get subset
Guolin Ke's avatar
Guolin Ke committed
255
      tmp_subset_->ReSize(bag_data_cnt_);
256
      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
Guolin Ke's avatar
Guolin Ke committed
257
                              bag_data_cnt_, false);
258
259
      tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
                                    bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
260
    }
Guolin Ke's avatar
Guolin Ke committed
261
262
263
  }
}

Guolin Ke's avatar
Guolin Ke committed
264
void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
265
  Common::FunctionTimer fun_timer("GBDT::Train", global_timer);
Guolin Ke's avatar
Guolin Ke committed
266
267
  bool is_finished = false;
  auto start_time = std::chrono::steady_clock::now();
Guolin Ke's avatar
Guolin Ke committed
268
  for (int iter = 0; iter < config_->num_iterations && !is_finished; ++iter) {
Guolin Ke's avatar
Guolin Ke committed
269
270
271
272
    is_finished = TrainOneIter(nullptr, nullptr);
    if (!is_finished) {
      is_finished = EvalAndCheckEarlyStopping();
    }
Guolin Ke's avatar
Guolin Ke committed
273
274
275
276
277
278
279
    auto end_time = std::chrono::steady_clock::now();
    // output used time per iteration
    Log::Info("%f seconds elapsed, finished iteration %d", std::chrono::duration<double,
              std::milli>(end_time - start_time) * 1e-3, iter + 1);
    if (snapshot_freq > 0
        && (iter + 1) % snapshot_freq == 0) {
      std::string snapshot_out = model_output_path + ".snapshot_iter_" + std::to_string(iter + 1);
280
      SaveModelToFile(0, -1, config_->saved_feature_importance_type, snapshot_out.c_str());
Guolin Ke's avatar
Guolin Ke committed
281
282
283
284
    }
  }
}

285
void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) {
286
287
288
  CHECK_GT(tree_leaf_prediction.size(), 0);
  CHECK_EQ(static_cast<size_t>(num_data_), tree_leaf_prediction.size());
  CHECK_EQ(static_cast<size_t>(models_.size()), tree_leaf_prediction[0].size());
289
290
  int num_iterations = static_cast<int>(models_.size() / num_tree_per_iteration_);
  std::vector<int> leaf_pred(num_data_);
291
292
293
294
295
296
297
298
299
300
301
302
303
  if (linear_tree_) {
    std::vector<int> max_leaves_by_thread = std::vector<int>(OMP_NUM_THREADS(), 0);
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < static_cast<int>(tree_leaf_prediction.size()); ++i) {
      int tid = omp_get_thread_num();
      for (size_t j = 0; j < tree_leaf_prediction[i].size(); ++j) {
        max_leaves_by_thread[tid] = std::max(max_leaves_by_thread[tid], tree_leaf_prediction[i][j]);
      }
    }
    int max_leaves = *std::max_element(max_leaves_by_thread.begin(), max_leaves_by_thread.end());
    max_leaves += 1;
    tree_learner_->InitLinear(train_data_, max_leaves);
  }
304
305
306
307
308
309
310
  for (int iter = 0; iter < num_iterations; ++iter) {
    Boosting();
    for (int tree_id = 0; tree_id < num_tree_per_iteration_; ++tree_id) {
      int model_index = iter * num_tree_per_iteration_ + tree_id;
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < num_data_; ++i) {
        leaf_pred[i] = tree_leaf_prediction[i][model_index];
Nikita Titov's avatar
Nikita Titov committed
311
        CHECK_LT(leaf_pred[i], models_[model_index]->num_leaves());
312
      }
313
314
315
      size_t offset = static_cast<size_t>(tree_id) * num_data_;
      auto grad = gradients_.data() + offset;
      auto hess = hessians_.data() + offset;
316
317
318
319
320
321
322
      auto new_tree = tree_learner_->FitByExistingTree(models_[model_index].get(), leaf_pred, grad, hess);
      train_score_updater_->AddScore(tree_learner_.get(), new_tree, tree_id);
      models_[model_index].reset(new_tree);
    }
  }
}

Andrew Ziem's avatar
Andrew Ziem committed
323
/* If the custom "average" is implemented it will be used in place of the label average (if enabled)
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
*
* An improvement to this is to have options to explicitly choose
* (i) standard average
* (ii) custom average if available
* (iii) any user defined scalar bias (e.g. using a new option "init_score" that overrides (i) and (ii) )
*
* (i) and (ii) could be selected as say "auto_init_score" = 0 or 1 etc..
*
*/
double ObtainAutomaticInitialScore(const ObjectiveFunction* fobj, int class_id) {
  double init_score = 0.0;
  if (fobj != nullptr) {
    init_score = fobj->BoostFromScore(class_id);
  }
  if (Network::num_machines() > 1) {
    init_score = Network::GlobalSyncUpByMean(init_score);
  }
  return init_score;
}

Guolin Ke's avatar
Guolin Ke committed
344
double GBDT::BoostFromAverage(int class_id, bool update_scorer) {
345
  Common::FunctionTimer fun_timer("GBDT::BoostFromAverage", global_timer);
346
  // boosting from average label; or customized "average" if implemented for the current objective
347
348
349
  if (models_.empty() && !train_score_updater_->has_init_score() && objective_function_ != nullptr) {
    if (config_->boost_from_average || (train_data_ != nullptr && train_data_->num_features() == 0)) {
      double init_score = ObtainAutomaticInitialScore(objective_function_, class_id);
350
      if (std::fabs(init_score) > kEpsilon) {
Guolin Ke's avatar
Guolin Ke committed
351
352
353
354
355
        if (update_scorer) {
          train_score_updater_->AddScore(init_score, class_id);
          for (auto& score_updater : valid_score_updater_) {
            score_updater->AddScore(init_score, class_id);
          }
356
357
358
        }
        Log::Info("Start training from score %lf", init_score);
        return init_score;
Guolin Ke's avatar
Guolin Ke committed
359
      }
360
361
362
    } else if (std::string(objective_function_->GetName()) == std::string("regression_l1")
               || std::string(objective_function_->GetName()) == std::string("quantile")
               || std::string(objective_function_->GetName()) == std::string("mape")) {
363
      Log::Warning("Disabling boost_from_average in %s may cause the slow convergence", objective_function_->GetName());
364
    }
365
  }
Guolin Ke's avatar
Guolin Ke committed
366
367
  return 0.0f;
}
Guolin Ke's avatar
Guolin Ke committed
368

Guolin Ke's avatar
Guolin Ke committed
369
bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
370
  Common::FunctionTimer fun_timer("GBDT::TrainOneIter", global_timer);
371
  std::vector<double> init_scores(num_tree_per_iteration_, 0.0);
Guolin Ke's avatar
Guolin Ke committed
372
  // boosting first
Guolin Ke's avatar
Guolin Ke committed
373
  if (gradients == nullptr || hessians == nullptr) {
374
    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
375
      init_scores[cur_tree_id] = BoostFromAverage(cur_tree_id, true);
376
    }
Guolin Ke's avatar
Guolin Ke committed
377
    Boosting();
Guolin Ke's avatar
Guolin Ke committed
378
379
    gradients = gradients_.data();
    hessians = hessians_.data();
Guolin Ke's avatar
Guolin Ke committed
380
  }
381
382
  // bagging logic
  Bagging(iter_);
Guolin Ke's avatar
Guolin Ke committed
383

Guolin Ke's avatar
Guolin Ke committed
384
  bool should_continue = false;
385
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
386
    const size_t offset = static_cast<size_t>(cur_tree_id) * num_data_;
387
    std::unique_ptr<Tree> new_tree(new Tree(2, false, false));
388
    if (class_need_train_[cur_tree_id] && train_data_->num_features() > 0) {
389
390
      auto grad = gradients + offset;
      auto hess = hessians + offset;
Guolin Ke's avatar
Guolin Ke committed
391
392
393
      // need to copy gradients for bagging subset.
      if (is_use_subset_ && bag_data_cnt_ < num_data_) {
        for (int i = 0; i < bag_data_cnt_; ++i) {
394
395
          gradients_[offset + i] = grad[bag_data_indices_[i]];
          hessians_[offset + i] = hess[bag_data_indices_[i]];
Guolin Ke's avatar
Guolin Ke committed
396
        }
397
398
        grad = gradients_.data() + offset;
        hess = hessians_.data() + offset;
Guolin Ke's avatar
Guolin Ke committed
399
      }
400
401
      bool is_first_tree = models_.size() < static_cast<size_t>(num_tree_per_iteration_);
      new_tree.reset(tree_learner_->Train(grad, hess, is_first_tree));
402
    }
Guolin Ke's avatar
Guolin Ke committed
403

Guolin Ke's avatar
Guolin Ke committed
404
    if (new_tree->num_leaves() > 1) {
Guolin Ke's avatar
Guolin Ke committed
405
      should_continue = true;
406
      auto score_ptr = train_score_updater_->score() + offset;
407
408
      auto residual_getter = [score_ptr](const label_t* label, int i) {return static_cast<double>(label[i]) - score_ptr[i]; };
      tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter,
409
                                     num_data_, bag_data_indices_.data(), bag_data_cnt_);
Guolin Ke's avatar
Guolin Ke committed
410
411
412
      // shrinkage by learning rate
      new_tree->Shrinkage(shrinkage_rate_);
      // update score
413
      UpdateScore(new_tree.get(), cur_tree_id);
414
415
      if (std::fabs(init_scores[cur_tree_id]) > kEpsilon) {
        new_tree->AddBias(init_scores[cur_tree_id]);
Guolin Ke's avatar
Guolin Ke committed
416
      }
417
418
    } else {
      // only add default score one-time
419
420
421
422
423
424
425
426
427
      if (models_.size() < static_cast<size_t>(num_tree_per_iteration_)) {
        double output = 0.0;
        if (!class_need_train_[cur_tree_id]) {
          if (objective_function_ != nullptr) {
            output = objective_function_->BoostFromScore(cur_tree_id);
          }
        } else {
          output = init_scores[cur_tree_id];
        }
428
        new_tree->AsConstantTree(output);
Guolin Ke's avatar
Guolin Ke committed
429
        // updates scores
430
        train_score_updater_->AddScore(output, cur_tree_id);
431
        for (auto& score_updater : valid_score_updater_) {
432
          score_updater->AddScore(output, cur_tree_id);
433
434
435
        }
      }
    }
Guolin Ke's avatar
Guolin Ke committed
436
437
438
    // add model
    models_.push_back(std::move(new_tree));
  }
Guolin Ke's avatar
Guolin Ke committed
439

Guolin Ke's avatar
Guolin Ke committed
440
  if (!should_continue) {
441
    Log::Warning("Stopped training because there are no more leaves that meet the split requirements");
442
443
444
445
    if (models_.size() > static_cast<size_t>(num_tree_per_iteration_)) {
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
        models_.pop_back();
      }
Guolin Ke's avatar
Guolin Ke committed
446
447
448
    }
    return true;
  }
449

Guolin Ke's avatar
Guolin Ke committed
450
451
  ++iter_;
  return false;
Guolin Ke's avatar
Guolin Ke committed
452
}
453

wxchan's avatar
wxchan committed
454
void GBDT::RollbackOneIter() {
455
  if (iter_ <= 0) { return; }
wxchan's avatar
wxchan committed
456
  // reset score
457
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
458
    auto curr_tree = models_.size() - num_tree_per_iteration_ + cur_tree_id;
wxchan's avatar
wxchan committed
459
    models_[curr_tree]->Shrinkage(-1.0);
460
    train_score_updater_->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
461
    for (auto& score_updater : valid_score_updater_) {
462
      score_updater->AddScore(models_[curr_tree].get(), cur_tree_id);
wxchan's avatar
wxchan committed
463
464
465
    }
  }
  // remove model
466
  for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
wxchan's avatar
wxchan committed
467
468
469
470
471
    models_.pop_back();
  }
  --iter_;
}

Guolin Ke's avatar
Guolin Ke committed
472
bool GBDT::EvalAndCheckEarlyStopping() {
473
474
  bool is_met_early_stopping = false;
  // print message for metric
Guolin Ke's avatar
Guolin Ke committed
475
  auto best_msg = OutputMetric(iter_);
Guolin Ke's avatar
Guolin Ke committed
476
477


Guolin Ke's avatar
Guolin Ke committed
478
  is_met_early_stopping = !best_msg.empty();
479
480
  if (is_met_early_stopping) {
    Log::Info("Early stopping at iteration %d, the best iteration round is %d",
481
              iter_, iter_ - early_stopping_round_);
Guolin Ke's avatar
Guolin Ke committed
482
    Log::Info("Output of best iteration round:\n%s", best_msg.c_str());
483
    // pop last early_stopping_round_ models
484
    for (int i = 0; i < early_stopping_round_ * num_tree_per_iteration_; ++i) {
485
486
487
488
      models_.pop_back();
    }
  }
  return is_met_early_stopping;
Guolin Ke's avatar
Guolin Ke committed
489
490
}

491
void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
492
  Common::FunctionTimer fun_timer("GBDT::UpdateScore", global_timer);
Guolin Ke's avatar
Guolin Ke committed
493
  // update training score
Guolin Ke's avatar
Guolin Ke committed
494
  if (!is_use_subset_) {
495
    train_score_updater_->AddScore(tree_learner_.get(), tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
496
497
498
499
500
501

    // we need to predict out-of-bag scores of data for boosting
    if (num_data_ - bag_data_cnt_ > 0) {
      train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, cur_tree_id);
    }

Guolin Ke's avatar
Guolin Ke committed
502
  } else {
503
    train_score_updater_->AddScore(tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
504
  }
Guolin Ke's avatar
Guolin Ke committed
505
506


Guolin Ke's avatar
Guolin Ke committed
507
  // update validation score
Guolin Ke's avatar
Guolin Ke committed
508
  for (auto& score_updater : valid_score_updater_) {
509
    score_updater->AddScore(tree, cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
510
511
512
  }
}

Guolin Ke's avatar
Guolin Ke committed
513
514
515
516
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score) const {
  return metric->Eval(score, objective_function_);
}

Guolin Ke's avatar
Guolin Ke committed
517
std::string GBDT::OutputMetric(int iter) {
Guolin Ke's avatar
Guolin Ke committed
518
  bool need_output = (iter % config_->metric_freq) == 0;
Guolin Ke's avatar
Guolin Ke committed
519
520
  std::string ret = "";
  std::stringstream msg_buf;
521
  std::vector<std::pair<size_t, size_t>> meet_early_stopping_pairs;
Guolin Ke's avatar
Guolin Ke committed
522
  // print training metric
Guolin Ke's avatar
Guolin Ke committed
523
  if (need_output) {
524
525
    for (auto& sub_metric : training_metrics_) {
      auto name = sub_metric->GetName();
Guolin Ke's avatar
Guolin Ke committed
526
      auto scores = EvalOneMetric(sub_metric, train_score_updater_->score());
Guolin Ke's avatar
Guolin Ke committed
527
      for (size_t k = 0; k < name.size(); ++k) {
Guolin Ke's avatar
Guolin Ke committed
528
529
530
531
532
533
        std::stringstream tmp_buf;
        tmp_buf << "Iteration:" << iter
          << ", training " << name[k]
          << " : " << scores[k];
        Log::Info(tmp_buf.str().c_str());
        if (early_stopping_round_ > 0) {
534
          msg_buf << tmp_buf.str() << '\n';
Guolin Ke's avatar
Guolin Ke committed
535
        }
536
      }
537
    }
Guolin Ke's avatar
Guolin Ke committed
538
539
  }
  // print validation metric
Guolin Ke's avatar
Guolin Ke committed
540
  if (need_output || early_stopping_round_ > 0) {
541
542
    for (size_t i = 0; i < valid_metrics_.size(); ++i) {
      for (size_t j = 0; j < valid_metrics_[i].size(); ++j) {
Guolin Ke's avatar
Guolin Ke committed
543
        auto test_scores = EvalOneMetric(valid_metrics_[i][j], valid_score_updater_[i]->score());
Guolin Ke's avatar
Guolin Ke committed
544
545
546
547
548
549
550
551
552
553
        auto name = valid_metrics_[i][j]->GetName();
        for (size_t k = 0; k < name.size(); ++k) {
          std::stringstream tmp_buf;
          tmp_buf << "Iteration:" << iter
            << ", valid_" << i + 1 << " " << name[k]
            << " : " << test_scores[k];
          if (need_output) {
            Log::Info(tmp_buf.str().c_str());
          }
          if (early_stopping_round_ > 0) {
554
            msg_buf << tmp_buf.str() << '\n';
555
          }
wxchan's avatar
wxchan committed
556
        }
557
        if (es_first_metric_only_ && j > 0) { continue; }
Guolin Ke's avatar
Guolin Ke committed
558
        if (ret.empty() && early_stopping_round_ > 0) {
559
560
561
          auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
          if (cur_score > best_score_[i][j]) {
            best_score_[i][j] = cur_score;
562
            best_iter_[i][j] = iter;
Guolin Ke's avatar
Guolin Ke committed
563
            meet_early_stopping_pairs.emplace_back(i, j);
564
          } else {
Guolin Ke's avatar
Guolin Ke committed
565
            if (iter - best_iter_[i][j] >= early_stopping_round_) { ret = best_msg_[i][j]; }
566
          }
wxchan's avatar
wxchan committed
567
568
        }
      }
Guolin Ke's avatar
Guolin Ke committed
569
570
    }
  }
Guolin Ke's avatar
Guolin Ke committed
571
572
573
  for (auto& pair : meet_early_stopping_pairs) {
    best_msg_[pair.first][pair.second] = msg_buf.str();
  }
wxchan's avatar
wxchan committed
574
  return ret;
Guolin Ke's avatar
Guolin Ke committed
575
576
}

577
/*! \brief Get eval result */
578
std::vector<double> GBDT::GetEvalAt(int data_idx) const {
Guolin Ke's avatar
Guolin Ke committed
579
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
580
581
  std::vector<double> ret;
  if (data_idx == 0) {
582
    for (auto& sub_metric : training_metrics_) {
Guolin Ke's avatar
Guolin Ke committed
583
      auto scores = EvalOneMetric(sub_metric, train_score_updater_->score());
584
585
586
      for (auto score : scores) {
        ret.push_back(score);
      }
587
    }
588
  } else {
589
590
    auto used_idx = data_idx - 1;
    for (size_t j = 0; j < valid_metrics_[used_idx].size(); ++j) {
Guolin Ke's avatar
Guolin Ke committed
591
      auto test_scores = EvalOneMetric(valid_metrics_[used_idx][j], valid_score_updater_[used_idx]->score());
592
593
594
      for (auto score : test_scores) {
        ret.push_back(score);
      }
595
596
597
598
599
    }
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
600
/*! \brief Get training scores result */
601
const double* GBDT::GetTrainingScore(int64_t* out_len) {
602
  *out_len = static_cast<int64_t>(train_score_updater_->num_data()) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
603
  return train_score_updater_->score();
604
605
}

606
void GBDT::PredictContrib(const double* features, double* output) const {
607
  // set zero
Guolin Ke's avatar
Guolin Ke committed
608
609
  const int num_features = max_feature_idx_ + 1;
  std::memset(output, 0, sizeof(double) * num_tree_per_iteration_ * (num_features + 1));
610
611
  const int end_iteration_for_pred = start_iteration_for_pred_ + num_iteration_for_pred_;
  for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) {
612
613
    // predict all the trees for one iteration
    for (int k = 0; k < num_tree_per_iteration_; ++k) {
Guolin Ke's avatar
Guolin Ke committed
614
      models_[i * num_tree_per_iteration_ + k]->PredictContrib(features, num_features, output + k*(num_features + 1));
615
    }
616
617
618
619
620
621
  }
}

void GBDT::PredictContribByMap(const std::unordered_map<int, double>& features,
                               std::vector<std::unordered_map<int, double>>* output) const {
  const int num_features = max_feature_idx_ + 1;
622
623
  const int end_iteration_for_pred = start_iteration_for_pred_ + num_iteration_for_pred_;
  for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) {
624
625
626
    // predict all the trees for one iteration
    for (int k = 0; k < num_tree_per_iteration_; ++k) {
      models_[i * num_tree_per_iteration_ + k]->PredictContribByMap(features, num_features, &((*output)[k]));
627
628
629
630
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
631
632
void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
Guolin Ke's avatar
Guolin Ke committed
633

634
  const double* raw_scores = nullptr;
Guolin Ke's avatar
Guolin Ke committed
635
636
  data_size_t num_data = 0;
  if (data_idx == 0) {
wxchan's avatar
wxchan committed
637
    raw_scores = GetTrainingScore(out_len);
Guolin Ke's avatar
Guolin Ke committed
638
639
640
641
642
    num_data = train_score_updater_->num_data();
  } else {
    auto used_idx = data_idx - 1;
    raw_scores = valid_score_updater_[used_idx]->score();
    num_data = valid_score_updater_[used_idx]->num_data();
643
    *out_len = static_cast<int64_t>(num_data) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
644
  }
Guolin Ke's avatar
Guolin Ke committed
645
  if (objective_function_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
646
647
    #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data; ++i) {
Guolin Ke's avatar
Guolin Ke committed
648
      std::vector<double> tree_pred(num_tree_per_iteration_);
649
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
650
        tree_pred[j] = raw_scores[j * num_data + i];
651
      }
Guolin Ke's avatar
Guolin Ke committed
652
653
      std::vector<double> tmp_result(num_class_);
      objective_function_->ConvertOutput(tree_pred.data(), tmp_result.data());
Guolin Ke's avatar
Guolin Ke committed
654
      for (int j = 0; j < num_class_; ++j) {
655
        out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
Guolin Ke's avatar
Guolin Ke committed
656
657
      }
    }
658
  } else {
Guolin Ke's avatar
Guolin Ke committed
659
    #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
660
    for (data_size_t i = 0; i < num_data; ++i) {
661
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
662
        out_result[j * num_data + i] = static_cast<double>(raw_scores[j * num_data + i]);
Guolin Ke's avatar
Guolin Ke committed
663
664
665
666
667
      }
    }
  }
}

668
669
double GBDT::GetUpperBoundValue() const {
  double max_value = 0.0;
Nikita Titov's avatar
Nikita Titov committed
670
  for (const auto &tree : models_) {
671
672
673
674
675
676
677
    max_value += tree->GetUpperBoundValue();
  }
  return max_value;
}

double GBDT::GetLowerBoundValue() const {
  double min_value = 0.0;
Nikita Titov's avatar
Nikita Titov committed
678
  for (const auto &tree : models_) {
679
680
681
682
683
    min_value += tree->GetLowerBoundValue();
  }
  return min_value;
}

Guolin Ke's avatar
Guolin Ke committed
684
685
686
void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                             const std::vector<const Metric*>& training_metrics) {
  if (train_data != train_data_ && !train_data_->CheckAlign(*train_data)) {
687
    Log::Fatal("Cannot reset training data, since new training data has different bin mappers");
wxchan's avatar
wxchan committed
688
689
  }

Guolin Ke's avatar
Guolin Ke committed
690
691
  objective_function_ = objective_function;
  if (objective_function_ != nullptr) {
Nikita Titov's avatar
Nikita Titov committed
692
    CHECK_EQ(num_tree_per_iteration_, objective_function_->NumModelPerIteration());
693
694
695
    if (objective_function_->IsRenewTreeOutput() && !config_->monotone_constraints.empty()) {
      Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
    }
696
  }
697
  is_constant_hessian_ = GetIsConstHessian(objective_function);
698

Guolin Ke's avatar
Guolin Ke committed
699
700
701
702
  // push training metrics
  training_metrics_.clear();
  for (const auto& metric : training_metrics) {
    training_metrics_.push_back(metric);
703
  }
Guolin Ke's avatar
Guolin Ke committed
704
  training_metrics_.shrink_to_fit();
705

Guolin Ke's avatar
Guolin Ke committed
706
707
708
709
710
  if (train_data != train_data_) {
    train_data_ = train_data;
    // not same training data, need reset score and others
    // create score tracker
    train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
711

Guolin Ke's avatar
Guolin Ke committed
712
713
714
715
716
717
    // update score
    for (int i = 0; i < iter_; ++i) {
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
        auto curr_tree = (i + num_init_iteration_) * num_tree_per_iteration_ + cur_tree_id;
        train_score_updater_->AddScore(models_[curr_tree].get(), cur_tree_id);
      }
718
719
    }

Guolin Ke's avatar
Guolin Ke committed
720
    num_data_ = train_data_->num_data();
721

Guolin Ke's avatar
Guolin Ke committed
722
723
724
725
726
727
    // create buffer for gradients and hessians
    if (objective_function_ != nullptr) {
      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
      gradients_.resize(total_size);
      hessians_.resize(total_size);
    }
728

Guolin Ke's avatar
Guolin Ke committed
729
730
731
732
    max_feature_idx_ = train_data_->num_total_features() - 1;
    label_idx_ = train_data_->label_idx();
    feature_names_ = train_data_->feature_names();
    feature_infos_ = train_data_->feature_infos();
733

734
    tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
Guolin Ke's avatar
Guolin Ke committed
735
    ResetBaggingConfig(config_.get(), true);
736
737
  } else {
    tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
738
  }
739
740
}

Guolin Ke's avatar
Guolin Ke committed
741
742
void GBDT::ResetConfig(const Config* config) {
  auto new_config = std::unique_ptr<Config>(new Config(*config));
743
  if (!config->monotone_constraints.empty()) {
Nikita Titov's avatar
Nikita Titov committed
744
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->monotone_constraints.size());
745
746
  }
  if (!config->feature_contri.empty()) {
Nikita Titov's avatar
Nikita Titov committed
747
    CHECK_EQ(static_cast<size_t>(train_data_->num_total_features()), config->feature_contri.size());
748
  }
749
750
751
  if (objective_function_ != nullptr && objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) {
    Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
  }
Guolin Ke's avatar
Guolin Ke committed
752
753
754
  early_stopping_round_ = new_config->early_stopping_round;
  shrinkage_rate_ = new_config->learning_rate;
  if (tree_learner_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
755
    tree_learner_->ResetConfig(new_config.get());
756
  }
Guolin Ke's avatar
Guolin Ke committed
757
758
  if (train_data_ != nullptr) {
    ResetBaggingConfig(new_config.get(), false);
759
  }
760
  if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
761
762
763
764
765
766
767
    // load forced_splits file
    if (!new_config->forcedsplits_filename.empty()) {
      std::ifstream forced_splits_file(
          new_config->forcedsplits_filename.c_str());
      std::stringstream buffer;
      buffer << forced_splits_file.rdbuf();
      std::string err;
Guolin Ke's avatar
Guolin Ke committed
768
      forced_splits_json_ = Json::parse(buffer.str(), &err);
769
770
771
772
773
774
      tree_learner_->SetForcedSplit(&forced_splits_json_);
    } else {
      forced_splits_json_ = Json();
      tree_learner_->SetForcedSplit(nullptr);
    }
  }
Guolin Ke's avatar
Guolin Ke committed
775
  config_.reset(new_config.release());
Guolin Ke's avatar
Guolin Ke committed
776
777
}

Guolin Ke's avatar
Guolin Ke committed
778
void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
Guolin Ke's avatar
Guolin Ke committed
779
  // if need bagging, create buffer
Guolin Ke's avatar
Guolin Ke committed
780
781
782
783
784
785
  data_size_t num_pos_data = 0;
  if (objective_function_ != nullptr) {
    num_pos_data = objective_function_->NumPositiveData();
  }
  bool balance_bagging_cond = (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
  if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) {
786
787
    need_re_bagging_ = false;
    if (!is_change_dataset &&
Guolin Ke's avatar
Guolin Ke committed
788
789
      config_.get() != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq
      && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
790
791
      return;
    }
Guolin Ke's avatar
Guolin Ke committed
792
793
    if (balance_bagging_cond) {
      balanced_bagging_ = true;
794
      bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
Guolin Ke's avatar
Guolin Ke committed
795
796
797
798
                      + static_cast<data_size_t>((num_data_ - num_pos_data) * config->neg_bagging_fraction);
    } else {
      bag_data_cnt_ = static_cast<data_size_t>(config->bagging_fraction * num_data_);
    }
Guolin Ke's avatar
Guolin Ke committed
799
    bag_data_indices_.resize(num_data_);
800
801
802
803
804
805
    bagging_runner_.ReSize(num_data_);
    bagging_rands_.clear();
    for (int i = 0;
         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
      bagging_rands_.emplace_back(config_->bagging_seed + i);
    }
806

807
808
    double average_bag_rate =
        (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
Guolin Ke's avatar
Guolin Ke committed
809
810
    is_use_subset_ = false;
    const int group_threshold_usesubset = 100;
811
    if (average_bag_rate <= 0.5
812
        && (train_data_->num_feature_groups() < group_threshold_usesubset)) {
Guolin Ke's avatar
Guolin Ke committed
813
814
815
816
817
      if (tmp_subset_ == nullptr || is_change_dataset) {
        tmp_subset_.reset(new Dataset(bag_data_cnt_));
        tmp_subset_->CopyFeatureMapperFrom(train_data_);
      }
      is_use_subset_ = true;
818
      Log::Debug("Use subset for bagging");
Guolin Ke's avatar
Guolin Ke committed
819
820
    }

821
    need_re_bagging_ = true;
822

Guolin Ke's avatar
Guolin Ke committed
823
824
825
826
827
    if (is_use_subset_ && bag_data_cnt_ < num_data_) {
      if (objective_function_ == nullptr) {
        size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
        gradients_.resize(total_size);
        hessians_.resize(total_size);
828
      }
829
    }
830
  } else {
Guolin Ke's avatar
Guolin Ke committed
831
832
    bag_data_cnt_ = num_data_;
    bag_data_indices_.clear();
833
    bagging_runner_.ReSize(0);
Guolin Ke's avatar
Guolin Ke committed
834
    is_use_subset_ = false;
835
  }
wxchan's avatar
wxchan committed
836
837
}

Guolin Ke's avatar
Guolin Ke committed
838
}  // namespace LightGBM