gbdt.h 18.6 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_BOOSTING_GBDT_H_
#define LIGHTGBM_BOOSTING_GBDT_H_

8
9
10
#include <LightGBM/boosting.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
11
#include <LightGBM/cuda/vector_cudahost.h>
12
13
14
#include <LightGBM/utils/json11.h>
#include <LightGBM/utils/threading.h>

Guolin Ke's avatar
Guolin Ke committed
15
#include <string>
16
17
#include <algorithm>
#include <cstdio>
18
#include <fstream>
19
#include <map>
Guolin Ke's avatar
Guolin Ke committed
20
#include <memory>
21
#include <mutex>
22
23
24
25
26
#include <unordered_map>
#include <utility>
#include <vector>

#include "score_updater.hpp"
Guolin Ke's avatar
Guolin Ke committed
27
28

namespace LightGBM {
Guolin Ke's avatar
Guolin Ke committed
29

30
31
using json11::Json;

Guolin Ke's avatar
Guolin Ke committed
32
33
34
/*!
* \brief GBDT algorithm implementation, including training, prediction and bagging.
*/
Guolin Ke's avatar
Guolin Ke committed
35
class GBDT : public GBDTBase {
Nikita Titov's avatar
Nikita Titov committed
36
 public:
Guolin Ke's avatar
Guolin Ke committed
37
38
39
  /*!
  * \brief Constructor
  */
40
  GBDT();
Guolin Ke's avatar
Guolin Ke committed
41

Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
  /*!
  * \brief Destructor
  */
  ~GBDT();
Guolin Ke's avatar
Guolin Ke committed
46

Guolin Ke's avatar
Guolin Ke committed
47
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
48
  * \brief Initialization logic
zhangyafeikimi's avatar
zhangyafeikimi committed
49
  * \param gbdt_config Config for boosting
Guolin Ke's avatar
Guolin Ke committed
50
  * \param train_data Training data
51
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
52
53
  * \param training_metrics Training metrics
  */
Guolin Ke's avatar
Guolin Ke committed
54
  void Init(const Config* gbdt_config, const Dataset* train_data,
55
            const ObjectiveFunction* objective_function,
Guolin Ke's avatar
Guolin Ke committed
56
            const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
57
58

  /*!
Guolin Ke's avatar
Guolin Ke committed
59
  * \brief Merge model from other boosting object. Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
60
61
62
63
64
65
66
67
68
69
70
71
  * \param other
  */
  void MergeFrom(const Boosting* other) override {
    auto other_gbdt = reinterpret_cast<const GBDT*>(other);
    // tmp move to other vector
    auto original_models = std::move(models_);
    models_ = std::vector<std::unique_ptr<Tree>>();
    // push model from other first
    for (const auto& tree : other_gbdt->models_) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
72
    num_init_iteration_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
73
74
75
76
77
    // push model in current object
    for (const auto& tree : original_models) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
78
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
79
80
  }

81
  void ShuffleModels(int start_iter, int end_iter) override {
82
    int total_iter = static_cast<int>(models_.size()) / num_tree_per_iteration_;
83
84
85
86
87
    start_iter = std::max(0, start_iter);
    if (end_iter <= 0) {
      end_iter = total_iter;
    }
    end_iter = std::min(total_iter, end_iter);
88
89
90
91
92
93
    auto original_models = std::move(models_);
    std::vector<int> indices(total_iter);
    for (int i = 0; i < total_iter; ++i) {
      indices[i] = i;
    }
    Random tmp_rand(17);
94
95
    for (int i = start_iter; i < end_iter - 1; ++i) {
      int j = tmp_rand.NextShort(i + 1, end_iter);
96
97
98
99
100
101
102
103
104
105
106
107
      std::swap(indices[i], indices[j]);
    }
    models_ = std::vector<std::unique_ptr<Tree>>();
    for (int i = 0; i < total_iter; ++i) {
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
        int tree_idx = indices[i] * num_tree_per_iteration_ + j;
        auto new_tree = std::unique_ptr<Tree>(new Tree(*(original_models[tree_idx].get())));
        models_.push_back(std::move(new_tree));
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
108
109
110
111
112
113
  /*!
  * \brief Reset the training data
  * \param train_data New Training data
  * \param objective_function Training objective function
  * \param training_metrics Training metrics
  */
114
115
  void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                         const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
116

Guolin Ke's avatar
Guolin Ke committed
117
118
119
120
  /*!
  * \brief Reset Boosting Config
  * \param gbdt_config Config for boosting
  */
Guolin Ke's avatar
Guolin Ke committed
121
  void ResetConfig(const Config* gbdt_config) override;
Guolin Ke's avatar
Guolin Ke committed
122

Guolin Ke's avatar
Guolin Ke committed
123
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
124
125
126
  * \brief Adding a validation dataset
  * \param valid_data Validation dataset
  * \param valid_metrics Metrics for validation dataset
Guolin Ke's avatar
Guolin Ke committed
127
  */
wxchan's avatar
wxchan committed
128
  void AddValidDataset(const Dataset* valid_data,
129
                       const std::vector<const Metric*>& valid_metrics) override;
Guolin Ke's avatar
Guolin Ke committed
130

Guolin Ke's avatar
Guolin Ke committed
131
132
133
134
135
  /*!
  * \brief Perform a full training procedure
  * \param snapshot_freq Frequency of snapshot
  * \param model_output_path path of model file
  */
Guolin Ke's avatar
Guolin Ke committed
136
137
  void Train(int snapshot_freq, const std::string& model_output_path) override;

138
139
  void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) override;

Guolin Ke's avatar
Guolin Ke committed
140
  /*!
Guolin Ke's avatar
Guolin Ke committed
141
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
142
143
144
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
  * \return True if cannot train any more
Guolin Ke's avatar
Guolin Ke committed
145
  */
Guolin Ke's avatar
Guolin Ke committed
146
  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
147

wxchan's avatar
wxchan committed
148
149
150
151
152
  /*!
  * \brief Rollback one iteration
  */
  void RollbackOneIter() override;

Guolin Ke's avatar
Guolin Ke committed
153
154
155
  /*!
  * \brief Get current iteration
  */
Guolin Ke's avatar
Guolin Ke committed
156
  int GetCurrentIteration() const override {
    // Total number of stored trees divided by the number of trees per iteration.
    const int num_trees = static_cast<int>(models_.size());
    return num_trees / num_tree_per_iteration_;
  }
wxchan's avatar
wxchan committed
157

Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
  /*!
  * \brief Can use early stopping for prediction or not
  * \return True if cannot use early stopping for prediction
  */
162
  bool NeedAccuratePrediction() const override {
    // Defer to the objective function when there is one.
    if (objective_function_ != nullptr) {
      return objective_function_->NeedAccuratePrediction();
    }
    // Without an objective function the answer is always true.
    return true;
  }

Guolin Ke's avatar
Guolin Ke committed
170
171
172
173
174
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
175
  std::vector<double> GetEvalAt(int data_idx) const override;
176

Guolin Ke's avatar
Guolin Ke committed
177
178
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
179
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
180
181
  * \return training score
  */
Guolin Ke's avatar
Guolin Ke committed
182
  const double* GetTrainingScore(int64_t* out_len) override;
183

Guolin Ke's avatar
Guolin Ke committed
184
185
186
187
188
  /*!
  * \brief Get size of prediction at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return The size of prediction
  */
Guolin Ke's avatar
Guolin Ke committed
189
  int64_t GetNumPredictAt(int data_idx) const override {
Guolin Ke's avatar
Guolin Ke committed
190
191
192
193
194
195
196
    CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
    data_size_t num_data = train_data_->num_data();
    if (data_idx > 0) {
      num_data = valid_score_updater_[data_idx - 1]->num_data();
    }
    return num_data * num_class_;
  }
Guolin Ke's avatar
Guolin Ke committed
197

Guolin Ke's avatar
Guolin Ke committed
198
199
200
201
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param out_result Used to store the prediction result; memory should be allocated before calling this function
202
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
203
  */
Guolin Ke's avatar
Guolin Ke committed
204
  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) override;
Guolin Ke's avatar
Guolin Ke committed
205

Guolin Ke's avatar
Guolin Ke committed
206
207
  /*!
  * \brief Get number of prediction for one data
208
  * \param start_iteration Start index of the iteration to predict
Guolin Ke's avatar
Guolin Ke committed
209
210
211
212
213
  * \param num_iteration number of used iterations
  * \param is_pred_leaf True if predicting leaf index
  * \param is_pred_contrib True if predicting feature contribution
  * \return number of prediction
  */
214
  inline int NumPredictOneRow(int start_iteration, int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const override {
215
    int num_pred_in_one_row = num_class_;
Guolin Ke's avatar
Guolin Ke committed
216
217
    if (is_pred_leaf) {
      int max_iteration = GetCurrentIteration();
218
219
      start_iteration = std::max(start_iteration, 0);
      start_iteration = std::min(start_iteration, max_iteration);
Guolin Ke's avatar
Guolin Ke committed
220
      if (num_iteration > 0) {
221
        num_pred_in_one_row *= static_cast<int>(std::min(max_iteration - start_iteration, num_iteration));
Guolin Ke's avatar
Guolin Ke committed
222
      } else {
223
        num_pred_in_one_row *= (max_iteration - start_iteration);
Guolin Ke's avatar
Guolin Ke committed
224
      }
225
    } else if (is_pred_contrib) {
226
      num_pred_in_one_row = num_tree_per_iteration_ * (max_feature_idx_ + 2);  // +1 for 0-based indexing, +1 for baseline
Guolin Ke's avatar
Guolin Ke committed
227
    }
228
    return num_pred_in_one_row;
Guolin Ke's avatar
Guolin Ke committed
229
  }
Guolin Ke's avatar
Guolin Ke committed
230

cbecker's avatar
cbecker committed
231
  void PredictRaw(const double* features, double* output,
232
                  const PredictionEarlyStopInstance* earlyStop) const override;
wxchan's avatar
wxchan committed
233

Guolin Ke's avatar
Guolin Ke committed
234
235
  void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                       const PredictionEarlyStopInstance* early_stop) const override;
236

cbecker's avatar
cbecker committed
237
238
  void Predict(const double* features, double* output,
               const PredictionEarlyStopInstance* earlyStop) const override;
Guolin Ke's avatar
Guolin Ke committed
239

Guolin Ke's avatar
Guolin Ke committed
240
241
  void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                    const PredictionEarlyStopInstance* early_stop) const override;
242

243
  void PredictLeafIndex(const double* features, double* output) const override;
wxchan's avatar
wxchan committed
244

245
246
  void PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const override;

247
248
249
250
  void PredictContrib(const double* features, double* output) const override;

  void PredictContribByMap(const std::unordered_map<int, double>& features,
                           std::vector<std::unordered_map<int, double>>* output) const override;
251

Guolin Ke's avatar
Guolin Ke committed
252
  /*!
wxchan's avatar
wxchan committed
253
  * \brief Dump model to json format string
254
  * \param start_iteration Index of the iteration from which the dump starts
255
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
256
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
257
  * \return Json format string of model
Guolin Ke's avatar
Guolin Ke committed
258
  */
259
260
  std::string DumpModel(int start_iteration, int num_iteration,
                        int feature_importance_type) const override;
wxchan's avatar
wxchan committed
261

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  std::string ModelToIfElse(int num_iteration) const override;

  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  bool SaveModelToIfElse(int num_iteration, const char* filename) const override;

wxchan's avatar
wxchan committed
277
278
  /*!
  * \brief Save model to file
279
  * \param start_iteration Index of the iteration from which the saved model starts
wxchan's avatar
wxchan committed
280
  * \param num_iterations Number of model that want to save, -1 means save all
281
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
282
  * \param filename Filename that want to save to
283
  * \return is_finish Is training finished or not
wxchan's avatar
wxchan committed
284
  */
285
286
287
  bool SaveModelToFile(int start_iteration, int num_iterations,
                       int feature_importance_type,
                       const char* filename) const override;
wxchan's avatar
wxchan committed
288

289
290
  /*!
  * \brief Save model to string
291
  * \param start_iteration Index of the iteration from which the saved model starts
wxchan's avatar
wxchan committed
292
  * \param num_iterations Number of model that want to save, -1 means save all
293
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
294
295
  * \return Non-empty string if succeeded
  */
296
  std::string SaveModelToString(int start_iteration, int num_iterations, int feature_importance_type) const override;
297

Guolin Ke's avatar
Guolin Ke committed
298
  /*!
299
  * \brief Restore from a serialized buffer
Guolin Ke's avatar
Guolin Ke committed
300
  */
301
  bool LoadModelFromString(const char* buffer, size_t len) override;
wxchan's avatar
wxchan committed
302

303
304
305
306
307
308
309
310
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  std::vector<double> FeatureImportance(int num_iteration, int importance_type) const override;

311
312
313
314
315
316
317
318
319
320
321
322
  /*!
  * \brief Calculate upper bound value
  * \return upper bound value
  */
  double GetUpperBoundValue() const override;

  /*!
  * \brief Calculate lower bound value
  * \return lower bound value
  */
  double GetLowerBoundValue() const override;

Guolin Ke's avatar
Guolin Ke committed
323
324
325
326
327
  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  inline int MaxFeatureIdx() const override {
    return max_feature_idx_;
  }
Guolin Ke's avatar
Guolin Ke committed
328

wxchan's avatar
wxchan committed
329
330
331
332
333
334
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  inline std::vector<std::string> FeatureNames() const override {
    // Returned by value, so the caller receives its own copy of the names.
    return feature_names_;
  }

Guolin Ke's avatar
Guolin Ke committed
335
336
337
338
339
340
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  inline int LabelIdx() const override {
    // label_idx_ is stored as data_size_t and converted to int on return.
    return label_idx_;
  }

Guolin Ke's avatar
Guolin Ke committed
341
342
343
344
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
345
  inline int NumberOfTotalModel() const override {
    const int num_models = static_cast<int>(models_.size());
    return num_models;
  }
Guolin Ke's avatar
Guolin Ke committed
346

Guolin Ke's avatar
Guolin Ke committed
347
348
349
350
  /*!
  * \brief Get number of tree per iteration
  * \return number of tree per iteration
  */
Guolin Ke's avatar
Guolin Ke committed
351
  inline int NumModelPerIteration() const override {
    return num_tree_per_iteration_;
  }
Guolin Ke's avatar
Guolin Ke committed
352

353
354
355
356
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
357
  inline int NumberOfClasses() const override {
    return num_class_;
  }
358

359
  inline void InitPredict(int start_iteration, int num_iteration, bool is_pred_contrib) override {
Guolin Ke's avatar
Guolin Ke committed
360
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
361
362
    start_iteration = std::max(start_iteration, 0);
    start_iteration = std::min(start_iteration, num_iteration_for_pred_);
wxchan's avatar
wxchan committed
363
    if (num_iteration > 0) {
364
365
366
      num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_ - start_iteration);
    } else {
      num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
367
    }
368
    start_iteration_for_pred_ = start_iteration;
369
370
371
372
373
374
    if (is_pred_contrib) {
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
        models_[i]->RecomputeMaxDepth();
      }
    }
375
  }
wxchan's avatar
wxchan committed
376

Guolin Ke's avatar
Guolin Ke committed
377
  /*!
  * \brief Get the output value of one leaf
  * \param tree_idx Index of the tree in the model list
  * \param leaf_idx Index of the leaf inside that tree
  * \return Output value of the leaf
  */
  inline double GetLeafValue(int tree_idx, int leaf_idx) const override {
    // Both indices are validated before the tree is touched.
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& tree = models_[tree_idx];
    return tree->LeafOutput(leaf_idx);
  }

Guolin Ke's avatar
Guolin Ke committed
383
  /*!
  * \brief Overwrite the output value of one leaf
  * \param tree_idx Index of the tree in the model list
  * \param leaf_idx Index of the leaf inside that tree
  * \param val New output value of the leaf
  */
  inline void SetLeafValue(int tree_idx, int leaf_idx, double val) override {
    // Both indices are validated before the tree is touched.
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& tree = models_[tree_idx];
    tree->SetLeafOutput(leaf_idx, val);
  }

389
390
391
  /*!
  * \brief Get Type name of this boosting object
  */
Guolin Ke's avatar
Guolin Ke committed
392
  const char* SubModelName() const override {
    return "tree";
  }
393

Nikita Titov's avatar
Nikita Titov committed
394
 protected:
395
396
397
398
399
400
401
  /*!
  * \brief Ask the objective function whether its hessian is constant
  * \param objective_function Objective function to query, may be nullptr
  * \return Result of IsConstantHessian(), or false when no objective function is given
  */
  virtual bool GetIsConstHessian(const ObjectiveFunction* objective_function) {
    if (objective_function == nullptr) {
      return false;
    }
    return objective_function->IsConstantHessian();
  }
Guolin Ke's avatar
Guolin Ke committed
402
403
404
  /*!
  * \brief Print eval result and check early stopping
  */
405
  virtual bool EvalAndCheckEarlyStopping();
Guolin Ke's avatar
Guolin Ke committed
406
407
408
409

  /*!
  * \brief reset config for bagging
  */
Guolin Ke's avatar
Guolin Ke committed
410
  void ResetBaggingConfig(const Config* config, bool is_change_dataset);
Guolin Ke's avatar
Guolin Ke committed
411

Guolin Ke's avatar
Guolin Ke committed
412
413
414
415
  /*!
  * \brief Implement bagging logic
  * \param iter Current iteration
  */
416
417
  virtual void Bagging(int iter);

418
419
  virtual data_size_t BaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
420

421
422
  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
423

Guolin Ke's avatar
Guolin Ke committed
424
425
426
  /*!
  * \brief calculate the object function
  */
Guolin Ke's avatar
Guolin Ke committed
427
  virtual void Boosting();
Guolin Ke's avatar
Guolin Ke committed
428

Guolin Ke's avatar
Guolin Ke committed
429
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
430
  * \brief updating score after tree was trained
Guolin Ke's avatar
Guolin Ke committed
431
  * \param tree Trained tree of this iteration
432
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
433
  */
434
  virtual void UpdateScore(const Tree* tree, const int cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
435

Guolin Ke's avatar
Guolin Ke committed
436
437
438
439
  /*!
  * \brief eval results for one metric

  */
Guolin Ke's avatar
Guolin Ke committed
440
  virtual std::vector<double> EvalOneMetric(const Metric* metric, const double* score) const;
Guolin Ke's avatar
Guolin Ke committed
441

Guolin Ke's avatar
Guolin Ke committed
442
  /*!
Hui Xue's avatar
Hui Xue committed
443
  * \brief Print metric result of current iteration
Guolin Ke's avatar
Guolin Ke committed
444
  * \param iter Current iteration
Guolin Ke's avatar
Guolin Ke committed
445
  * \return best_msg if met early_stopping
Guolin Ke's avatar
Guolin Ke committed
446
  */
Guolin Ke's avatar
Guolin Ke committed
447
  std::string OutputMetric(int iter);
448

Guolin Ke's avatar
Guolin Ke committed
449
  double BoostFromAverage(int class_id, bool update_scorer);
Guolin Ke's avatar
Guolin Ke committed
450

451
452
  /*! \brief current iteration */
  int iter_;
Guolin Ke's avatar
Guolin Ke committed
453
454
455
  /*! \brief Pointer to training data */
  const Dataset* train_data_;
  /*! \brief Config of gbdt */
Guolin Ke's avatar
Guolin Ke committed
456
  std::unique_ptr<Config> config_;
Hui Xue's avatar
Hui Xue committed
457
  /*! \brief Tree learner, will use this class to learn trees */
458
  std::unique_ptr<TreeLearner> tree_learner_;
Guolin Ke's avatar
Guolin Ke committed
459
  /*! \brief Objective function */
460
  const ObjectiveFunction* objective_function_;
Hui Xue's avatar
Hui Xue committed
461
  /*! \brief Store and update training data's score */
Guolin Ke's avatar
Guolin Ke committed
462
  std::unique_ptr<ScoreUpdater> train_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
463
464
465
  /*! \brief Metrics for training data */
  std::vector<const Metric*> training_metrics_;
  /*! \brief Store and update validation data's scores */
Guolin Ke's avatar
Guolin Ke committed
466
  std::vector<std::unique_ptr<ScoreUpdater>> valid_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
467
468
  /*! \brief Metric for validation data */
  std::vector<std::vector<const Metric*>> valid_metrics_;
wxchan's avatar
wxchan committed
469
470
  /*! \brief Number of rounds for early stopping */
  int early_stopping_round_;
471
472
  /*! \brief Only use first metric for early stopping */
  bool es_first_metric_only_;
Guolin Ke's avatar
Guolin Ke committed
473
  /*! \brief Best iteration(s) for early stopping */
wxchan's avatar
wxchan committed
474
  std::vector<std::vector<int>> best_iter_;
Guolin Ke's avatar
Guolin Ke committed
475
  /*! \brief Best score(s) for early stopping */
476
  std::vector<std::vector<double>> best_score_;
Guolin Ke's avatar
Guolin Ke committed
477
478
  /*! \brief output message of best iteration */
  std::vector<std::vector<std::string>> best_msg_;
Guolin Ke's avatar
Guolin Ke committed
479
  /*! \brief Trained models(trees) */
Guolin Ke's avatar
Guolin Ke committed
480
  std::vector<std::unique_ptr<Tree>> models_;
Guolin Ke's avatar
Guolin Ke committed
481
482
  /*! \brief Max feature index of training data*/
  int max_feature_idx_;
483
484
485
486
487
488
489

#ifdef USE_CUDA
  /*! \brief First order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> gradients_;
  /*! \brief Second order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> hessians_;
#else
Guolin Ke's avatar
Guolin Ke committed
490
  /*! \brief First order derivative of training data */
491
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> gradients_;
492
  /*! \brief Second order derivative of training data */
493
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> hessians_;
494
495
#endif

Guolin Ke's avatar
Guolin Ke committed
496
  /*! \brief Store the indices of in-bag data */
497
  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
Guolin Ke's avatar
Guolin Ke committed
498
499
  /*! \brief Number of in-bag data */
  data_size_t bag_data_cnt_;
wxchan's avatar
wxchan committed
500
  /*! \brief Number of training data */
Guolin Ke's avatar
Guolin Ke committed
501
  data_size_t num_data_;
502
503
504
  /*! \brief Number of trees per iterations */
  int num_tree_per_iteration_;
  /*! \brief Number of class */
505
  int num_class_;
Guolin Ke's avatar
Guolin Ke committed
506
507
  /*! \brief Index of label column */
  data_size_t label_idx_;
508
  /*! \brief number of used model */
wxchan's avatar
wxchan committed
509
  int num_iteration_for_pred_;
510
511
  /*! \brief Start iteration of used model */
  int start_iteration_for_pred_;
Guolin Ke's avatar
Guolin Ke committed
512
513
  /*! \brief Shrinkage rate for one iteration */
  double shrinkage_rate_;
wxchan's avatar
wxchan committed
514
515
  /*! \brief Number of loaded initial models */
  int num_init_iteration_;
Guolin Ke's avatar
Guolin Ke committed
516
517
  /*! \brief Feature names */
  std::vector<std::string> feature_names_;
Guolin Ke's avatar
Guolin Ke committed
518
  std::vector<std::string> feature_infos_;
Guolin Ke's avatar
Guolin Ke committed
519
520
  std::unique_ptr<Dataset> tmp_subset_;
  bool is_use_subset_;
521
  std::vector<bool> class_need_train_;
522
  bool is_constant_hessian_;
523
  std::unique_ptr<ObjectiveFunction> loaded_objective_;
Guolin Ke's avatar
Guolin Ke committed
524
  bool average_output_;
Guolin Ke's avatar
Guolin Ke committed
525
  bool need_re_bagging_;
Guolin Ke's avatar
Guolin Ke committed
526
  bool balanced_bagging_;
Guolin Ke's avatar
Guolin Ke committed
527
  std::string loaded_parameter_;
528
  std::vector<int8_t> monotone_constraints_;
529
530
531
  const int bagging_rand_block_ = 1024;
  std::vector<Random> bagging_rands_;
  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
532
  Json forced_splits_json_;
Guolin Ke's avatar
Guolin Ke committed
533
534
535
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
536
#endif   // LIGHTGBM_BOOSTING_GBDT_H_