gbdt.h 18.6 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_BOOSTING_GBDT_H_
#define LIGHTGBM_BOOSTING_GBDT_H_

8
9
10
#include <LightGBM/boosting.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
11
#include <LightGBM/cuda/vector_cudahost.h>
12
13
14
#include <LightGBM/utils/json11.h>
#include <LightGBM/utils/threading.h>

Guolin Ke's avatar
Guolin Ke committed
15
#include <string>
16
17
#include <algorithm>
#include <cstdio>
18
#include <fstream>
19
#include <map>
Guolin Ke's avatar
Guolin Ke committed
20
#include <memory>
21
#include <mutex>
22
23
24
25
26
#include <unordered_map>
#include <utility>
#include <vector>

#include "score_updater.hpp"
Guolin Ke's avatar
Guolin Ke committed
27
28

namespace LightGBM {
Guolin Ke's avatar
Guolin Ke committed
29

30
31
using json11::Json;

Guolin Ke's avatar
Guolin Ke committed
32
33
34
/*!
* \brief GBDT algorithm implementation, including training, prediction and bagging.
*/
Guolin Ke's avatar
Guolin Ke committed
35
class GBDT : public GBDTBase {
Nikita Titov's avatar
Nikita Titov committed
36
 public:
Guolin Ke's avatar
Guolin Ke committed
37
38
39
  /*!
  * \brief Constructor
  */
40
  GBDT();
Guolin Ke's avatar
Guolin Ke committed
41

Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
  /*!
  * \brief Destructor
  */
  ~GBDT();
Guolin Ke's avatar
Guolin Ke committed
46

47

Guolin Ke's avatar
Guolin Ke committed
48
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
49
  * \brief Initialization logic
zhangyafeikimi's avatar
zhangyafeikimi committed
50
  * \param gbdt_config Config for boosting
Guolin Ke's avatar
Guolin Ke committed
51
  * \param train_data Training data
52
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
53
54
  * \param training_metrics Training metrics
  */
Guolin Ke's avatar
Guolin Ke committed
55
  void Init(const Config* gbdt_config, const Dataset* train_data,
56
            const ObjectiveFunction* objective_function,
Guolin Ke's avatar
Guolin Ke committed
57
            const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
58
59

  /*!
Guolin Ke's avatar
Guolin Ke committed
60
  * \brief Merge model from other boosting object. Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
61
62
63
64
65
66
67
68
69
70
71
72
  * \param other
  */
  void MergeFrom(const Boosting* other) override {
    auto other_gbdt = reinterpret_cast<const GBDT*>(other);
    // tmp move to other vector
    auto original_models = std::move(models_);
    models_ = std::vector<std::unique_ptr<Tree>>();
    // push model from other first
    for (const auto& tree : other_gbdt->models_) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
73
    num_init_iteration_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
74
75
76
77
78
    // push model in current object
    for (const auto& tree : original_models) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
79
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
80
81
  }

82
  void ShuffleModels(int start_iter, int end_iter) override {
83
    int total_iter = static_cast<int>(models_.size()) / num_tree_per_iteration_;
84
85
86
87
88
    start_iter = std::max(0, start_iter);
    if (end_iter <= 0) {
      end_iter = total_iter;
    }
    end_iter = std::min(total_iter, end_iter);
89
90
91
92
93
94
    auto original_models = std::move(models_);
    std::vector<int> indices(total_iter);
    for (int i = 0; i < total_iter; ++i) {
      indices[i] = i;
    }
    Random tmp_rand(17);
95
96
    for (int i = start_iter; i < end_iter - 1; ++i) {
      int j = tmp_rand.NextShort(i + 1, end_iter);
97
98
99
100
101
102
103
104
105
106
107
108
      std::swap(indices[i], indices[j]);
    }
    models_ = std::vector<std::unique_ptr<Tree>>();
    for (int i = 0; i < total_iter; ++i) {
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
        int tree_idx = indices[i] * num_tree_per_iteration_ + j;
        auto new_tree = std::unique_ptr<Tree>(new Tree(*(original_models[tree_idx].get())));
        models_.push_back(std::move(new_tree));
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
113
114
  /*!
  * \brief Reset the training data
  * \param train_data New Training data
  * \param objective_function Training objective function
  * \param training_metrics Training metrics
  */
115
116
  void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                         const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
117

Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
  /*!
  * \brief Reset Boosting Config
  * \param gbdt_config Config for boosting
  */
Guolin Ke's avatar
Guolin Ke committed
122
  void ResetConfig(const Config* gbdt_config) override;
Guolin Ke's avatar
Guolin Ke committed
123

Guolin Ke's avatar
Guolin Ke committed
124
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
125
126
127
  * \brief Adding a validation dataset
  * \param valid_data Validation dataset
  * \param valid_metrics Metrics for validation dataset
Guolin Ke's avatar
Guolin Ke committed
128
  */
wxchan's avatar
wxchan committed
129
  void AddValidDataset(const Dataset* valid_data,
130
                       const std::vector<const Metric*>& valid_metrics) override;
Guolin Ke's avatar
Guolin Ke committed
131

Guolin Ke's avatar
Guolin Ke committed
132
133
  /*!
  * \brief Perform a full training procedure
Andrew Ziem's avatar
Andrew Ziem committed
134
  * \param snapshot_freq frequency of snapshot
Guolin Ke's avatar
Guolin Ke committed
135
136
  * \param model_output_path path of model file
  */
Guolin Ke's avatar
Guolin Ke committed
137
138
  void Train(int snapshot_freq, const std::string& model_output_path) override;

139
140
  void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) override;

Guolin Ke's avatar
Guolin Ke committed
141
  /*!
Guolin Ke's avatar
Guolin Ke committed
142
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
143
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
Nikita Titov's avatar
Nikita Titov committed
144
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
Guolin Ke's avatar
Guolin Ke committed
145
  * \return True if cannot train any more
Guolin Ke's avatar
Guolin Ke committed
146
  */
Guolin Ke's avatar
Guolin Ke committed
147
  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
148

wxchan's avatar
wxchan committed
149
150
151
152
153
  /*!
  * \brief Rollback one iteration
  */
  void RollbackOneIter() override;

Guolin Ke's avatar
Guolin Ke committed
154
155
156
  /*!
  * \brief Get current iteration
  */
Guolin Ke's avatar
Guolin Ke committed
157
  int GetCurrentIteration() const override { return static_cast<int>(models_.size()) / num_tree_per_iteration_; }
wxchan's avatar
wxchan committed
158

Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
  /*!
  * \brief Can use early stopping for prediction or not
  * \return True if cannot use early stopping for prediction
  */
163
  bool NeedAccuratePrediction() const override {
164
165
166
167
168
169
170
    if (objective_function_ == nullptr) {
      return true;
    } else {
      return objective_function_->NeedAccuratePrediction();
    }
  }

Guolin Ke's avatar
Guolin Ke committed
171
172
173
174
175
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
176
  std::vector<double> GetEvalAt(int data_idx) const override;
177

Guolin Ke's avatar
Guolin Ke committed
178
179
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
180
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
181
182
  * \return training score
  */
Guolin Ke's avatar
Guolin Ke committed
183
  const double* GetTrainingScore(int64_t* out_len) override;
184

Guolin Ke's avatar
Guolin Ke committed
185
186
187
188
189
  /*!
  * \brief Get size of prediction at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return The size of prediction
  */
Guolin Ke's avatar
Guolin Ke committed
190
  int64_t GetNumPredictAt(int data_idx) const override {
Guolin Ke's avatar
Guolin Ke committed
191
192
193
194
195
196
197
    CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
    data_size_t num_data = train_data_->num_data();
    if (data_idx > 0) {
      num_data = valid_score_updater_[data_idx - 1]->num_data();
    }
    return num_data * num_class_;
  }
Guolin Ke's avatar
Guolin Ke committed
198

Guolin Ke's avatar
Guolin Ke committed
199
200
201
202
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param out_result used to store prediction result, should allocate memory before calling this function
203
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
204
  */
Guolin Ke's avatar
Guolin Ke committed
205
  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) override;
Guolin Ke's avatar
Guolin Ke committed
206

Guolin Ke's avatar
Guolin Ke committed
207
208
  /*!
  * \brief Get number of prediction for one data
209
  * \param start_iteration Start index of the iteration to predict
Guolin Ke's avatar
Guolin Ke committed
210
211
212
213
214
  * \param num_iteration number of used iterations
  * \param is_pred_leaf True if predicting  leaf index
  * \param is_pred_contrib True if predicting feature contribution
  * \return number of prediction
  */
215
  inline int NumPredictOneRow(int start_iteration, int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const override {
216
    int num_pred_in_one_row = num_class_;
Guolin Ke's avatar
Guolin Ke committed
217
218
    if (is_pred_leaf) {
      int max_iteration = GetCurrentIteration();
219
220
      start_iteration = std::max(start_iteration, 0);
      start_iteration = std::min(start_iteration, max_iteration);
Guolin Ke's avatar
Guolin Ke committed
221
      if (num_iteration > 0) {
222
        num_pred_in_one_row *= static_cast<int>(std::min(max_iteration - start_iteration, num_iteration));
Guolin Ke's avatar
Guolin Ke committed
223
      } else {
224
        num_pred_in_one_row *= (max_iteration - start_iteration);
Guolin Ke's avatar
Guolin Ke committed
225
      }
226
    } else if (is_pred_contrib) {
227
      num_pred_in_one_row = num_tree_per_iteration_ * (max_feature_idx_ + 2);  // +1 for 0-based indexing, +1 for baseline
Guolin Ke's avatar
Guolin Ke committed
228
    }
229
    return num_pred_in_one_row;
Guolin Ke's avatar
Guolin Ke committed
230
  }
Guolin Ke's avatar
Guolin Ke committed
231

cbecker's avatar
cbecker committed
232
  void PredictRaw(const double* features, double* output,
233
                  const PredictionEarlyStopInstance* earlyStop) const override;
wxchan's avatar
wxchan committed
234

Guolin Ke's avatar
Guolin Ke committed
235
236
  void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                       const PredictionEarlyStopInstance* early_stop) const override;
237

cbecker's avatar
cbecker committed
238
239
  void Predict(const double* features, double* output,
               const PredictionEarlyStopInstance* earlyStop) const override;
Guolin Ke's avatar
Guolin Ke committed
240

Guolin Ke's avatar
Guolin Ke committed
241
242
  void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                    const PredictionEarlyStopInstance* early_stop) const override;
243

244
  void PredictLeafIndex(const double* features, double* output) const override;
wxchan's avatar
wxchan committed
245

246
247
  void PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const override;

248
249
250
251
  void PredictContrib(const double* features, double* output) const override;

  void PredictContribByMap(const std::unordered_map<int, double>& features,
                           std::vector<std::unordered_map<int, double>>* output) const override;
252

Guolin Ke's avatar
Guolin Ke committed
253
  /*!
wxchan's avatar
wxchan committed
254
  * \brief Dump model to json format string
255
  * \param start_iteration The model will be saved start from
256
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
257
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
258
  * \return Json format string of model
Guolin Ke's avatar
Guolin Ke committed
259
  */
260
261
  std::string DumpModel(int start_iteration, int num_iteration,
                        int feature_importance_type) const override;
wxchan's avatar
wxchan committed
262

263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  std::string ModelToIfElse(int num_iteration) const override;

  /*!
  * \brief Translate model to if-else statement and save it to a file
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  bool SaveModelToIfElse(int num_iteration, const char* filename) const override;

wxchan's avatar
wxchan committed
278
279
  /*!
  * \brief Save model to file
280
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
281
  * \param num_iterations Number of model that want to save, -1 means save all
282
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
283
  * \param filename Filename that want to save to
284
  * \return is_finish Is training finished or not
wxchan's avatar
wxchan committed
285
  */
286
287
288
  bool SaveModelToFile(int start_iteration, int num_iterations,
                       int feature_importance_type,
                       const char* filename) const override;
wxchan's avatar
wxchan committed
289

290
291
  /*!
  * \brief Save model to string
292
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
293
  * \param num_iterations Number of model that want to save, -1 means save all
294
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
295
296
  * \return Non-empty string if succeeded
  */
297
  std::string SaveModelToString(int start_iteration, int num_iterations, int feature_importance_type) const override;
298

Guolin Ke's avatar
Guolin Ke committed
299
  /*!
300
  * \brief Restore from a serialized buffer
Guolin Ke's avatar
Guolin Ke committed
301
  */
302
  bool LoadModelFromString(const char* buffer, size_t len) override;
wxchan's avatar
wxchan committed
303

304
305
306
307
308
309
310
311
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  std::vector<double> FeatureImportance(int num_iteration, int importance_type) const override;

312
313
314
315
316
317
318
319
320
321
322
323
  /*!
  * \brief Calculate upper bound value
  * \return upper bound value
  */
  double GetUpperBoundValue() const override;

  /*!
  * \brief Calculate lower bound value
  * \return lower bound value
  */
  double GetLowerBoundValue() const override;

Guolin Ke's avatar
Guolin Ke committed
324
325
326
327
328
  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  inline int MaxFeatureIdx() const override { return max_feature_idx_; }
Guolin Ke's avatar
Guolin Ke committed
329

wxchan's avatar
wxchan committed
330
331
332
333
334
335
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  inline std::vector<std::string> FeatureNames() const override { return feature_names_; }

Guolin Ke's avatar
Guolin Ke committed
336
337
338
339
340
341
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  inline int LabelIdx() const override { return label_idx_; }

Guolin Ke's avatar
Guolin Ke committed
342
343
344
345
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
346
  inline int NumberOfTotalModel() const override { return static_cast<int>(models_.size()); }
Guolin Ke's avatar
Guolin Ke committed
347

Guolin Ke's avatar
Guolin Ke committed
348
349
350
351
  /*!
  * \brief Get number of tree per iteration
  * \return number of tree per iteration
  */
Guolin Ke's avatar
Guolin Ke committed
352
  inline int NumModelPerIteration() const override { return num_tree_per_iteration_; }
Guolin Ke's avatar
Guolin Ke committed
353

354
355
356
357
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
358
  inline int NumberOfClasses() const override { return num_class_; }
359

360
  inline void InitPredict(int start_iteration, int num_iteration, bool is_pred_contrib) override {
Guolin Ke's avatar
Guolin Ke committed
361
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
362
363
    start_iteration = std::max(start_iteration, 0);
    start_iteration = std::min(start_iteration, num_iteration_for_pred_);
wxchan's avatar
wxchan committed
364
    if (num_iteration > 0) {
365
366
367
      num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_ - start_iteration);
    } else {
      num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
368
    }
369
    start_iteration_for_pred_ = start_iteration;
370
371
372
373
374
375
    if (is_pred_contrib) {
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
        models_[i]->RecomputeMaxDepth();
      }
    }
376
  }
wxchan's avatar
wxchan committed
377

Guolin Ke's avatar
Guolin Ke committed
378
  /*!
  * \brief Read the output value of one leaf
  * \param tree_idx Index into the stored trees, checked against the number of trees
  * \param leaf_idx Index of the leaf inside that tree, checked against its num_leaves()
  * \return LeafOutput(leaf_idx) of the selected tree
  */
  inline double GetLeafValue(int tree_idx, int leaf_idx) const override {
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& selected_tree = models_[tree_idx];
    return selected_tree->LeafOutput(leaf_idx);
  }

Guolin Ke's avatar
Guolin Ke committed
384
  inline void SetLeafValue(int tree_idx, int leaf_idx, double val) override {
Guolin Ke's avatar
Guolin Ke committed
385
386
387
388
389
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    models_[tree_idx]->SetLeafOutput(leaf_idx, val);
  }

390
391
392
  /*!
  * \brief Get Type name of this boosting object
  */
Guolin Ke's avatar
Guolin Ke committed
393
  const char* SubModelName() const override { return "tree"; }
394

395
396
  /*! \brief Report the linear-tree flag of this model (returns linear_tree_) */
  bool IsLinear() const override { return linear_tree_; }

Nikita Titov's avatar
Nikita Titov committed
397
 protected:
398
399
400
401
402
403
404
  /*!
  * \brief Ask the given objective whether its hessian is constant
  * \param objective_function Objective to query, may be nullptr
  * \return False when objective_function is nullptr, otherwise the result of its IsConstantHessian()
  */
  virtual bool GetIsConstHessian(const ObjectiveFunction* objective_function) {
    return objective_function != nullptr && objective_function->IsConstantHessian();
  }
Guolin Ke's avatar
Guolin Ke committed
405
406
407
  /*!
  * \brief Print eval result and check early stopping
  */
408
  virtual bool EvalAndCheckEarlyStopping();
Guolin Ke's avatar
Guolin Ke committed
409
410
411
412

  /*!
  * \brief reset config for bagging
  */
Guolin Ke's avatar
Guolin Ke committed
413
  void ResetBaggingConfig(const Config* config, bool is_change_dataset);
Guolin Ke's avatar
Guolin Ke committed
414

Guolin Ke's avatar
Guolin Ke committed
415
416
417
418
  /*!
  * \brief Implement bagging logic
  * \param iter Current iteration
  */
419
420
  virtual void Bagging(int iter);

421
422
  virtual data_size_t BaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
423

424
425
  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
426

Guolin Ke's avatar
Guolin Ke committed
427
428
429
  /*!
  * \brief calculate the object function
  */
Guolin Ke's avatar
Guolin Ke committed
430
  virtual void Boosting();
Guolin Ke's avatar
Guolin Ke committed
431

Guolin Ke's avatar
Guolin Ke committed
432
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
433
  * \brief updating score after tree was trained
Guolin Ke's avatar
Guolin Ke committed
434
  * \param tree Trained tree of this iteration
435
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
436
  */
437
  virtual void UpdateScore(const Tree* tree, const int cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
438

Guolin Ke's avatar
Guolin Ke committed
439
440
441
442
  /*!
  * \brief eval results for one metric

  */
Guolin Ke's avatar
Guolin Ke committed
443
  virtual std::vector<double> EvalOneMetric(const Metric* metric, const double* score) const;
Guolin Ke's avatar
Guolin Ke committed
444

Guolin Ke's avatar
Guolin Ke committed
445
  /*!
Hui Xue's avatar
Hui Xue committed
446
  * \brief Print metric result of current iteration
Andrew Ziem's avatar
Andrew Ziem committed
447
  * \param iter Current iteration
Guolin Ke's avatar
Guolin Ke committed
448
  * \return best_msg if met early_stopping
Guolin Ke's avatar
Guolin Ke committed
449
  */
Guolin Ke's avatar
Guolin Ke committed
450
  std::string OutputMetric(int iter);
451

Guolin Ke's avatar
Guolin Ke committed
452
  double BoostFromAverage(int class_id, bool update_scorer);
Guolin Ke's avatar
Guolin Ke committed
453

454
455
  /*! \brief current iteration */
  int iter_;
Guolin Ke's avatar
Guolin Ke committed
456
457
458
  /*! \brief Pointer to training data */
  const Dataset* train_data_;
  /*! \brief Config of gbdt */
Guolin Ke's avatar
Guolin Ke committed
459
  std::unique_ptr<Config> config_;
Hui Xue's avatar
Hui Xue committed
460
  /*! \brief Tree learner, will use this class to learn trees */
461
  std::unique_ptr<TreeLearner> tree_learner_;
Guolin Ke's avatar
Guolin Ke committed
462
  /*! \brief Objective function */
463
  const ObjectiveFunction* objective_function_;
Hui Xue's avatar
Hui Xue committed
464
  /*! \brief Store and update training data's score */
Guolin Ke's avatar
Guolin Ke committed
465
  std::unique_ptr<ScoreUpdater> train_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
466
467
468
  /*! \brief Metrics for training data */
  std::vector<const Metric*> training_metrics_;
  /*! \brief Store and update validation data's scores */
Guolin Ke's avatar
Guolin Ke committed
469
  std::vector<std::unique_ptr<ScoreUpdater>> valid_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
470
471
  /*! \brief Metric for validation data */
  std::vector<std::vector<const Metric*>> valid_metrics_;
wxchan's avatar
wxchan committed
472
473
  /*! \brief Number of rounds for early stopping */
  int early_stopping_round_;
474
475
  /*! \brief Only use first metric for early stopping */
  bool es_first_metric_only_;
Guolin Ke's avatar
Guolin Ke committed
476
  /*! \brief Best iteration(s) for early stopping */
wxchan's avatar
wxchan committed
477
  std::vector<std::vector<int>> best_iter_;
Guolin Ke's avatar
Guolin Ke committed
478
  /*! \brief Best score(s) for early stopping */
479
  std::vector<std::vector<double>> best_score_;
Guolin Ke's avatar
Guolin Ke committed
480
481
  /*! \brief output message of best iteration */
  std::vector<std::vector<std::string>> best_msg_;
Guolin Ke's avatar
Guolin Ke committed
482
  /*! \brief Trained models(trees) */
Guolin Ke's avatar
Guolin Ke committed
483
  std::vector<std::unique_ptr<Tree>> models_;
Guolin Ke's avatar
Guolin Ke committed
484
485
  /*! \brief Max feature index of training data*/
  int max_feature_idx_;
486
487
488
489
490
491
492

#ifdef USE_CUDA
  /*! \brief First order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> gradients_;
  /*! \brief Second order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> hessians_;
#else
Guolin Ke's avatar
Guolin Ke committed
493
  /*! \brief First order derivative of training data */
494
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> gradients_;
495
  /*! \brief Second order derivative of training data */
496
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> hessians_;
497
498
#endif

Guolin Ke's avatar
Guolin Ke committed
499
  /*! \brief Store the indices of in-bag data */
500
  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
Guolin Ke's avatar
Guolin Ke committed
501
502
  /*! \brief Number of in-bag data */
  data_size_t bag_data_cnt_;
wxchan's avatar
wxchan committed
503
  /*! \brief Number of training data */
Guolin Ke's avatar
Guolin Ke committed
504
  data_size_t num_data_;
505
506
507
  /*! \brief Number of trees per iterations */
  int num_tree_per_iteration_;
  /*! \brief Number of class */
508
  int num_class_;
Guolin Ke's avatar
Guolin Ke committed
509
510
  /*! \brief Index of label column */
  data_size_t label_idx_;
511
  /*! \brief number of used model */
wxchan's avatar
wxchan committed
512
  int num_iteration_for_pred_;
513
514
  /*! \brief Start iteration of used model */
  int start_iteration_for_pred_;
Guolin Ke's avatar
Guolin Ke committed
515
516
  /*! \brief Shrinkage rate for one iteration */
  double shrinkage_rate_;
wxchan's avatar
wxchan committed
517
518
  /*! \brief Number of loaded initial models */
  int num_init_iteration_;
Guolin Ke's avatar
Guolin Ke committed
519
520
  /*! \brief Feature names */
  std::vector<std::string> feature_names_;
Guolin Ke's avatar
Guolin Ke committed
521
  std::vector<std::string> feature_infos_;
Guolin Ke's avatar
Guolin Ke committed
522
523
  std::unique_ptr<Dataset> tmp_subset_;
  bool is_use_subset_;
524
  std::vector<bool> class_need_train_;
525
  bool is_constant_hessian_;
526
  std::unique_ptr<ObjectiveFunction> loaded_objective_;
Guolin Ke's avatar
Guolin Ke committed
527
  bool average_output_;
Guolin Ke's avatar
Guolin Ke committed
528
  bool need_re_bagging_;
Guolin Ke's avatar
Guolin Ke committed
529
  bool balanced_bagging_;
Guolin Ke's avatar
Guolin Ke committed
530
  std::string loaded_parameter_;
531
  std::vector<int8_t> monotone_constraints_;
532
533
534
  const int bagging_rand_block_ = 1024;
  std::vector<Random> bagging_rands_;
  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
535
  Json forced_splits_json_;
536
  bool linear_tree_;
Guolin Ke's avatar
Guolin Ke committed
537
538
539
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
540
#endif   // LIGHTGBM_BOOSTING_GBDT_H_