gbdt.h 18.8 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_BOOSTING_GBDT_H_
#define LIGHTGBM_BOOSTING_GBDT_H_

8
9
10
#include <LightGBM/boosting.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
11
#include <LightGBM/cuda/vector_cudahost.h>
12
13
14
#include <LightGBM/utils/json11.h>
#include <LightGBM/utils/threading.h>

Guolin Ke's avatar
Guolin Ke committed
15
#include <string>
16
17
#include <algorithm>
#include <cstdio>
18
#include <fstream>
19
#include <map>
Guolin Ke's avatar
Guolin Ke committed
20
#include <memory>
21
#include <mutex>
22
23
24
25
26
#include <unordered_map>
#include <utility>
#include <vector>

#include "score_updater.hpp"
Guolin Ke's avatar
Guolin Ke committed
27
28

namespace LightGBM {
Guolin Ke's avatar
Guolin Ke committed
29

30
31
using json11::Json;

Guolin Ke's avatar
Guolin Ke committed
32
33
34
/*!
* \brief GBDT algorithm implementation, including training, prediction and bagging.
*/
Guolin Ke's avatar
Guolin Ke committed
35
class GBDT : public GBDTBase {
Nikita Titov's avatar
Nikita Titov committed
36
 public:
Guolin Ke's avatar
Guolin Ke committed
37
38
39
  /*!
  * \brief Constructor
  */
40
  GBDT();
Guolin Ke's avatar
Guolin Ke committed
41

Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
  /*!
  * \brief Destructor
  */
  ~GBDT();
Guolin Ke's avatar
Guolin Ke committed
46

47

Guolin Ke's avatar
Guolin Ke committed
48
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
49
  * \brief Initialization logic
zhangyafeikimi's avatar
zhangyafeikimi committed
50
  * \param gbdt_config Config for boosting
Guolin Ke's avatar
Guolin Ke committed
51
  * \param train_data Training data
52
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
53
54
  * \param training_metrics Training metrics
  */
Guolin Ke's avatar
Guolin Ke committed
55
  void Init(const Config* gbdt_config, const Dataset* train_data,
56
            const ObjectiveFunction* objective_function,
Guolin Ke's avatar
Guolin Ke committed
57
            const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
58
59

  /*!
Guolin Ke's avatar
Guolin Ke committed
60
  * \brief Merge model from other boosting object. Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
61
62
63
64
65
66
67
68
69
70
71
72
  * \param other
  */
  void MergeFrom(const Boosting* other) override {
    auto other_gbdt = reinterpret_cast<const GBDT*>(other);
    // tmp move to other vector
    auto original_models = std::move(models_);
    models_ = std::vector<std::unique_ptr<Tree>>();
    // push model from other first
    for (const auto& tree : other_gbdt->models_) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
73
    num_init_iteration_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
74
75
76
77
78
    // push model in current object
    for (const auto& tree : original_models) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
79
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
80
81
  }

82
  void ShuffleModels(int start_iter, int end_iter) override {
83
    int total_iter = static_cast<int>(models_.size()) / num_tree_per_iteration_;
84
85
86
87
88
    start_iter = std::max(0, start_iter);
    if (end_iter <= 0) {
      end_iter = total_iter;
    }
    end_iter = std::min(total_iter, end_iter);
89
90
91
92
93
94
    auto original_models = std::move(models_);
    std::vector<int> indices(total_iter);
    for (int i = 0; i < total_iter; ++i) {
      indices[i] = i;
    }
    Random tmp_rand(17);
95
96
    for (int i = start_iter; i < end_iter - 1; ++i) {
      int j = tmp_rand.NextShort(i + 1, end_iter);
97
98
99
100
101
102
103
104
105
106
107
108
      std::swap(indices[i], indices[j]);
    }
    models_ = std::vector<std::unique_ptr<Tree>>();
    for (int i = 0; i < total_iter; ++i) {
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
        int tree_idx = indices[i] * num_tree_per_iteration_ + j;
        auto new_tree = std::unique_ptr<Tree>(new Tree(*(original_models[tree_idx].get())));
        models_.push_back(std::move(new_tree));
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
113
114
  /*!
  * \brief Reset the training data
  * \param train_data New Training data
  * \param objective_function Training objective function
  * \param training_metrics Training metrics
  */
115
116
  void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                         const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
117

Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
  /*!
  * \brief Reset Boosting Config
  * \param gbdt_config Config for boosting
  */
Guolin Ke's avatar
Guolin Ke committed
122
  void ResetConfig(const Config* gbdt_config) override;
Guolin Ke's avatar
Guolin Ke committed
123

Guolin Ke's avatar
Guolin Ke committed
124
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
125
126
127
  * \brief Adding a validation dataset
  * \param valid_data Validation dataset
  * \param valid_metrics Metrics for validation dataset
Guolin Ke's avatar
Guolin Ke committed
128
  */
wxchan's avatar
wxchan committed
129
  void AddValidDataset(const Dataset* valid_data,
130
                       const std::vector<const Metric*>& valid_metrics) override;
Guolin Ke's avatar
Guolin Ke committed
131

Guolin Ke's avatar
Guolin Ke committed
132
133
  /*!
  * \brief Perform a full training procedure
Andrew Ziem's avatar
Andrew Ziem committed
134
  * \param snapshot_freq frequency of snapshot
Guolin Ke's avatar
Guolin Ke committed
135
136
  * \param model_output_path path of model file
  */
Guolin Ke's avatar
Guolin Ke committed
137
138
  void Train(int snapshot_freq, const std::string& model_output_path) override;

139
140
  void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) override;

Guolin Ke's avatar
Guolin Ke committed
141
  /*!
Guolin Ke's avatar
Guolin Ke committed
142
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
143
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
Nikita Titov's avatar
Nikita Titov committed
144
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
Guolin Ke's avatar
Guolin Ke committed
145
  * \return True if cannot train any more
Guolin Ke's avatar
Guolin Ke committed
146
  */
Guolin Ke's avatar
Guolin Ke committed
147
  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
148

wxchan's avatar
wxchan committed
149
150
151
152
153
  /*!
  * \brief Rollback one iteration
  */
  void RollbackOneIter() override;

Guolin Ke's avatar
Guolin Ke committed
154
155
156
  /*!
  * \brief Get current iteration
  */
Guolin Ke's avatar
Guolin Ke committed
157
  int GetCurrentIteration() const override {
    // Integer division: a trailing, incomplete group of trees is not counted.
    const int num_trees = static_cast<int>(models_.size());
    return num_trees / num_tree_per_iteration_;
  }
wxchan's avatar
wxchan committed
158

Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
  /*!
  * \brief Can use early stopping for prediction or not
  * \return True if cannot use early stopping for prediction
  */
163
  bool NeedAccuratePrediction() const override {
    // Without an objective function the answer is always true;
    // otherwise the decision is delegated to the objective function.
    return objective_function_ == nullptr || objective_function_->NeedAccuratePrediction();
  }

Guolin Ke's avatar
Guolin Ke committed
171
172
173
174
175
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
176
  std::vector<double> GetEvalAt(int data_idx) const override;
177

Guolin Ke's avatar
Guolin Ke committed
178
179
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
180
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
181
182
  * \return training score
  */
Guolin Ke's avatar
Guolin Ke committed
183
  const double* GetTrainingScore(int64_t* out_len) override;
184

Guolin Ke's avatar
Guolin Ke committed
185
186
187
188
189
  /*!
  * \brief Get size of prediction at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return The size of prediction
  */
Guolin Ke's avatar
Guolin Ke committed
190
  int64_t GetNumPredictAt(int data_idx) const override {
Guolin Ke's avatar
Guolin Ke committed
191
192
193
194
195
    CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
    data_size_t num_data = train_data_->num_data();
    if (data_idx > 0) {
      num_data = valid_score_updater_[data_idx - 1]->num_data();
    }
196
    return static_cast<int64_t>(num_data) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
197
  }
Guolin Ke's avatar
Guolin Ke committed
198

Guolin Ke's avatar
Guolin Ke committed
199
200
201
202
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param out_result used to store prediction result, should allocate memory before call this function
203
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
204
  */
Guolin Ke's avatar
Guolin Ke committed
205
  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) override;
Guolin Ke's avatar
Guolin Ke committed
206

Guolin Ke's avatar
Guolin Ke committed
207
208
  /*!
  * \brief Get number of prediction for one data
209
  * \param start_iteration Start index of the iteration to predict
Guolin Ke's avatar
Guolin Ke committed
210
  * \param num_iteration number of used iterations
211
  * \param is_pred_leaf True if predicting leaf index
Guolin Ke's avatar
Guolin Ke committed
212
213
214
  * \param is_pred_contrib True if predicting feature contribution
  * \return number of prediction
  */
215
  inline int NumPredictOneRow(int start_iteration, int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const override {
216
    int num_pred_in_one_row = num_class_;
Guolin Ke's avatar
Guolin Ke committed
217
218
    if (is_pred_leaf) {
      int max_iteration = GetCurrentIteration();
219
220
      start_iteration = std::max(start_iteration, 0);
      start_iteration = std::min(start_iteration, max_iteration);
Guolin Ke's avatar
Guolin Ke committed
221
      if (num_iteration > 0) {
222
        num_pred_in_one_row *= static_cast<int>(std::min(max_iteration - start_iteration, num_iteration));
Guolin Ke's avatar
Guolin Ke committed
223
      } else {
224
        num_pred_in_one_row *= (max_iteration - start_iteration);
Guolin Ke's avatar
Guolin Ke committed
225
      }
226
    } else if (is_pred_contrib) {
227
      num_pred_in_one_row = num_tree_per_iteration_ * (max_feature_idx_ + 2);  // +1 for 0-based indexing, +1 for baseline
Guolin Ke's avatar
Guolin Ke committed
228
    }
229
    return num_pred_in_one_row;
Guolin Ke's avatar
Guolin Ke committed
230
  }
Guolin Ke's avatar
Guolin Ke committed
231

cbecker's avatar
cbecker committed
232
  void PredictRaw(const double* features, double* output,
233
                  const PredictionEarlyStopInstance* earlyStop) const override;
wxchan's avatar
wxchan committed
234

Guolin Ke's avatar
Guolin Ke committed
235
236
  void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                       const PredictionEarlyStopInstance* early_stop) const override;
237

cbecker's avatar
cbecker committed
238
239
  void Predict(const double* features, double* output,
               const PredictionEarlyStopInstance* earlyStop) const override;
Guolin Ke's avatar
Guolin Ke committed
240

Guolin Ke's avatar
Guolin Ke committed
241
242
  void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                    const PredictionEarlyStopInstance* early_stop) const override;
243

244
  void PredictLeafIndex(const double* features, double* output) const override;
wxchan's avatar
wxchan committed
245

246
247
  void PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const override;

248
249
250
251
  void PredictContrib(const double* features, double* output) const override;

  void PredictContribByMap(const std::unordered_map<int, double>& features,
                           std::vector<std::unordered_map<int, double>>* output) const override;
252

Guolin Ke's avatar
Guolin Ke committed
253
  /*!
wxchan's avatar
wxchan committed
254
  * \brief Dump model to json format string
255
  * \param start_iteration Index of the first iteration to dump
256
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
257
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
258
  * \return Json format string of model
Guolin Ke's avatar
Guolin Ke committed
259
  */
260
261
  std::string DumpModel(int start_iteration, int num_iteration,
                        int feature_importance_type) const override;
wxchan's avatar
wxchan committed
262

263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  std::string ModelToIfElse(int num_iteration) const override;

  /*!
  * \brief Translate model to if-else statement and save it to a file
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  bool SaveModelToIfElse(int num_iteration, const char* filename) const override;

wxchan's avatar
wxchan committed
278
279
  /*!
  * \brief Save model to file
280
  * \param start_iteration Index of the first iteration to save
wxchan's avatar
wxchan committed
281
  * \param num_iterations Number of model that want to save, -1 means save all
282
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
283
  * \param filename Filename that want to save to
284
  * \return is_finish Is training finished or not
wxchan's avatar
wxchan committed
285
  */
286
287
288
  bool SaveModelToFile(int start_iteration, int num_iterations,
                       int feature_importance_type,
                       const char* filename) const override;
wxchan's avatar
wxchan committed
289

290
291
  /*!
  * \brief Save model to string
292
  * \param start_iteration Index of the first iteration to save
wxchan's avatar
wxchan committed
293
  * \param num_iterations Number of model that want to save, -1 means save all
294
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
295
296
  * \return Non-empty string if succeeded
  */
297
  std::string SaveModelToString(int start_iteration, int num_iterations, int feature_importance_type) const override;
298

Guolin Ke's avatar
Guolin Ke committed
299
  /*!
300
  * \brief Restore from a serialized buffer
Guolin Ke's avatar
Guolin Ke committed
301
  */
302
  bool LoadModelFromString(const char* buffer, size_t len) override;
wxchan's avatar
wxchan committed
303

304
305
306
307
308
309
310
311
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  std::vector<double> FeatureImportance(int num_iteration, int importance_type) const override;

312
313
314
315
316
317
318
319
320
321
322
323
  /*!
  * \brief Calculate upper bound value
  * \return upper bound value
  */
  double GetUpperBoundValue() const override;

  /*!
  * \brief Calculate lower bound value
  * \return lower bound value
  */
  double GetLowerBoundValue() const override;

Guolin Ke's avatar
Guolin Ke committed
324
325
326
327
328
  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  inline int MaxFeatureIdx() const override {
    // Plain accessor for the stored max_feature_idx_.
    return max_feature_idx_;
  }
Guolin Ke's avatar
Guolin Ke committed
329

wxchan's avatar
wxchan committed
330
331
332
333
334
335
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  inline std::vector<std::string> FeatureNames() const override {
    // Returned by value: the caller receives its own copy of feature_names_.
    return feature_names_;
  }

Guolin Ke's avatar
Guolin Ke committed
336
337
338
339
340
341
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  inline int LabelIdx() const override {
    // label_idx_ is stored as data_size_t; make the conversion to int explicit.
    return static_cast<int>(label_idx_);
  }

Guolin Ke's avatar
Guolin Ke committed
342
343
344
345
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
346
  inline int NumberOfTotalModel() const override {
    // Counts individual trees, not iterations.
    const int num_trees = static_cast<int>(models_.size());
    return num_trees;
  }
Guolin Ke's avatar
Guolin Ke committed
347

Guolin Ke's avatar
Guolin Ke committed
348
349
350
351
  /*!
  * \brief Get number of tree per iteration
  * \return number of tree per iteration
  */
Guolin Ke's avatar
Guolin Ke committed
352
  inline int NumModelPerIteration() const override {
    // Plain accessor for the stored num_tree_per_iteration_.
    return num_tree_per_iteration_;
  }
Guolin Ke's avatar
Guolin Ke committed
353

354
355
356
357
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
358
  inline int NumberOfClasses() const override {
    // Plain accessor for the stored num_class_.
    return num_class_;
  }
359

360
  inline void InitPredict(int start_iteration, int num_iteration, bool is_pred_contrib) override {
Guolin Ke's avatar
Guolin Ke committed
361
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
362
363
    start_iteration = std::max(start_iteration, 0);
    start_iteration = std::min(start_iteration, num_iteration_for_pred_);
wxchan's avatar
wxchan committed
364
    if (num_iteration > 0) {
365
366
367
      num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_ - start_iteration);
    } else {
      num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
368
    }
369
    start_iteration_for_pred_ = start_iteration;
370
371
372
373
374
375
    if (is_pred_contrib) {
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
        models_[i]->RecomputeMaxDepth();
      }
    }
376
  }
wxchan's avatar
wxchan committed
377

Guolin Ke's avatar
Guolin Ke committed
378
  inline double GetLeafValue(int tree_idx, int leaf_idx) const override {
    // Both indices are validated before the tree is accessed.
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& tree = models_[tree_idx];
    return tree->LeafOutput(leaf_idx);
  }

Guolin Ke's avatar
Guolin Ke committed
384
  inline void SetLeafValue(int tree_idx, int leaf_idx, double val) override {
    // Both indices are validated before the tree is modified.
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& tree = models_[tree_idx];
    tree->SetLeafOutput(leaf_idx, val);
  }

390
391
392
  /*!
  * \brief Get Type name of this boosting object
  */
Guolin Ke's avatar
Guolin Ke committed
393
  const char* SubModelName() const override {
    // Constant string literal; the caller must not free it.
    return "tree";
  }
394

395
396
  /*!
  * \brief Get the stored linear_tree_ flag
  * \return Value of linear_tree_
  */
  bool IsLinear() const override {
    return linear_tree_;
  }

397
398
  /*!
  * \brief Get the stored parser config string
  * \return Copy of parser_config_str_
  */
  inline std::string ParserConfigStr() const override {
    return parser_config_str_;
  }

Nikita Titov's avatar
Nikita Titov committed
399
 protected:
400
401
402
403
404
405
406
  virtual bool GetIsConstHessian(const ObjectiveFunction* objective_function) {
    // A missing objective function yields false; otherwise the objective function decides.
    return objective_function != nullptr && objective_function->IsConstantHessian();
  }
Guolin Ke's avatar
Guolin Ke committed
407
408
409
  /*!
  * \brief Print eval result and check early stopping
  */
410
  virtual bool EvalAndCheckEarlyStopping();
Guolin Ke's avatar
Guolin Ke committed
411
412
413
414

  /*!
  * \brief reset config for bagging
  */
Guolin Ke's avatar
Guolin Ke committed
415
  void ResetBaggingConfig(const Config* config, bool is_change_dataset);
Guolin Ke's avatar
Guolin Ke committed
416

Guolin Ke's avatar
Guolin Ke committed
417
418
419
420
  /*!
  * \brief Implement bagging logic
  * \param iter Current iteration
  */
421
422
  virtual void Bagging(int iter);

423
424
  virtual data_size_t BaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
425

426
427
  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
428

Guolin Ke's avatar
Guolin Ke committed
429
430
431
  /*!
  * \brief calculate the object function
  */
Guolin Ke's avatar
Guolin Ke committed
432
  virtual void Boosting();
Guolin Ke's avatar
Guolin Ke committed
433

Guolin Ke's avatar
Guolin Ke committed
434
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
435
  * \brief updating score after tree was trained
Guolin Ke's avatar
Guolin Ke committed
436
  * \param tree Trained tree of this iteration
437
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
438
  */
439
  virtual void UpdateScore(const Tree* tree, const int cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
440

Guolin Ke's avatar
Guolin Ke committed
441
442
443
444
  /*!
  * \brief eval results for one metric

  */
Guolin Ke's avatar
Guolin Ke committed
445
  virtual std::vector<double> EvalOneMetric(const Metric* metric, const double* score) const;
Guolin Ke's avatar
Guolin Ke committed
446

Guolin Ke's avatar
Guolin Ke committed
447
  /*!
Hui Xue's avatar
Hui Xue committed
448
  * \brief Print metric result of current iteration
Andrew Ziem's avatar
Andrew Ziem committed
449
  * \param iter Current iteration
Guolin Ke's avatar
Guolin Ke committed
450
  * \return best_msg if met early_stopping
Guolin Ke's avatar
Guolin Ke committed
451
  */
Guolin Ke's avatar
Guolin Ke committed
452
  std::string OutputMetric(int iter);
453

Guolin Ke's avatar
Guolin Ke committed
454
  double BoostFromAverage(int class_id, bool update_scorer);
Guolin Ke's avatar
Guolin Ke committed
455

456
457
  /*! \brief current iteration */
  int iter_;
Guolin Ke's avatar
Guolin Ke committed
458
459
460
  /*! \brief Pointer to training data */
  const Dataset* train_data_;
  /*! \brief Config of gbdt */
Guolin Ke's avatar
Guolin Ke committed
461
  std::unique_ptr<Config> config_;
Hui Xue's avatar
Hui Xue committed
462
  /*! \brief Tree learner, will use this class to learn trees */
463
  std::unique_ptr<TreeLearner> tree_learner_;
Guolin Ke's avatar
Guolin Ke committed
464
  /*! \brief Objective function */
465
  const ObjectiveFunction* objective_function_;
Hui Xue's avatar
Hui Xue committed
466
  /*! \brief Store and update training data's score */
Guolin Ke's avatar
Guolin Ke committed
467
  std::unique_ptr<ScoreUpdater> train_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
468
469
470
  /*! \brief Metrics for training data */
  std::vector<const Metric*> training_metrics_;
  /*! \brief Store and update validation data's scores */
Guolin Ke's avatar
Guolin Ke committed
471
  std::vector<std::unique_ptr<ScoreUpdater>> valid_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
472
473
  /*! \brief Metric for validation data */
  std::vector<std::vector<const Metric*>> valid_metrics_;
wxchan's avatar
wxchan committed
474
475
  /*! \brief Number of rounds for early stopping */
  int early_stopping_round_;
476
477
  /*! \brief Only use first metric for early stopping */
  bool es_first_metric_only_;
Guolin Ke's avatar
Guolin Ke committed
478
  /*! \brief Best iteration(s) for early stopping */
wxchan's avatar
wxchan committed
479
  std::vector<std::vector<int>> best_iter_;
Guolin Ke's avatar
Guolin Ke committed
480
  /*! \brief Best score(s) for early stopping */
481
  std::vector<std::vector<double>> best_score_;
Guolin Ke's avatar
Guolin Ke committed
482
483
  /*! \brief output message of best iteration */
  std::vector<std::vector<std::string>> best_msg_;
Guolin Ke's avatar
Guolin Ke committed
484
  /*! \brief Trained models(trees) */
Guolin Ke's avatar
Guolin Ke committed
485
  std::vector<std::unique_ptr<Tree>> models_;
Guolin Ke's avatar
Guolin Ke committed
486
487
  /*! \brief Max feature index of training data*/
  int max_feature_idx_;
488
489
  /*! \brief Parser config file content */
  std::string parser_config_str_ = "";
490

491
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
492
493
494
495
496
  /*! \brief First order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> gradients_;
  /*! \brief Second order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> hessians_;
#else
Guolin Ke's avatar
Guolin Ke committed
497
  /*! \brief First order derivative of training data */
498
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> gradients_;
499
  /*! \brief Second order derivative of training data */
500
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> hessians_;
501
502
#endif

Guolin Ke's avatar
Guolin Ke committed
503
  /*! \brief Store the indices of in-bag data */
504
  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
Guolin Ke's avatar
Guolin Ke committed
505
506
  /*! \brief Number of in-bag data */
  data_size_t bag_data_cnt_;
wxchan's avatar
wxchan committed
507
  /*! \brief Number of training data */
Guolin Ke's avatar
Guolin Ke committed
508
  data_size_t num_data_;
509
510
511
  /*! \brief Number of trees per iterations */
  int num_tree_per_iteration_;
  /*! \brief Number of class */
512
  int num_class_;
Guolin Ke's avatar
Guolin Ke committed
513
514
  /*! \brief Index of label column */
  data_size_t label_idx_;
515
  /*! \brief number of used model */
wxchan's avatar
wxchan committed
516
  int num_iteration_for_pred_;
517
518
  /*! \brief Start iteration of used model */
  int start_iteration_for_pred_;
Guolin Ke's avatar
Guolin Ke committed
519
520
  /*! \brief Shrinkage rate for one iteration */
  double shrinkage_rate_;
wxchan's avatar
wxchan committed
521
522
  /*! \brief Number of loaded initial models */
  int num_init_iteration_;
Guolin Ke's avatar
Guolin Ke committed
523
524
  /*! \brief Feature names */
  std::vector<std::string> feature_names_;
Guolin Ke's avatar
Guolin Ke committed
525
  std::vector<std::string> feature_infos_;
Guolin Ke's avatar
Guolin Ke committed
526
527
  std::unique_ptr<Dataset> tmp_subset_;
  bool is_use_subset_;
528
  std::vector<bool> class_need_train_;
529
  bool is_constant_hessian_;
530
  std::unique_ptr<ObjectiveFunction> loaded_objective_;
Guolin Ke's avatar
Guolin Ke committed
531
  bool average_output_;
Guolin Ke's avatar
Guolin Ke committed
532
  bool need_re_bagging_;
Guolin Ke's avatar
Guolin Ke committed
533
  bool balanced_bagging_;
Guolin Ke's avatar
Guolin Ke committed
534
  std::string loaded_parameter_;
535
  std::vector<int8_t> monotone_constraints_;
536
537
538
  const int bagging_rand_block_ = 1024;
  std::vector<Random> bagging_rands_;
  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
539
  Json forced_splits_json_;
540
  bool linear_tree_;
Guolin Ke's avatar
Guolin Ke committed
541
542
543
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
544
#endif   // LIGHTGBM_BOOSTING_GBDT_H_