gbdt.h 19.6 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_BOOSTING_GBDT_H_
#define LIGHTGBM_BOOSTING_GBDT_H_

8
9
10
#include <LightGBM/boosting.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
11
#include <LightGBM/cuda/vector_cudahost.h>
12
13
14
#include <LightGBM/utils/json11.h>
#include <LightGBM/utils/threading.h>

Guolin Ke's avatar
Guolin Ke committed
15
#include <string>
16
17
#include <algorithm>
#include <cstdio>
18
#include <fstream>
19
#include <map>
Guolin Ke's avatar
Guolin Ke committed
20
#include <memory>
21
#include <mutex>
22
23
24
25
#include <unordered_map>
#include <utility>
#include <vector>

26
#include "cuda/cuda_score_updater.hpp"
27
#include "score_updater.hpp"
Guolin Ke's avatar
Guolin Ke committed
28
29

namespace LightGBM {
Guolin Ke's avatar
Guolin Ke committed
30

31
32
using json11::Json;

Guolin Ke's avatar
Guolin Ke committed
33
34
35
/*!
* \brief GBDT algorithm implementation, including training, prediction and bagging.
*/
Guolin Ke's avatar
Guolin Ke committed
36
class GBDT : public GBDTBase {
Nikita Titov's avatar
Nikita Titov committed
37
 public:
Guolin Ke's avatar
Guolin Ke committed
38
39
40
  /*!
  * \brief Constructor
  */
41
  GBDT();
Guolin Ke's avatar
Guolin Ke committed
42

Guolin Ke's avatar
Guolin Ke committed
43
44
45
46
  /*!
  * \brief Destructor
  */
  ~GBDT();
Guolin Ke's avatar
Guolin Ke committed
47

48

Guolin Ke's avatar
Guolin Ke committed
49
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
50
  * \brief Initialization logic
zhangyafeikimi's avatar
zhangyafeikimi committed
51
  * \param gbdt_config Config for boosting
Guolin Ke's avatar
Guolin Ke committed
52
  * \param train_data Training data
53
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
54
55
  * \param training_metrics Training metrics
  */
Guolin Ke's avatar
Guolin Ke committed
56
  void Init(const Config* gbdt_config, const Dataset* train_data,
57
            const ObjectiveFunction* objective_function,
Guolin Ke's avatar
Guolin Ke committed
58
            const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
59
60

  /*!
Guolin Ke's avatar
Guolin Ke committed
61
  * \brief Merge model from other boosting object. Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
62
63
64
65
66
67
68
69
70
71
72
73
  * \param other
  */
  void MergeFrom(const Boosting* other) override {
    auto other_gbdt = reinterpret_cast<const GBDT*>(other);
    // tmp move to other vector
    auto original_models = std::move(models_);
    models_ = std::vector<std::unique_ptr<Tree>>();
    // push model from other first
    for (const auto& tree : other_gbdt->models_) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
74
    num_init_iteration_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
75
76
77
78
79
    // push model in current object
    for (const auto& tree : original_models) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
80
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
81
82
  }

83
  void ShuffleModels(int start_iter, int end_iter) override {
84
    int total_iter = static_cast<int>(models_.size()) / num_tree_per_iteration_;
85
86
87
88
89
    start_iter = std::max(0, start_iter);
    if (end_iter <= 0) {
      end_iter = total_iter;
    }
    end_iter = std::min(total_iter, end_iter);
90
91
92
93
94
95
    auto original_models = std::move(models_);
    std::vector<int> indices(total_iter);
    for (int i = 0; i < total_iter; ++i) {
      indices[i] = i;
    }
    Random tmp_rand(17);
96
97
    for (int i = start_iter; i < end_iter - 1; ++i) {
      int j = tmp_rand.NextShort(i + 1, end_iter);
98
99
100
101
102
103
104
105
106
107
108
109
      std::swap(indices[i], indices[j]);
    }
    models_ = std::vector<std::unique_ptr<Tree>>();
    for (int i = 0; i < total_iter; ++i) {
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
        int tree_idx = indices[i] * num_tree_per_iteration_ + j;
        auto new_tree = std::unique_ptr<Tree>(new Tree(*(original_models[tree_idx].get())));
        models_.push_back(std::move(new_tree));
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
  /*!
  * \brief Reset the training data
  * \param train_data New Training data
  * \param objective_function Training objective function
  * \param training_metrics Training metrics
  */
116
117
  void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                         const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
118

Guolin Ke's avatar
Guolin Ke committed
119
120
121
122
  /*!
  * \brief Reset Boosting Config
  * \param gbdt_config Config for boosting
  */
Guolin Ke's avatar
Guolin Ke committed
123
  void ResetConfig(const Config* gbdt_config) override;
Guolin Ke's avatar
Guolin Ke committed
124

Guolin Ke's avatar
Guolin Ke committed
125
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
126
127
128
  * \brief Adding a validation dataset
  * \param valid_data Validation dataset
  * \param valid_metrics Metrics for validation dataset
Guolin Ke's avatar
Guolin Ke committed
129
  */
wxchan's avatar
wxchan committed
130
  void AddValidDataset(const Dataset* valid_data,
131
                       const std::vector<const Metric*>& valid_metrics) override;
Guolin Ke's avatar
Guolin Ke committed
132

Guolin Ke's avatar
Guolin Ke committed
133
134
  /*!
  * \brief Perform a full training procedure
Andrew Ziem's avatar
Andrew Ziem committed
135
  * \param snapshot_freq frequency of snapshot
Guolin Ke's avatar
Guolin Ke committed
136
137
  * \param model_output_path path of model file
  */
Guolin Ke's avatar
Guolin Ke committed
138
139
  void Train(int snapshot_freq, const std::string& model_output_path) override;

140
141
  void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) override;

Guolin Ke's avatar
Guolin Ke committed
142
  /*!
Guolin Ke's avatar
Guolin Ke committed
143
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
144
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
Nikita Titov's avatar
Nikita Titov committed
145
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
Guolin Ke's avatar
Guolin Ke committed
146
  * \return True if cannot train any more
Guolin Ke's avatar
Guolin Ke committed
147
  */
Guolin Ke's avatar
Guolin Ke committed
148
  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
149

wxchan's avatar
wxchan committed
150
151
152
153
154
  /*!
  * \brief Rollback one iteration
  */
  void RollbackOneIter() override;

Guolin Ke's avatar
Guolin Ke committed
155
156
157
  /*!
  * \brief Get current iteration
  */
Guolin Ke's avatar
Guolin Ke committed
158
  int GetCurrentIteration() const override {
    // number of stored trees divided by the number of trees per iteration
    const int num_trees = static_cast<int>(models_.size());
    return num_trees / num_tree_per_iteration_;
  }
wxchan's avatar
wxchan committed
159

Guolin Ke's avatar
Guolin Ke committed
160
161
162
163
  /*!
  * \brief Can use early stopping for prediction or not
  * \return True if cannot use early stopping for prediction
  */
164
  bool NeedAccuratePrediction() const override {
    // without an objective function the answer is true,
    // otherwise the objective function decides
    return objective_function_ == nullptr || objective_function_->NeedAccuratePrediction();
  }

Guolin Ke's avatar
Guolin Ke committed
172
173
174
175
176
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
177
  std::vector<double> GetEvalAt(int data_idx) const override;
178

Guolin Ke's avatar
Guolin Ke committed
179
180
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
181
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
182
183
  * \return training score
  */
Guolin Ke's avatar
Guolin Ke committed
184
  const double* GetTrainingScore(int64_t* out_len) override;
185

Guolin Ke's avatar
Guolin Ke committed
186
187
188
189
190
  /*!
  * \brief Get size of prediction at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return The size of prediction
  */
Guolin Ke's avatar
Guolin Ke committed
191
  int64_t GetNumPredictAt(int data_idx) const override {
Guolin Ke's avatar
Guolin Ke committed
192
193
194
195
196
    CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
    data_size_t num_data = train_data_->num_data();
    if (data_idx > 0) {
      num_data = valid_score_updater_[data_idx - 1]->num_data();
    }
197
    return static_cast<int64_t>(num_data) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
198
  }
Guolin Ke's avatar
Guolin Ke committed
199

Guolin Ke's avatar
Guolin Ke committed
200
201
202
203
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param out_result used to store prediction result, memory should be allocated before calling this function
204
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
205
  */
Guolin Ke's avatar
Guolin Ke committed
206
  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) override;
Guolin Ke's avatar
Guolin Ke committed
207

Guolin Ke's avatar
Guolin Ke committed
208
209
  /*!
  * \brief Get number of prediction for one data
210
  * \param start_iteration Start index of the iteration to predict
Guolin Ke's avatar
Guolin Ke committed
211
  * \param num_iteration number of used iterations
212
  * \param is_pred_leaf True if predicting leaf index
Guolin Ke's avatar
Guolin Ke committed
213
214
215
  * \param is_pred_contrib True if predicting feature contribution
  * \return number of prediction
  */
216
  inline int NumPredictOneRow(int start_iteration, int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const override {
217
    int num_pred_in_one_row = num_class_;
Guolin Ke's avatar
Guolin Ke committed
218
219
    if (is_pred_leaf) {
      int max_iteration = GetCurrentIteration();
220
221
      start_iteration = std::max(start_iteration, 0);
      start_iteration = std::min(start_iteration, max_iteration);
Guolin Ke's avatar
Guolin Ke committed
222
      if (num_iteration > 0) {
223
        num_pred_in_one_row *= static_cast<int>(std::min(max_iteration - start_iteration, num_iteration));
Guolin Ke's avatar
Guolin Ke committed
224
      } else {
225
        num_pred_in_one_row *= (max_iteration - start_iteration);
Guolin Ke's avatar
Guolin Ke committed
226
      }
227
    } else if (is_pred_contrib) {
228
      num_pred_in_one_row = num_tree_per_iteration_ * (max_feature_idx_ + 2);  // +1 for 0-based indexing, +1 for baseline
Guolin Ke's avatar
Guolin Ke committed
229
    }
230
    return num_pred_in_one_row;
Guolin Ke's avatar
Guolin Ke committed
231
  }
Guolin Ke's avatar
Guolin Ke committed
232

cbecker's avatar
cbecker committed
233
  void PredictRaw(const double* features, double* output,
234
                  const PredictionEarlyStopInstance* earlyStop) const override;
wxchan's avatar
wxchan committed
235

Guolin Ke's avatar
Guolin Ke committed
236
237
  void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                       const PredictionEarlyStopInstance* early_stop) const override;
238

cbecker's avatar
cbecker committed
239
240
  void Predict(const double* features, double* output,
               const PredictionEarlyStopInstance* earlyStop) const override;
Guolin Ke's avatar
Guolin Ke committed
241

Guolin Ke's avatar
Guolin Ke committed
242
243
  void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                    const PredictionEarlyStopInstance* early_stop) const override;
244

245
  void PredictLeafIndex(const double* features, double* output) const override;
wxchan's avatar
wxchan committed
246

247
248
  void PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const override;

249
250
251
252
  void PredictContrib(const double* features, double* output) const override;

  void PredictContribByMap(const std::unordered_map<int, double>& features,
                           std::vector<std::unordered_map<int, double>>* output) const override;
253

Guolin Ke's avatar
Guolin Ke committed
254
  /*!
wxchan's avatar
wxchan committed
255
  * \brief Dump model to json format string
256
  * \param start_iteration The model will be saved start from
257
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
258
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
259
  * \return Json format string of model
Guolin Ke's avatar
Guolin Ke committed
260
  */
261
262
  std::string DumpModel(int start_iteration, int num_iteration,
                        int feature_importance_type) const override;
wxchan's avatar
wxchan committed
263

264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  std::string ModelToIfElse(int num_iteration) const override;

  /*!
  * \brief Translate model to if-else statement and save it to a file
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  bool SaveModelToIfElse(int num_iteration, const char* filename) const override;

wxchan's avatar
wxchan committed
279
280
  /*!
  * \brief Save model to file
281
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
282
  * \param num_iterations Number of model that want to save, -1 means save all
283
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
284
  * \param filename Filename that want to save to
285
  * \return is_finish Is training finished or not
wxchan's avatar
wxchan committed
286
  */
287
288
289
  bool SaveModelToFile(int start_iteration, int num_iterations,
                       int feature_importance_type,
                       const char* filename) const override;
wxchan's avatar
wxchan committed
290

291
292
  /*!
  * \brief Save model to string
293
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
294
  * \param num_iterations Number of model that want to save, -1 means save all
295
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
296
297
  * \return Non-empty string if succeeded
  */
298
  std::string SaveModelToString(int start_iteration, int num_iterations, int feature_importance_type) const override;
299

Guolin Ke's avatar
Guolin Ke committed
300
  /*!
301
  * \brief Restore from a serialized buffer
Guolin Ke's avatar
Guolin Ke committed
302
  */
303
  bool LoadModelFromString(const char* buffer, size_t len) override;
wxchan's avatar
wxchan committed
304

305
306
307
308
309
310
311
312
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  std::vector<double> FeatureImportance(int num_iteration, int importance_type) const override;

313
314
315
316
317
318
319
320
321
322
323
324
  /*!
  * \brief Calculate upper bound value
  * \return upper bound value
  */
  double GetUpperBoundValue() const override;

  /*!
  * \brief Calculate lower bound value
  * \return lower bound value
  */
  double GetLowerBoundValue() const override;

Guolin Ke's avatar
Guolin Ke committed
325
326
327
328
329
  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  inline int MaxFeatureIdx() const override {
    return max_feature_idx_;
  }
Guolin Ke's avatar
Guolin Ke committed
330

wxchan's avatar
wxchan committed
331
332
333
334
335
336
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  inline std::vector<std::string> FeatureNames() const override {
    // returned by value, so the caller receives its own copy
    return feature_names_;
  }

Guolin Ke's avatar
Guolin Ke committed
337
338
339
340
341
342
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  inline int LabelIdx() const override {
    return label_idx_;
  }

Guolin Ke's avatar
Guolin Ke committed
343
344
345
346
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
347
  inline int NumberOfTotalModel() const override {
    // every stored tree counts as one sub-model
    const auto num_trees = models_.size();
    return static_cast<int>(num_trees);
  }
Guolin Ke's avatar
Guolin Ke committed
348

Guolin Ke's avatar
Guolin Ke committed
349
350
351
352
  /*!
  * \brief Get number of tree per iteration
  * \return number of tree per iteration
  */
Guolin Ke's avatar
Guolin Ke committed
353
  inline int NumModelPerIteration() const override {
    return num_tree_per_iteration_;
  }
Guolin Ke's avatar
Guolin Ke committed
354

355
356
357
358
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
359
  inline int NumberOfClasses() const override {
    return num_class_;
  }
360

361
  inline void InitPredict(int start_iteration, int num_iteration, bool is_pred_contrib) override {
Guolin Ke's avatar
Guolin Ke committed
362
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
363
364
    start_iteration = std::max(start_iteration, 0);
    start_iteration = std::min(start_iteration, num_iteration_for_pred_);
wxchan's avatar
wxchan committed
365
    if (num_iteration > 0) {
366
367
368
      num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_ - start_iteration);
    } else {
      num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
369
    }
370
    start_iteration_for_pred_ = start_iteration;
371
372
373
374
375
376
    if (is_pred_contrib) {
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
        models_[i]->RecomputeMaxDepth();
      }
    }
377
  }
wxchan's avatar
wxchan committed
378

Guolin Ke's avatar
Guolin Ke committed
379
  /*!
  * \brief Get the output of one leaf
  * \param tree_idx Index of the tree in the stored models, checked against the number of trees
  * \param leaf_idx Index of the leaf, checked against the number of leaves of that tree
  * \return Result of LeafOutput(leaf_idx) of the selected tree
  */
  inline double GetLeafValue(int tree_idx, int leaf_idx) const override {
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& tree = models_[tree_idx];
    return tree->LeafOutput(leaf_idx);
  }

Guolin Ke's avatar
Guolin Ke committed
385
  /*!
  * \brief Set the output of one leaf
  * \param tree_idx Index of the tree in the stored models, checked against the number of trees
  * \param leaf_idx Index of the leaf, checked against the number of leaves of that tree
  * \param val Value passed to SetLeafOutput of the selected tree
  */
  inline void SetLeafValue(int tree_idx, int leaf_idx, double val) override {
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    const auto& tree = models_[tree_idx];
    tree->SetLeafOutput(leaf_idx, val);
  }

391
392
393
  /*!
  * \brief Get Type name of this boosting object
  */
Guolin Ke's avatar
Guolin Ke committed
394
  const char* SubModelName() const override {
    return "tree";
  }
395

396
397
  /*!
  * \brief Get the linear tree flag of this model
  * \return Value of linear_tree_
  */
  bool IsLinear() const override {
    return linear_tree_;
  }

398
399
  /*!
  * \brief Get the parser config file content stored in this model
  * \return Copy of parser_config_str_
  */
  inline std::string ParserConfigStr() const override {
    return parser_config_str_;
  }

Nikita Titov's avatar
Nikita Titov committed
400
 protected:
401
402
403
404
405
406
407
  /*!
  * \brief Ask the objective function whether its hessian is constant
  * \param objective_function Objective function to query, may be nullptr
  * \return False if objective_function is nullptr, otherwise the result of IsConstantHessian()
  */
  virtual bool GetIsConstHessian(const ObjectiveFunction* objective_function) {
    return objective_function != nullptr && objective_function->IsConstantHessian();
  }
Guolin Ke's avatar
Guolin Ke committed
408
409
410
  /*!
  * \brief Print eval result and check early stopping
  */
411
  virtual bool EvalAndCheckEarlyStopping();
Guolin Ke's avatar
Guolin Ke committed
412
413
414
415

  /*!
  * \brief reset config for bagging
  */
Guolin Ke's avatar
Guolin Ke committed
416
  void ResetBaggingConfig(const Config* config, bool is_change_dataset);
Guolin Ke's avatar
Guolin Ke committed
417

Guolin Ke's avatar
Guolin Ke committed
418
419
420
421
  /*!
  * \brief Implement bagging logic
  * \param iter Current iteration
  */
422
423
  virtual void Bagging(int iter);

424
425
  virtual data_size_t BaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
426

427
428
  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
429

Guolin Ke's avatar
Guolin Ke committed
430
431
432
  /*!
  * \brief Calculate the objective function
  */
Guolin Ke's avatar
Guolin Ke committed
433
  virtual void Boosting();
Guolin Ke's avatar
Guolin Ke committed
434

Guolin Ke's avatar
Guolin Ke committed
435
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
436
  * \brief updating score after tree was trained
Guolin Ke's avatar
Guolin Ke committed
437
  * \param tree Trained tree of this iteration
438
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
439
  */
440
  virtual void UpdateScore(const Tree* tree, const int cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
441

Guolin Ke's avatar
Guolin Ke committed
442
443
444
445
  /*!
  * \brief eval results for one metric

  */
446
  virtual std::vector<double> EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const;
Guolin Ke's avatar
Guolin Ke committed
447

Guolin Ke's avatar
Guolin Ke committed
448
  /*!
Hui Xue's avatar
Hui Xue committed
449
  * \brief Print metric result of current iteration
Andrew Ziem's avatar
Andrew Ziem committed
450
  * \param iter Current iteration
Guolin Ke's avatar
Guolin Ke committed
451
  * \return best_msg if met early_stopping
Guolin Ke's avatar
Guolin Ke committed
452
  */
Guolin Ke's avatar
Guolin Ke committed
453
  std::string OutputMetric(int iter);
454

Guolin Ke's avatar
Guolin Ke committed
455
  double BoostFromAverage(int class_id, bool update_scorer);
Guolin Ke's avatar
Guolin Ke committed
456

457
458
  /*! \brief current iteration */
  int iter_;
Guolin Ke's avatar
Guolin Ke committed
459
460
461
  /*! \brief Pointer to training data */
  const Dataset* train_data_;
  /*! \brief Config of gbdt */
Guolin Ke's avatar
Guolin Ke committed
462
  std::unique_ptr<Config> config_;
Hui Xue's avatar
Hui Xue committed
463
  /*! \brief Tree learner, will use this class to learn trees */
464
  std::unique_ptr<TreeLearner> tree_learner_;
Guolin Ke's avatar
Guolin Ke committed
465
  /*! \brief Objective function */
466
  const ObjectiveFunction* objective_function_;
Hui Xue's avatar
Hui Xue committed
467
  /*! \brief Store and update training data's score */
Guolin Ke's avatar
Guolin Ke committed
468
  std::unique_ptr<ScoreUpdater> train_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
469
470
471
  /*! \brief Metrics for training data */
  std::vector<const Metric*> training_metrics_;
  /*! \brief Store and update validation data's scores */
Guolin Ke's avatar
Guolin Ke committed
472
  std::vector<std::unique_ptr<ScoreUpdater>> valid_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
473
474
  /*! \brief Metric for validation data */
  std::vector<std::vector<const Metric*>> valid_metrics_;
wxchan's avatar
wxchan committed
475
476
  /*! \brief Number of rounds for early stopping */
  int early_stopping_round_;
477
478
  /*! \brief Only use first metric for early stopping */
  bool es_first_metric_only_;
Guolin Ke's avatar
Guolin Ke committed
479
  /*! \brief Best iteration(s) for early stopping */
wxchan's avatar
wxchan committed
480
  std::vector<std::vector<int>> best_iter_;
Guolin Ke's avatar
Guolin Ke committed
481
  /*! \brief Best score(s) for early stopping */
482
  std::vector<std::vector<double>> best_score_;
Guolin Ke's avatar
Guolin Ke committed
483
484
  /*! \brief output message of best iteration */
  std::vector<std::vector<std::string>> best_msg_;
Guolin Ke's avatar
Guolin Ke committed
485
  /*! \brief Trained models(trees) */
Guolin Ke's avatar
Guolin Ke committed
486
  std::vector<std::unique_ptr<Tree>> models_;
Guolin Ke's avatar
Guolin Ke committed
487
488
  /*! \brief Max feature index of training data*/
  int max_feature_idx_;
489
490
  /*! \brief Parser config file content */
  std::string parser_config_str_ = "";
491

492
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
493
494
495
496
497
  /*! \brief First order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> gradients_;
  /*! \brief Second order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> hessians_;
#else
Guolin Ke's avatar
Guolin Ke committed
498
  /*! \brief First order derivative of training data */
499
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> gradients_;
500
  /*! \brief Second order derivative of training data */
501
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> hessians_;
502
#endif
503
504
505
506
  /*! \brief Pointer to gradient vector, can be on CPU or GPU */
  score_t* gradients_pointer_;
  /*! \brief Pointer to hessian vector, can be on CPU or GPU */
  score_t* hessians_pointer_;
shiyu1994's avatar
shiyu1994 committed
507
508
  /*! \brief Whether boosting is done on GPU, used for cuda_exp */
  bool boosting_on_gpu_;
509
510
511
512
513
514
515
516
  #ifdef USE_CUDA_EXP
  /*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
  mutable std::vector<double> host_score_;
  /*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */
  mutable CUDAVector<double> cuda_score_;
  /*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda_exp */
  CUDAVector<data_size_t> cuda_bag_data_indices_;
  #endif  // USE_CUDA_EXP
517

Guolin Ke's avatar
Guolin Ke committed
518
  /*! \brief Store the indices of in-bag data */
519
  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
Guolin Ke's avatar
Guolin Ke committed
520
521
  /*! \brief Number of in-bag data */
  data_size_t bag_data_cnt_;
wxchan's avatar
wxchan committed
522
  /*! \brief Number of training data */
Guolin Ke's avatar
Guolin Ke committed
523
  data_size_t num_data_;
524
525
526
  /*! \brief Number of trees per iterations */
  int num_tree_per_iteration_;
  /*! \brief Number of class */
527
  int num_class_;
Guolin Ke's avatar
Guolin Ke committed
528
529
  /*! \brief Index of label column */
  data_size_t label_idx_;
530
  /*! \brief number of used model */
wxchan's avatar
wxchan committed
531
  int num_iteration_for_pred_;
532
533
  /*! \brief Start iteration of used model */
  int start_iteration_for_pred_;
Guolin Ke's avatar
Guolin Ke committed
534
535
  /*! \brief Shrinkage rate for one iteration */
  double shrinkage_rate_;
wxchan's avatar
wxchan committed
536
537
  /*! \brief Number of loaded initial models */
  int num_init_iteration_;
Guolin Ke's avatar
Guolin Ke committed
538
539
  /*! \brief Feature names */
  std::vector<std::string> feature_names_;
Guolin Ke's avatar
Guolin Ke committed
540
  std::vector<std::string> feature_infos_;
Guolin Ke's avatar
Guolin Ke committed
541
542
  std::unique_ptr<Dataset> tmp_subset_;
  bool is_use_subset_;
543
  std::vector<bool> class_need_train_;
544
  bool is_constant_hessian_;
545
  std::unique_ptr<ObjectiveFunction> loaded_objective_;
Guolin Ke's avatar
Guolin Ke committed
546
  bool average_output_;
Guolin Ke's avatar
Guolin Ke committed
547
  bool need_re_bagging_;
Guolin Ke's avatar
Guolin Ke committed
548
  bool balanced_bagging_;
Guolin Ke's avatar
Guolin Ke committed
549
  std::string loaded_parameter_;
550
  std::vector<int8_t> monotone_constraints_;
551
552
553
  const int bagging_rand_block_ = 1024;
  std::vector<Random> bagging_rands_;
  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
554
  Json forced_splits_json_;
555
  bool linear_tree_;
Guolin Ke's avatar
Guolin Ke committed
556
557
558
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
559
#endif   // LIGHTGBM_BOOSTING_GBDT_H_