gbdt.h 21.3 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_BOOSTING_GBDT_H_
#define LIGHTGBM_BOOSTING_GBDT_H_

8
9
10
#include <LightGBM/boosting.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/prediction_early_stop.h>
11
#include <LightGBM/cuda/vector_cudahost.h>
12
13
14
#include <LightGBM/utils/json11.h>
#include <LightGBM/utils/threading.h>

Guolin Ke's avatar
Guolin Ke committed
15
#include <string>
16
17
#include <algorithm>
#include <cstdio>
18
#include <fstream>
19
#include <map>
Guolin Ke's avatar
Guolin Ke committed
20
#include <memory>
21
#include <mutex>
22
23
24
25
#include <unordered_map>
#include <utility>
#include <vector>

26
#include "cuda/cuda_score_updater.hpp"
27
#include "score_updater.hpp"
Guolin Ke's avatar
Guolin Ke committed
28
29

namespace LightGBM {
Guolin Ke's avatar
Guolin Ke committed
30

31
32
using json11::Json;

Guolin Ke's avatar
Guolin Ke committed
33
34
35
/*!
* \brief GBDT algorithm implementation, including training, prediction and bagging.
*/
Guolin Ke's avatar
Guolin Ke committed
36
class GBDT : public GBDTBase {
Nikita Titov's avatar
Nikita Titov committed
37
 public:
Guolin Ke's avatar
Guolin Ke committed
38
39
40
  /*!
  * \brief Constructor
  */
41
  GBDT();
Guolin Ke's avatar
Guolin Ke committed
42

Guolin Ke's avatar
Guolin Ke committed
43
44
45
46
  /*!
  * \brief Destructor
  */
  ~GBDT();
Guolin Ke's avatar
Guolin Ke committed
47

48

Guolin Ke's avatar
Guolin Ke committed
49
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
50
  * \brief Initialization logic
zhangyafeikimi's avatar
zhangyafeikimi committed
51
  * \param gbdt_config Config for boosting
Guolin Ke's avatar
Guolin Ke committed
52
  * \param train_data Training data
53
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
54
55
  * \param training_metrics Training metrics
  */
Guolin Ke's avatar
Guolin Ke committed
56
  void Init(const Config* gbdt_config, const Dataset* train_data,
57
            const ObjectiveFunction* objective_function,
Guolin Ke's avatar
Guolin Ke committed
58
            const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
59
60

  /*!
Guolin Ke's avatar
Guolin Ke committed
61
  * \brief Merge model from other boosting object. Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
62
63
64
65
66
67
68
69
70
71
72
73
  * \param other
  */
  void MergeFrom(const Boosting* other) override {
    auto other_gbdt = reinterpret_cast<const GBDT*>(other);
    // tmp move to other vector
    auto original_models = std::move(models_);
    models_ = std::vector<std::unique_ptr<Tree>>();
    // push model from other first
    for (const auto& tree : other_gbdt->models_) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
74
    num_init_iteration_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
75
76
77
78
79
    // push model in current object
    for (const auto& tree : original_models) {
      auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
      models_.push_back(std::move(new_tree));
    }
Guolin Ke's avatar
Guolin Ke committed
80
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
wxchan's avatar
wxchan committed
81
82
  }

83
  void ShuffleModels(int start_iter, int end_iter) override {
84
    int total_iter = static_cast<int>(models_.size()) / num_tree_per_iteration_;
85
86
87
88
89
    start_iter = std::max(0, start_iter);
    if (end_iter <= 0) {
      end_iter = total_iter;
    }
    end_iter = std::min(total_iter, end_iter);
90
91
92
93
94
95
    auto original_models = std::move(models_);
    std::vector<int> indices(total_iter);
    for (int i = 0; i < total_iter; ++i) {
      indices[i] = i;
    }
    Random tmp_rand(17);
96
97
    for (int i = start_iter; i < end_iter - 1; ++i) {
      int j = tmp_rand.NextShort(i + 1, end_iter);
98
99
100
101
102
103
104
105
106
107
108
109
      std::swap(indices[i], indices[j]);
    }
    models_ = std::vector<std::unique_ptr<Tree>>();
    for (int i = 0; i < total_iter; ++i) {
      for (int j = 0; j < num_tree_per_iteration_; ++j) {
        int tree_idx = indices[i] * num_tree_per_iteration_ + j;
        auto new_tree = std::unique_ptr<Tree>(new Tree(*(original_models[tree_idx].get())));
        models_.push_back(std::move(new_tree));
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
  /*!
  * \brief Reset the training data
  * \param train_data New Training data
  * \param objective_function Training objective function
  * \param training_metrics Training metrics
  */
116
117
  void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                         const std::vector<const Metric*>& training_metrics) override;
wxchan's avatar
wxchan committed
118

Guolin Ke's avatar
Guolin Ke committed
119
120
121
122
  /*!
  * \brief Reset Boosting Config
  * \param gbdt_config Config for boosting
  */
Guolin Ke's avatar
Guolin Ke committed
123
  void ResetConfig(const Config* gbdt_config) override;
Guolin Ke's avatar
Guolin Ke committed
124

Guolin Ke's avatar
Guolin Ke committed
125
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
126
127
128
  * \brief Adding a validation dataset
  * \param valid_data Validation dataset
  * \param valid_metrics Metrics for validation dataset
Guolin Ke's avatar
Guolin Ke committed
129
  */
wxchan's avatar
wxchan committed
130
  void AddValidDataset(const Dataset* valid_data,
131
                       const std::vector<const Metric*>& valid_metrics) override;
Guolin Ke's avatar
Guolin Ke committed
132

Guolin Ke's avatar
Guolin Ke committed
133
134
  /*!
  * \brief Perform a full training procedure
Andrew Ziem's avatar
Andrew Ziem committed
135
  * \param snapshot_freq frequency of snapshot
Guolin Ke's avatar
Guolin Ke committed
136
137
  * \param model_output_path path of model file
  */
Guolin Ke's avatar
Guolin Ke committed
138
139
  void Train(int snapshot_freq, const std::string& model_output_path) override;

140
141
  void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) override;

Guolin Ke's avatar
Guolin Ke committed
142
  /*!
Guolin Ke's avatar
Guolin Ke committed
143
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
144
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
Nikita Titov's avatar
Nikita Titov committed
145
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
Guolin Ke's avatar
Guolin Ke committed
146
  * \return True if cannot train any more
Guolin Ke's avatar
Guolin Ke committed
147
  */
Guolin Ke's avatar
Guolin Ke committed
148
  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
149

wxchan's avatar
wxchan committed
150
151
152
153
154
  /*!
  * \brief Rollback one iteration
  */
  void RollbackOneIter() override;

Guolin Ke's avatar
Guolin Ke committed
155
156
157
  /*!
  * \brief Get current iteration
  */
Guolin Ke's avatar
Guolin Ke committed
158
  int GetCurrentIteration() const override { return static_cast<int>(models_.size()) / num_tree_per_iteration_; }
wxchan's avatar
wxchan committed
159

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  /*!
  * \brief Get parameters as a JSON string
  * \return "{}" if loaded_parameter_ is empty, otherwise a JSON object with one member per
  *         well-formed "[name: value]" line of loaded_parameter_ whose name is known to
  *         Config::ParameterTypes(); empty values, malformed lines and unknown names are left out
  */
  std::string GetLoadedParam() const override {
    if (loaded_parameter_.empty()) {
      return std::string("{}");
    }
    const auto param_types = Config::ParameterTypes();
    const auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
    bool first = true;
    std::stringstream str_buf;
    str_buf << "{";
    for (const auto& line : lines) {
      // split at the first ':' only, so that a value which itself contains ':' stays intact
      const auto colon_pos = line.find(':');
      if (colon_pos == std::string::npos || colon_pos == 0) {
        continue;  // not of the form "[name: value]"
      }
      // drop the leading '['
      const auto param = line.substr(1, colon_pos - 1);
      const auto raw_value = line.substr(colon_pos + 1);
      // " ]" marks an empty value; anything shorter cannot hold the leading ' ' and trailing ']'
      if (raw_value.size() < 2 || raw_value == " ]") {
        continue;
      }
      // strip the leading ' ' and the trailing ']'
      const auto value_str = raw_value.substr(1, raw_value.size() - 2);
      // resolve the type before anything is written, so that a name unknown to this build
      // neither throws std::out_of_range nor leaves a dangling key in the output
      const auto type_iter = param_types.find(param);
      if (type_iter == param_types.end()) {
        continue;
      }
      const auto& param_type = type_iter->second;
      if (first) {
        first = false;
        str_buf << "\"";
      } else {
        str_buf << ",\"";
      }
      str_buf << param << "\": ";
      if (param_type == "string") {
        str_buf << "\"" << value_str << "\"";
      } else if (param_type == "int") {
        int value;
        Common::Atoi(value_str.c_str(), &value);
        str_buf << value;
      } else if (param_type == "double") {
        double value;
        Common::Atof(value_str.c_str(), &value);
        str_buf << value;
      } else if (param_type == "bool") {
        bool value = value_str == "1";
        str_buf << std::boolalpha << value;
      } else if (param_type.substr(0, 6) == "vector") {
        str_buf << "[";
        // characters 7..12 of "vector<string>" spell the element type
        if (param_type.substr(7, 6) == "string") {
          const auto parts = Common::Split(value_str.c_str(), ",");
          str_buf << "\"" << Common::Join(parts, "\",\"") << "\"";
        } else {
          str_buf << value_str;
        }
        str_buf << "]";
      }
    }
    str_buf << "}";
    return str_buf.str();
  }

Guolin Ke's avatar
Guolin Ke committed
214
215
216
217
  /*!
  * \brief Can use early stopping for prediction or not
  * \return True if cannot use early stopping for prediction
  */
218
  bool NeedAccuratePrediction() const override {
219
220
221
222
223
224
225
    if (objective_function_ == nullptr) {
      return true;
    } else {
      return objective_function_->NeedAccuratePrediction();
    }
  }

Guolin Ke's avatar
Guolin Ke committed
226
227
228
229
230
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
231
  std::vector<double> GetEvalAt(int data_idx) const override;
232

Guolin Ke's avatar
Guolin Ke committed
233
234
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
235
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
236
237
  * \return training score
  */
Guolin Ke's avatar
Guolin Ke committed
238
  const double* GetTrainingScore(int64_t* out_len) override;
239

Guolin Ke's avatar
Guolin Ke committed
240
241
242
243
244
  /*!
  * \brief Get size of prediction at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return The size of prediction
  */
Guolin Ke's avatar
Guolin Ke committed
245
  int64_t GetNumPredictAt(int data_idx) const override {
Guolin Ke's avatar
Guolin Ke committed
246
247
248
249
250
    CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
    data_size_t num_data = train_data_->num_data();
    if (data_idx > 0) {
      num_data = valid_score_updater_[data_idx - 1]->num_data();
    }
251
    return static_cast<int64_t>(num_data) * num_class_;
Guolin Ke's avatar
Guolin Ke committed
252
  }
Guolin Ke's avatar
Guolin Ke committed
253

Guolin Ke's avatar
Guolin Ke committed
254
255
256
257
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param out_result used to store prediction result, memory should be allocated before calling this function
258
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
259
  */
Guolin Ke's avatar
Guolin Ke committed
260
  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) override;
Guolin Ke's avatar
Guolin Ke committed
261

Guolin Ke's avatar
Guolin Ke committed
262
263
  /*!
  * \brief Get number of prediction for one data
264
  * \param start_iteration Start index of the iteration to predict
Guolin Ke's avatar
Guolin Ke committed
265
  * \param num_iteration number of used iterations
266
  * \param is_pred_leaf True if predicting leaf index
Guolin Ke's avatar
Guolin Ke committed
267
268
269
  * \param is_pred_contrib True if predicting feature contribution
  * \return number of prediction
  */
270
  inline int NumPredictOneRow(int start_iteration, int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const override {
271
    int num_pred_in_one_row = num_class_;
Guolin Ke's avatar
Guolin Ke committed
272
273
    if (is_pred_leaf) {
      int max_iteration = GetCurrentIteration();
274
275
      start_iteration = std::max(start_iteration, 0);
      start_iteration = std::min(start_iteration, max_iteration);
Guolin Ke's avatar
Guolin Ke committed
276
      if (num_iteration > 0) {
277
        num_pred_in_one_row *= static_cast<int>(std::min(max_iteration - start_iteration, num_iteration));
Guolin Ke's avatar
Guolin Ke committed
278
      } else {
279
        num_pred_in_one_row *= (max_iteration - start_iteration);
Guolin Ke's avatar
Guolin Ke committed
280
      }
281
    } else if (is_pred_contrib) {
282
      num_pred_in_one_row = num_tree_per_iteration_ * (max_feature_idx_ + 2);  // +1 for 0-based indexing, +1 for baseline
Guolin Ke's avatar
Guolin Ke committed
283
    }
284
    return num_pred_in_one_row;
Guolin Ke's avatar
Guolin Ke committed
285
  }
Guolin Ke's avatar
Guolin Ke committed
286

cbecker's avatar
cbecker committed
287
  void PredictRaw(const double* features, double* output,
288
                  const PredictionEarlyStopInstance* earlyStop) const override;
wxchan's avatar
wxchan committed
289

Guolin Ke's avatar
Guolin Ke committed
290
291
  void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                       const PredictionEarlyStopInstance* early_stop) const override;
292

cbecker's avatar
cbecker committed
293
294
  void Predict(const double* features, double* output,
               const PredictionEarlyStopInstance* earlyStop) const override;
Guolin Ke's avatar
Guolin Ke committed
295

Guolin Ke's avatar
Guolin Ke committed
296
297
  void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                    const PredictionEarlyStopInstance* early_stop) const override;
298

299
  void PredictLeafIndex(const double* features, double* output) const override;
wxchan's avatar
wxchan committed
300

301
302
  void PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const override;

303
304
305
306
  void PredictContrib(const double* features, double* output) const override;

  void PredictContribByMap(const std::unordered_map<int, double>& features,
                           std::vector<std::unordered_map<int, double>>* output) const override;
307

Guolin Ke's avatar
Guolin Ke committed
308
  /*!
wxchan's avatar
wxchan committed
309
  * \brief Dump model to json format string
310
  * \param start_iteration Index of the iteration from which the dump starts
311
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
312
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
313
  * \return Json format string of model
Guolin Ke's avatar
Guolin Ke committed
314
  */
315
316
  std::string DumpModel(int start_iteration, int num_iteration,
                        int feature_importance_type) const override;
wxchan's avatar
wxchan committed
317

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  std::string ModelToIfElse(int num_iteration) const override;

  /*!
  * \brief Translate model to if-else statement and save it to a file
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  bool SaveModelToIfElse(int num_iteration, const char* filename) const override;

wxchan's avatar
wxchan committed
333
334
  /*!
  * \brief Save model to file
335
  * \param start_iteration Index of the iteration from which saving starts
wxchan's avatar
wxchan committed
336
  * \param num_iterations Number of model that want to save, -1 means save all
337
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
338
  * \param filename Filename that want to save to
339
  * \return is_finish Is training finished or not
wxchan's avatar
wxchan committed
340
  */
341
342
343
  bool SaveModelToFile(int start_iteration, int num_iterations,
                       int feature_importance_type,
                       const char* filename) const override;
wxchan's avatar
wxchan committed
344

345
346
  /*!
  * \brief Save model to string
347
  * \param start_iteration Index of the iteration from which saving starts
wxchan's avatar
wxchan committed
348
  * \param num_iterations Number of model that want to save, -1 means save all
349
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
350
351
  * \return Non-empty string if succeeded
  */
352
  std::string SaveModelToString(int start_iteration, int num_iterations, int feature_importance_type) const override;
353

Guolin Ke's avatar
Guolin Ke committed
354
  /*!
355
  * \brief Restore from a serialized buffer
Guolin Ke's avatar
Guolin Ke committed
356
  */
357
  bool LoadModelFromString(const char* buffer, size_t len) override;
wxchan's avatar
wxchan committed
358

359
360
361
362
363
364
365
366
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  std::vector<double> FeatureImportance(int num_iteration, int importance_type) const override;

367
368
369
370
371
372
373
374
375
376
377
378
  /*!
  * \brief Calculate upper bound value
  * \return upper bound value
  */
  double GetUpperBoundValue() const override;

  /*!
  * \brief Calculate lower bound value
  * \return lower bound value
  */
  double GetLowerBoundValue() const override;

Guolin Ke's avatar
Guolin Ke committed
379
380
381
382
383
  /*!
  * \brief Get max feature index of this model
  * \return Value of max_feature_idx_
  */
  inline int MaxFeatureIdx() const override {
    return max_feature_idx_;
  }
Guolin Ke's avatar
Guolin Ke committed
384

wxchan's avatar
wxchan committed
385
386
387
388
389
390
  /*!
  * \brief Get feature names of this model
  * \return Copy of the feature names stored in feature_names_
  */
  inline std::vector<std::string> FeatureNames() const override {
    return feature_names_;
  }

Guolin Ke's avatar
Guolin Ke committed
391
392
393
394
395
396
  /*!
  * \brief Get index of label column
  * \return Value of label_idx_
  */
  inline int LabelIdx() const override {
    return label_idx_;
  }

Guolin Ke's avatar
Guolin Ke committed
397
398
399
400
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
401
  inline int NumberOfTotalModel() const override { return static_cast<int>(models_.size()); }
Guolin Ke's avatar
Guolin Ke committed
402

Guolin Ke's avatar
Guolin Ke committed
403
404
405
406
  /*!
  * \brief Get number of tree per iteration
  * \return number of tree per iteration
  */
Guolin Ke's avatar
Guolin Ke committed
407
  inline int NumModelPerIteration() const override { return num_tree_per_iteration_; }
Guolin Ke's avatar
Guolin Ke committed
408

409
410
411
412
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
413
  inline int NumberOfClasses() const override { return num_class_; }
414

415
  inline void InitPredict(int start_iteration, int num_iteration, bool is_pred_contrib) override {
Guolin Ke's avatar
Guolin Ke committed
416
    num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
417
418
    start_iteration = std::max(start_iteration, 0);
    start_iteration = std::min(start_iteration, num_iteration_for_pred_);
wxchan's avatar
wxchan committed
419
    if (num_iteration > 0) {
420
421
422
      num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_ - start_iteration);
    } else {
      num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
423
    }
424
    start_iteration_for_pred_ = start_iteration;
425
426
427
428
429
430
    if (is_pred_contrib) {
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
        models_[i]->RecomputeMaxDepth();
      }
    }
431
  }
wxchan's avatar
wxchan committed
432

Guolin Ke's avatar
Guolin Ke committed
433
  inline double GetLeafValue(int tree_idx, int leaf_idx) const override {
Guolin Ke's avatar
Guolin Ke committed
434
435
436
437
438
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    return models_[tree_idx]->LeafOutput(leaf_idx);
  }

Guolin Ke's avatar
Guolin Ke committed
439
  inline void SetLeafValue(int tree_idx, int leaf_idx, double val) override {
Guolin Ke's avatar
Guolin Ke committed
440
441
442
443
444
    CHECK(tree_idx >= 0 && static_cast<size_t>(tree_idx) < models_.size());
    CHECK(leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves());
    models_[tree_idx]->SetLeafOutput(leaf_idx, val);
  }

445
446
447
  /*!
  * \brief Get Type name of this boosting object
  */
Guolin Ke's avatar
Guolin Ke committed
448
  const char* SubModelName() const override { return "tree"; }
449

450
451
  /*!
  * \brief Get the linear tree flag of this model
  * \return Value of linear_tree_
  */
  bool IsLinear() const override {
    return linear_tree_;
  }

452
453
  /*!
  * \brief Get the parser config file content
  * \return Copy of parser_config_str_
  */
  inline std::string ParserConfigStr() const override {
    return parser_config_str_;
  }

Nikita Titov's avatar
Nikita Titov committed
454
 protected:
455
456
457
458
459
460
461
  /*!
  * \brief Ask an objective function whether its hessian is constant
  * \param objective_function Objective function to query, may be nullptr
  * \return False if objective_function is nullptr, otherwise its IsConstantHessian() result
  */
  virtual bool GetIsConstHessian(const ObjectiveFunction* objective_function) {
    return objective_function != nullptr && objective_function->IsConstantHessian();
  }
Guolin Ke's avatar
Guolin Ke committed
462
463
464
  /*!
  * \brief Print eval result and check early stopping
  */
465
  virtual bool EvalAndCheckEarlyStopping();
Guolin Ke's avatar
Guolin Ke committed
466
467
468
469

  /*!
  * \brief reset config for bagging
  */
Guolin Ke's avatar
Guolin Ke committed
470
  void ResetBaggingConfig(const Config* config, bool is_change_dataset);
Guolin Ke's avatar
Guolin Ke committed
471

Guolin Ke's avatar
Guolin Ke committed
472
473
474
475
  /*!
  * \brief Implement bagging logic
  * \param iter Current iteration
  */
476
477
  virtual void Bagging(int iter);

478
479
  virtual data_size_t BaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
480

481
482
  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt,
                                    data_size_t* buffer);
Guolin Ke's avatar
Guolin Ke committed
483

Guolin Ke's avatar
Guolin Ke committed
484
  /*!
485
  * \brief calculate the objective function
Guolin Ke's avatar
Guolin Ke committed
486
  */
Guolin Ke's avatar
Guolin Ke committed
487
  virtual void Boosting();
Guolin Ke's avatar
Guolin Ke committed
488

Guolin Ke's avatar
Guolin Ke committed
489
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
490
  * \brief updating score after tree was trained
Guolin Ke's avatar
Guolin Ke committed
491
  * \param tree Trained tree of this iteration
492
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
493
  */
494
  virtual void UpdateScore(const Tree* tree, const int cur_tree_id);
Guolin Ke's avatar
Guolin Ke committed
495

Guolin Ke's avatar
Guolin Ke committed
496
497
498
499
  /*!
  * \brief eval results for one metric

  */
500
  virtual std::vector<double> EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const;
Guolin Ke's avatar
Guolin Ke committed
501

Guolin Ke's avatar
Guolin Ke committed
502
  /*!
Hui Xue's avatar
Hui Xue committed
503
  * \brief Print metric result of current iteration
Andrew Ziem's avatar
Andrew Ziem committed
504
  * \param iter Current iteration
Guolin Ke's avatar
Guolin Ke committed
505
  * \return best_msg if met early_stopping
Guolin Ke's avatar
Guolin Ke committed
506
  */
Guolin Ke's avatar
Guolin Ke committed
507
  std::string OutputMetric(int iter);
508

Guolin Ke's avatar
Guolin Ke committed
509
  double BoostFromAverage(int class_id, bool update_scorer);
Guolin Ke's avatar
Guolin Ke committed
510

511
512
  /*! \brief current iteration */
  int iter_;
Guolin Ke's avatar
Guolin Ke committed
513
514
515
  /*! \brief Pointer to training data */
  const Dataset* train_data_;
  /*! \brief Config of gbdt */
Guolin Ke's avatar
Guolin Ke committed
516
  std::unique_ptr<Config> config_;
Hui Xue's avatar
Hui Xue committed
517
  /*! \brief Tree learner, will use this class to learn trees */
518
  std::unique_ptr<TreeLearner> tree_learner_;
Guolin Ke's avatar
Guolin Ke committed
519
  /*! \brief Objective function */
520
  const ObjectiveFunction* objective_function_;
Hui Xue's avatar
Hui Xue committed
521
  /*! \brief Store and update training data's score */
Guolin Ke's avatar
Guolin Ke committed
522
  std::unique_ptr<ScoreUpdater> train_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
523
524
525
  /*! \brief Metrics for training data */
  std::vector<const Metric*> training_metrics_;
  /*! \brief Store and update validation data's scores */
Guolin Ke's avatar
Guolin Ke committed
526
  std::vector<std::unique_ptr<ScoreUpdater>> valid_score_updater_;
Guolin Ke's avatar
Guolin Ke committed
527
528
  /*! \brief Metric for validation data */
  std::vector<std::vector<const Metric*>> valid_metrics_;
wxchan's avatar
wxchan committed
529
530
  /*! \brief Number of rounds for early stopping */
  int early_stopping_round_;
531
532
  /*! \brief Only use first metric for early stopping */
  bool es_first_metric_only_;
Guolin Ke's avatar
Guolin Ke committed
533
  /*! \brief Best iteration(s) for early stopping */
wxchan's avatar
wxchan committed
534
  std::vector<std::vector<int>> best_iter_;
Guolin Ke's avatar
Guolin Ke committed
535
  /*! \brief Best score(s) for early stopping */
536
  std::vector<std::vector<double>> best_score_;
Guolin Ke's avatar
Guolin Ke committed
537
538
  /*! \brief output message of best iteration */
  std::vector<std::vector<std::string>> best_msg_;
Guolin Ke's avatar
Guolin Ke committed
539
  /*! \brief Trained models(trees) */
Guolin Ke's avatar
Guolin Ke committed
540
  std::vector<std::unique_ptr<Tree>> models_;
Guolin Ke's avatar
Guolin Ke committed
541
542
  /*! \brief Max feature index of training data*/
  int max_feature_idx_;
543
544
  /*! \brief Parser config file content */
  std::string parser_config_str_ = "";
545

546
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
547
548
549
550
551
  /*! \brief First order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> gradients_;
  /*! \brief Second order derivative of training data */
  std::vector<score_t, CHAllocator<score_t>> hessians_;
#else
Guolin Ke's avatar
Guolin Ke committed
552
  /*! \brief First order derivative of training data */
553
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> gradients_;
554
  /*! \brief Second order derivative of training data */
555
  std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> hessians_;
556
#endif
557
558
559
560
  /*! \brief Pointer to gradient vector, can be on CPU or GPU */
  score_t* gradients_pointer_;
  /*! \brief Pointer to hessian vector, can be on CPU or GPU */
  score_t* hessians_pointer_;
shiyu1994's avatar
shiyu1994 committed
561
562
  /*! \brief Whether boosting is done on GPU, used for cuda_exp */
  bool boosting_on_gpu_;
563
564
565
566
567
568
569
570
  #ifdef USE_CUDA_EXP
  /*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
  mutable std::vector<double> host_score_;
  /*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */
  mutable CUDAVector<double> cuda_score_;
  /*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda_exp */
  CUDAVector<data_size_t> cuda_bag_data_indices_;
  #endif  // USE_CUDA_EXP
571

Guolin Ke's avatar
Guolin Ke committed
572
  /*! \brief Store the indices of in-bag data */
573
  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
Guolin Ke's avatar
Guolin Ke committed
574
575
  /*! \brief Number of in-bag data */
  data_size_t bag_data_cnt_;
wxchan's avatar
wxchan committed
576
  /*! \brief Number of training data */
Guolin Ke's avatar
Guolin Ke committed
577
  data_size_t num_data_;
578
579
580
  /*! \brief Number of trees per iterations */
  int num_tree_per_iteration_;
  /*! \brief Number of class */
581
  int num_class_;
Guolin Ke's avatar
Guolin Ke committed
582
583
  /*! \brief Index of label column */
  data_size_t label_idx_;
584
  /*! \brief number of used model */
wxchan's avatar
wxchan committed
585
  int num_iteration_for_pred_;
586
587
  /*! \brief Start iteration of used model */
  int start_iteration_for_pred_;
Guolin Ke's avatar
Guolin Ke committed
588
589
  /*! \brief Shrinkage rate for one iteration */
  double shrinkage_rate_;
wxchan's avatar
wxchan committed
590
591
  /*! \brief Number of loaded initial models */
  int num_init_iteration_;
Guolin Ke's avatar
Guolin Ke committed
592
593
  /*! \brief Feature names */
  std::vector<std::string> feature_names_;
Guolin Ke's avatar
Guolin Ke committed
594
  std::vector<std::string> feature_infos_;
Guolin Ke's avatar
Guolin Ke committed
595
596
  std::unique_ptr<Dataset> tmp_subset_;
  bool is_use_subset_;
597
  std::vector<bool> class_need_train_;
598
  bool is_constant_hessian_;
599
  std::unique_ptr<ObjectiveFunction> loaded_objective_;
Guolin Ke's avatar
Guolin Ke committed
600
  bool average_output_;
Guolin Ke's avatar
Guolin Ke committed
601
  bool need_re_bagging_;
Guolin Ke's avatar
Guolin Ke committed
602
  bool balanced_bagging_;
Guolin Ke's avatar
Guolin Ke committed
603
  std::string loaded_parameter_;
604
  std::vector<int8_t> monotone_constraints_;
605
606
607
  const int bagging_rand_block_ = 1024;
  std::vector<Random> bagging_rands_;
  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
608
  Json forced_splits_json_;
609
  bool linear_tree_;
Guolin Ke's avatar
Guolin Ke committed
610
611
612
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
613
#endif  // LIGHTGBM_BOOSTING_GBDT_H_