/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_INCLUDE_LIGHTGBM_BOOSTING_H_
#define LIGHTGBM_INCLUDE_LIGHTGBM_BOOSTING_H_

#include <LightGBM/config.h>
#include <LightGBM/meta.h>

#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

namespace LightGBM {

/*!
* \brief Forward declarations of types that this header only uses through
*        pointers and references, so their full definitions are not needed here
*/
class Dataset;
class ObjectiveFunction;
class Metric;
struct PredictionEarlyStopInstance;

/*!
* \brief The interface for Boosting
*/
27
class LIGHTGBM_EXPORT Boosting {
Nikita Titov's avatar
Nikita Titov committed
28
 public:
Guolin Ke's avatar
Guolin Ke committed
29
30
31
32
  /*! \brief virtual destructor */
  virtual ~Boosting() {}

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
33
34
  * \brief Initialization logic
  * \param config Configs for boosting
Guolin Ke's avatar
Guolin Ke committed
35
  * \param train_data Training data
36
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
37
38
  * \param training_metrics Training metric
  */
39
  virtual void Init(
Guolin Ke's avatar
Guolin Ke committed
40
    const Config* config,
41
    const Dataset* train_data,
42
    const ObjectiveFunction* objective_function,
43
    const std::vector<const Metric*>& training_metrics) = 0;
Guolin Ke's avatar
Guolin Ke committed
44

wxchan's avatar
wxchan committed
45
46
  /*!
  * \brief Merge model from other boosting object
Guolin Ke's avatar
Guolin Ke committed
47
  Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
48
49
50
51
  * \param other
  */
  virtual void MergeFrom(const Boosting* other) = 0;

52
53
54
  /*!
  * \brief Shuffle Existing Models
  */
55
  virtual void ShuffleModels(int start_iter, int end_iter) = 0;
56

57
58
59
  virtual void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                                 const std::vector<const Metric*>& training_metrics) = 0;

Guolin Ke's avatar
Guolin Ke committed
60
  virtual void ResetConfig(const Config* config) = 0;
wxchan's avatar
wxchan committed
61

62
63


Guolin Ke's avatar
Guolin Ke committed
64
65
66
67
68
  /*!
  * \brief Add a validation data
  * \param valid_data Validation data
  * \param valid_metrics Metric for validation data
  */
wxchan's avatar
wxchan committed
69
  virtual void AddValidDataset(const Dataset* valid_data,
Guolin Ke's avatar
Guolin Ke committed
70
                               const std::vector<const Metric*>& valid_metrics) = 0;
Guolin Ke's avatar
Guolin Ke committed
71

Guolin Ke's avatar
Guolin Ke committed
72
73
  virtual void Train(int snapshot_freq, const std::string& model_output_path) = 0;

74
75
76
  /*!
  * \brief Update the tree output by new training data
  */
77
  virtual void RefitTree(const int* tree_leaf_prediction, const size_t nrow, const size_t ncol) = 0;
78

Guolin Ke's avatar
Guolin Ke committed
79
80
  /*!
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
81
82
83
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
  * \return True if cannot train anymore
Guolin Ke's avatar
Guolin Ke committed
84
  */
Guolin Ke's avatar
Guolin Ke committed
85
  virtual bool TrainOneIter(const score_t* gradients, const score_t* hessians) = 0;
86

wxchan's avatar
wxchan committed
87
88
89
90
91
92
93
94
95
96
  /*!
  * \brief Rollback one iteration
  */
  virtual void RollbackOneIter() = 0;

  /*!
  * \brief return current iteration
  */
  virtual int GetCurrentIteration() const = 0;

Guolin Ke's avatar
Guolin Ke committed
97
98
99
100
101
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
102
  virtual std::vector<double> GetEvalAt(int data_idx) const = 0;
103

Guolin Ke's avatar
Guolin Ke committed
104
105
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
106
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
107
108
  * \return training score
  */
109
  virtual const double* GetTrainingScore(int64_t* out_len) = 0;
Guolin Ke's avatar
Guolin Ke committed
110

Guolin Ke's avatar
Guolin Ke committed
111
112
113
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
114
  * \return out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
115
116
  */
  virtual int64_t GetNumPredictAt(int data_idx) const = 0;
Guolin Ke's avatar
Guolin Ke committed
117

Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param result used to store prediction result, should allocate memory before call this function
122
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
123
  */
Guolin Ke's avatar
Guolin Ke committed
124
  virtual void GetPredictAt(int data_idx, double* result, int64_t* out_len) = 0;
Guolin Ke's avatar
Guolin Ke committed
125

126
  virtual int NumPredictOneRow(int start_iteration, int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const = 0;
Guolin Ke's avatar
Guolin Ke committed
127

Guolin Ke's avatar
Guolin Ke committed
128
  /*!
Hui Xue's avatar
Hui Xue committed
129
  * \brief Prediction for one record, not sigmoid transform
Guolin Ke's avatar
Guolin Ke committed
130
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
131
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
132
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
Guolin Ke's avatar
Guolin Ke committed
133
  */
cbecker's avatar
cbecker committed
134
  virtual void PredictRaw(const double* features, double* output,
135
                          const PredictionEarlyStopInstance* early_stop) const = 0;
Guolin Ke's avatar
Guolin Ke committed
136

Guolin Ke's avatar
Guolin Ke committed
137
138
  virtual void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                               const PredictionEarlyStopInstance* early_stop) const = 0;
139
140


Guolin Ke's avatar
Guolin Ke committed
141
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
142
  * \brief Prediction for one record, sigmoid transformation will be used if needed
Guolin Ke's avatar
Guolin Ke committed
143
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
144
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
145
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
Guolin Ke's avatar
Guolin Ke committed
146
  */
cbecker's avatar
cbecker committed
147
  virtual void Predict(const double* features, double* output,
148
                       const PredictionEarlyStopInstance* early_stop) const = 0;
149

Guolin Ke's avatar
Guolin Ke committed
150
151
  virtual void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                            const PredictionEarlyStopInstance* early_stop) const = 0;
152
153


wxchan's avatar
wxchan committed
154
  /*!
155
  * \brief Prediction for one record with leaf index
wxchan's avatar
wxchan committed
156
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
157
  * \param output Prediction result for this record
wxchan's avatar
wxchan committed
158
  */
Guolin Ke's avatar
Guolin Ke committed
159
  virtual void PredictLeafIndex(
160
    const double* features, double* output) const = 0;
161

162
163
164
  virtual void PredictLeafIndexByMap(
    const std::unordered_map<int, double>& features, double* output) const = 0;

Guolin Ke's avatar
Guolin Ke committed
165
  /*!
166
167
168
169
  * \brief Feature contributions for the model's prediction of one record
  * \param feature_values Feature value on this record
  * \param output Prediction result for this record
  */
170
171
172
173
  virtual void PredictContrib(const double* features, double* output) const = 0;

  virtual void PredictContribByMap(const std::unordered_map<int, double>& features,
                                   std::vector<std::unordered_map<int, double>>* output) const = 0;
174

Guolin Ke's avatar
Guolin Ke committed
175
  /*!
wxchan's avatar
wxchan committed
176
  * \brief Dump model to json format string
177
  * \param start_iteration The model will be saved start from
178
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
179
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
180
181
  * \return Json format string of model
  */
182
  virtual std::string DumpModel(int start_iteration, int num_iteration, int feature_importance_type) const = 0;
wxchan's avatar
wxchan committed
183

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  virtual std::string ModelToIfElse(int num_iteration) const = 0;

  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  virtual bool SaveModelToIfElse(int num_iteration, const char* filename) const = 0;

wxchan's avatar
wxchan committed
199
200
  /*!
  * \brief Save model to file
201
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
202
  * \param num_iterations Number of model that want to save, -1 means save all
203
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
wxchan's avatar
wxchan committed
204
  * \param filename Filename that want to save to
205
  * \return true if succeeded
Guolin Ke's avatar
Guolin Ke committed
206
  */
207
  virtual bool SaveModelToFile(int start_iteration, int num_iterations, int feature_importance_type, const char* filename) const = 0;
Guolin Ke's avatar
Guolin Ke committed
208

209
210
  /*!
  * \brief Save model to string
211
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
212
  * \param num_iterations Number of model that want to save, -1 means save all
213
  * \param feature_importance_type Type of feature importance, 0: split, 1: gain
214
215
  * \return Non-empty string if succeeded
  */
216
  virtual std::string SaveModelToString(int start_iteration, int num_iterations, int feature_importance_type) const = 0;
217

Guolin Ke's avatar
Guolin Ke committed
218
219
  /*!
  * \brief Restore from a serialized string
220
221
  * \param buffer The content of model
  * \param len The length of buffer
wxchan's avatar
wxchan committed
222
223
  * \return true if succeeded
  */
224
  virtual bool LoadModelFromString(const char* buffer, size_t len) = 0;
wxchan's avatar
wxchan committed
225

226
227
228
229
230
231
232
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  virtual std::vector<double> FeatureImportance(int num_iteration, int importance_type) const = 0;
Guolin Ke's avatar
Guolin Ke committed
233

234
235
236
237
238
239
240
241
242
243
244
245
  /*!
  * \brief Calculate upper bound value
  * \return max possible value
  */
  virtual double GetUpperBoundValue() const = 0;

  /*!
  * \brief Calculate lower bound value
  * \return min possible value
  */
  virtual double GetLowerBoundValue() const = 0;

Guolin Ke's avatar
Guolin Ke committed
246
247
248
249
250
251
  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  virtual int MaxFeatureIdx() const = 0;

wxchan's avatar
wxchan committed
252
253
254
255
256
257
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  virtual std::vector<std::string> FeatureNames() const = 0;

Guolin Ke's avatar
Guolin Ke committed
258
259
260
261
262
263
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  virtual int LabelIdx() const = 0;

Guolin Ke's avatar
Guolin Ke committed
264
265
266
267
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
268
  virtual int NumberOfTotalModel() const = 0;
269

Guolin Ke's avatar
Guolin Ke committed
270
  /*!
Guolin Ke's avatar
Guolin Ke committed
271
272
  * \brief Get number of models per iteration
  * \return Number of models per iteration
Guolin Ke's avatar
Guolin Ke committed
273
  */
Guolin Ke's avatar
Guolin Ke committed
274
  virtual int NumModelPerIteration() const = 0;
Guolin Ke's avatar
Guolin Ke committed
275

276
277
278
279
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
280
  virtual int NumberOfClasses() const = 0;
281

282
283
284
  /*! \brief The prediction should be accurate or not. True will disable early stopping for prediction. */
  virtual bool NeedAccuratePrediction() const = 0;

285
  /*!
Guolin Ke's avatar
Guolin Ke committed
286
  * \brief Initial work for the prediction
287
  * \param start_iteration Start index of the iteration to predict
Guolin Ke's avatar
Guolin Ke committed
288
  * \param num_iteration number of used iteration
289
  * \param is_pred_contrib
290
  */
291
  virtual void InitPredict(int start_iteration, int num_iteration, bool is_pred_contrib) = 0;
292

293
  /*!
Guolin Ke's avatar
Guolin Ke committed
294
  * \brief Name of submodel
295
  */
Guolin Ke's avatar
Guolin Ke committed
296
  virtual const char* SubModelName() const = 0;
297

Guolin Ke's avatar
Guolin Ke committed
298
299
300
301
302
303
  Boosting() = default;
  /*! \brief Disable copy */
  Boosting& operator=(const Boosting&) = delete;
  /*! \brief Disable copy */
  Boosting(const Boosting&) = delete;

304
  static bool LoadFileToBoosting(Boosting* boosting, const char* filename);
wxchan's avatar
wxchan committed
305

Guolin Ke's avatar
Guolin Ke committed
306
307
308
  /*!
  * \brief Create boosting object
  * \param type Type of boosting
wxchan's avatar
wxchan committed
309
  * \param format Format of model
310
311
  * \param config config for boosting
  * \param filename name of model file, if existing will continue to train from this model
Guolin Ke's avatar
Guolin Ke committed
312
313
  * \return The boosting object
  */
314
  static Boosting* CreateBoosting(const std::string& type, const char* filename);
315

316
317
  virtual std::string GetLoadedParam() const = 0;

318
  virtual bool IsLinear() const { return false; }
319
320

  virtual std::string ParserConfigStr() const = 0;
Guolin Ke's avatar
Guolin Ke committed
321
322
};

Guolin Ke's avatar
Guolin Ke committed
323
class GBDTBase : public Boosting {
Nikita Titov's avatar
Nikita Titov committed
324
 public:
Guolin Ke's avatar
Guolin Ke committed
325
326
327
328
  virtual double GetLeafValue(int tree_idx, int leaf_idx) const = 0;
  virtual void SetLeafValue(int tree_idx, int leaf_idx, double val) = 0;
};

}  // namespace LightGBM

#endif   // LIGHTGBM_INCLUDE_LIGHTGBM_BOOSTING_H_