boosting.h 9.84 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
#ifndef LIGHTGBM_BOOSTING_H_
#define LIGHTGBM_BOOSTING_H_

#include <LightGBM/meta.h>
#include <LightGBM/config.h>

wxchan's avatar
wxchan committed
7
8
9
10
#ifdef USE_PROTO
#include "model.pb.h"
#endif // USE_PROTO

Guolin Ke's avatar
Guolin Ke committed
11
12
#include <vector>
#include <string>
13
#include <map>
#include <unordered_map>
#include <cstdint>
Guolin Ke's avatar
Guolin Ke committed
14
15
16
17
18
19
20

namespace LightGBM {

/*! \brief Forward declarations; this header only uses these types through pointers or references */
class Dataset;
class ObjectiveFunction;
class Metric;
struct PredictionEarlyStopInstance;
Guolin Ke's avatar
Guolin Ke committed
22
23
24
25

/*!
* \brief The interface for Boosting
*/
26
class LIGHTGBM_EXPORT Boosting {
Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
31
public:
  /*! \brief virtual destructor */
  virtual ~Boosting() {}

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
32
33
  * \brief Initialization logic
  * \param config Configs for boosting
Guolin Ke's avatar
Guolin Ke committed
34
  * \param train_data Training data
35
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
36
37
  * \param training_metrics Training metric
  */
38
39
40
  virtual void Init(
    const BoostingConfig* config,
    const Dataset* train_data,
41
    const ObjectiveFunction* objective_function,
42
    const std::vector<const Metric*>& training_metrics) = 0;
Guolin Ke's avatar
Guolin Ke committed
43

wxchan's avatar
wxchan committed
44
45
  /*!
  * \brief Merge model from other boosting object
Guolin Ke's avatar
Guolin Ke committed
46
  Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
47
48
49
50
  * \param other
  */
  virtual void MergeFrom(const Boosting* other) = 0;

51
52
53
54
  virtual void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                                 const std::vector<const Metric*>& training_metrics) = 0;

  virtual void ResetConfig(const BoostingConfig* config) = 0;
wxchan's avatar
wxchan committed
55

Guolin Ke's avatar
Guolin Ke committed
56
57
58
59
60
  /*!
  * \brief Add a validation data
  * \param valid_data Validation data
  * \param valid_metrics Metric for validation data
  */
wxchan's avatar
wxchan committed
61
  virtual void AddValidDataset(const Dataset* valid_data,
Guolin Ke's avatar
Guolin Ke committed
62
                               const std::vector<const Metric*>& valid_metrics) = 0;
Guolin Ke's avatar
Guolin Ke committed
63

Guolin Ke's avatar
Guolin Ke committed
64
65
  virtual void Train(int snapshot_freq, const std::string& model_output_path) = 0;

Guolin Ke's avatar
Guolin Ke committed
66
67
  /*!
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
68
69
70
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
  * \return True if cannot train anymore
Guolin Ke's avatar
Guolin Ke committed
71
  */
Guolin Ke's avatar
Guolin Ke committed
72
  virtual bool TrainOneIter(const score_t* gradients, const score_t* hessians) = 0;
73

wxchan's avatar
wxchan committed
74
75
76
77
78
79
80
81
82
83
  /*!
  * \brief Rollback one iteration
  */
  virtual void RollbackOneIter() = 0;

  /*!
  * \brief return current iteration
  */
  virtual int GetCurrentIteration() const = 0;

Guolin Ke's avatar
Guolin Ke committed
84
85
86
87
88
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
89
  virtual std::vector<double> GetEvalAt(int data_idx) const = 0;
90

Guolin Ke's avatar
Guolin Ke committed
91
92
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
93
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
94
95
  * \return training score
  */
96
  virtual const double* GetTrainingScore(int64_t* out_len) = 0;
Guolin Ke's avatar
Guolin Ke committed
97

Guolin Ke's avatar
Guolin Ke committed
98
99
100
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
101
  * \return out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
102
103
  */
  virtual int64_t GetNumPredictAt(int data_idx) const = 0;
Guolin Ke's avatar
Guolin Ke committed
104

Guolin Ke's avatar
Guolin Ke committed
105
106
107
108
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param result used to store prediction result, should allocate memory before call this function
109
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
110
  */
Guolin Ke's avatar
Guolin Ke committed
111
  virtual void GetPredictAt(int data_idx, double* result, int64_t* out_len) = 0;
Guolin Ke's avatar
Guolin Ke committed
112

113
  virtual int NumPredictOneRow(int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const = 0;
Guolin Ke's avatar
Guolin Ke committed
114

Guolin Ke's avatar
Guolin Ke committed
115
  /*!
Hui Xue's avatar
Hui Xue committed
116
  * \brief Prediction for one record, not sigmoid transform
Guolin Ke's avatar
Guolin Ke committed
117
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
118
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
119
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
Guolin Ke's avatar
Guolin Ke committed
120
  */
cbecker's avatar
cbecker committed
121
  virtual void PredictRaw(const double* features, double* output,
122
                          const PredictionEarlyStopInstance* early_stop) const = 0;
Guolin Ke's avatar
Guolin Ke committed
123

Guolin Ke's avatar
Guolin Ke committed
124
125
  virtual void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                               const PredictionEarlyStopInstance* early_stop) const = 0;
126
127


Guolin Ke's avatar
Guolin Ke committed
128
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
129
  * \brief Prediction for one record, sigmoid transformation will be used if needed
Guolin Ke's avatar
Guolin Ke committed
130
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
131
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
132
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
Guolin Ke's avatar
Guolin Ke committed
133
  */
cbecker's avatar
cbecker committed
134
  virtual void Predict(const double* features, double* output,
135
                       const PredictionEarlyStopInstance* early_stop) const = 0;
136

Guolin Ke's avatar
Guolin Ke committed
137
138
  virtual void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                            const PredictionEarlyStopInstance* early_stop) const = 0;
139
140


wxchan's avatar
wxchan committed
141
  /*!
142
  * \brief Prediction for one record with leaf index
wxchan's avatar
wxchan committed
143
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
144
  * \param output Prediction result for this record
wxchan's avatar
wxchan committed
145
  */
Guolin Ke's avatar
Guolin Ke committed
146
  virtual void PredictLeafIndex(
147
    const double* features, double* output) const = 0;
148

149
150
151
  virtual void PredictLeafIndexByMap(
    const std::unordered_map<int, double>& features, double* output) const = 0;

Guolin Ke's avatar
Guolin Ke committed
152
  /*!
153
154
155
  * \brief Feature contributions for the model's prediction of one record
  * \param feature_values Feature value on this record
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
156
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
157
158
  */
  virtual void PredictContrib(const double* features, double* output,
Guolin Ke's avatar
Guolin Ke committed
159
                              const PredictionEarlyStopInstance* early_stop) const = 0;
160

Guolin Ke's avatar
Guolin Ke committed
161
  /*!
wxchan's avatar
wxchan committed
162
  * \brief Dump model to json format string
163
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
wxchan's avatar
wxchan committed
164
165
  * \return Json format string of model
  */
166
  virtual std::string DumpModel(int num_iteration) const = 0;
wxchan's avatar
wxchan committed
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  virtual std::string ModelToIfElse(int num_iteration) const = 0;

  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  virtual bool SaveModelToIfElse(int num_iteration, const char* filename) const = 0;

wxchan's avatar
wxchan committed
183
184
  /*!
  * \brief Save model to file
wxchan's avatar
wxchan committed
185
  * \param num_iterations Number of model that want to save, -1 means save all
wxchan's avatar
wxchan committed
186
187
  * \param is_finish Is training finished or not
  * \param filename Filename that want to save to
188
  * \return true if succeeded
Guolin Ke's avatar
Guolin Ke committed
189
  */
190
  virtual bool SaveModelToFile(int num_iterations, const char* filename) const = 0;
Guolin Ke's avatar
Guolin Ke committed
191

192
193
  /*!
  * \brief Save model to string
wxchan's avatar
wxchan committed
194
  * \param num_iterations Number of model that want to save, -1 means save all
195
196
197
198
  * \return Non-empty string if succeeded
  */
  virtual std::string SaveModelToString(int num_iterations) const = 0;

Guolin Ke's avatar
Guolin Ke committed
199
200
201
  /*!
  * \brief Restore from a serialized string
  * \param model_str The string of model
202
  * \return true if succeeded
Guolin Ke's avatar
Guolin Ke committed
203
  */
204
  virtual bool LoadModelFromString(const std::string& model_str) = 0;
205

wxchan's avatar
wxchan committed
206
207
208
209
210
211
212
  #ifdef USE_PROTO
  /*!
  * \brief Save model with protobuf
  * \param num_iterations Number of model that want to save, -1 means save all
  * \param filename Filename that want to save to
  */
  virtual void SaveModelToProto(int num_iteration, const char* filename) const = 0;
Guolin Ke's avatar
Guolin Ke committed
213

wxchan's avatar
wxchan committed
214
215
216
217
218
219
220
221
  /*!
  * \brief Restore from a serialized protobuf file
  * \param filename Filename that want to restore from
  * \return true if succeeded
  */
  virtual bool LoadModelFromProto(const char* filename) = 0;
  #endif // USE_PROTO

222
223
224
225
226
227
228
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  virtual std::vector<double> FeatureImportance(int num_iteration, int importance_type) const = 0;
Guolin Ke's avatar
Guolin Ke committed
229
230
231
232
233
234
235

  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  virtual int MaxFeatureIdx() const = 0;

wxchan's avatar
wxchan committed
236
237
238
239
240
241
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  virtual std::vector<std::string> FeatureNames() const = 0;

Guolin Ke's avatar
Guolin Ke committed
242
243
244
245
246
247
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  virtual int LabelIdx() const = 0;

Guolin Ke's avatar
Guolin Ke committed
248
249
250
251
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
252
  virtual int NumberOfTotalModel() const = 0;
253

Guolin Ke's avatar
Guolin Ke committed
254
  /*!
Guolin Ke's avatar
Guolin Ke committed
255
256
  * \brief Get number of models per iteration
  * \return Number of models per iteration
Guolin Ke's avatar
Guolin Ke committed
257
  */
Guolin Ke's avatar
Guolin Ke committed
258
  virtual int NumModelPerIteration() const = 0;
Guolin Ke's avatar
Guolin Ke committed
259

260
261
262
263
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
264
  virtual int NumberOfClasses() const = 0;
265

266
267
268
  /*! \brief The prediction should be accurate or not. True will disable early stopping for prediction. */
  virtual bool NeedAccuratePrediction() const = 0;

269
  /*!
Guolin Ke's avatar
Guolin Ke committed
270
271
  * \brief Initial work for the prediction
  * \param num_iteration number of used iteration
272
  */
273
  virtual void InitPredict(int num_iteration) = 0;
274

275
  /*!
Guolin Ke's avatar
Guolin Ke committed
276
  * \brief Name of submodel
277
  */
Guolin Ke's avatar
Guolin Ke committed
278
  virtual const char* SubModelName() const = 0;
279

Guolin Ke's avatar
Guolin Ke committed
280
281
282
283
284
285
  Boosting() = default;
  /*! \brief Disable copy */
  Boosting& operator=(const Boosting&) = delete;
  /*! \brief Disable copy */
  Boosting(const Boosting&) = delete;

wxchan's avatar
wxchan committed
286
  static bool LoadFileToBoosting(Boosting* boosting, const std::string& format, const char* filename);
wxchan's avatar
wxchan committed
287

Guolin Ke's avatar
Guolin Ke committed
288
289
290
  /*!
  * \brief Create boosting object
  * \param type Type of boosting
wxchan's avatar
wxchan committed
291
  * \param format Format of model
292
293
  * \param config config for boosting
  * \param filename name of model file, if existing will continue to train from this model
Guolin Ke's avatar
Guolin Ke committed
294
295
  * \return The boosting object
  */
wxchan's avatar
wxchan committed
296
  static Boosting* CreateBoosting(const std::string& type, const std::string& format, const char* filename);
297

Guolin Ke's avatar
Guolin Ke committed
298
299
};

Guolin Ke's avatar
Guolin Ke committed
300
301
302
303
304
305
/*!
* \brief Extension of the Boosting interface that adds access to individual leaf values
*/
class GBDTBase : public Boosting {
public:
  /*!
  * \brief Get the value of one leaf
  * \param tree_idx Index of the tree (presumably into the list of weak sub-models; confirm)
  * \param leaf_idx Index of the leaf within that tree
  * \return Value of the leaf
  */
  virtual double GetLeafValue(int tree_idx, int leaf_idx) const = 0;
  /*!
  * \brief Set the value of one leaf
  * \param tree_idx Index of the tree (presumably into the list of weak sub-models; confirm)
  * \param leaf_idx Index of the leaf within that tree
  * \param val New value of the leaf
  */
  virtual void SetLeafValue(int tree_idx, int leaf_idx, double val) = 0;
};

Guolin Ke's avatar
Guolin Ke committed
306
307
}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
308
#endif   // LIGHTGBM_BOOSTING_H_