"cupy_backends/cuda/stream.pxd" did not exist on "93bf084b3332e0d58c118590cc1722af6c810a8e"
boosting.h 9.83 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
#ifndef LIGHTGBM_BOOSTING_H_
#define LIGHTGBM_BOOSTING_H_

#include <LightGBM/config.h>
5
#include <LightGBM/meta.h>
Guolin Ke's avatar
Guolin Ke committed
6
7

#include <string>
8
#include <map>
9
10
#include <unordered_map>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
15
16
17

namespace LightGBM {

/*! \brief Forward declarations of types that are only used by pointer/reference in this header */
class Dataset;
class ObjectiveFunction;
class Metric;
struct PredictionEarlyStopInstance;
Guolin Ke's avatar
Guolin Ke committed
19
20
21
22

/*!
* \brief The interface for Boosting
*/
23
class LIGHTGBM_EXPORT Boosting {
Nikita Titov's avatar
Nikita Titov committed
24
 public:
Guolin Ke's avatar
Guolin Ke committed
25
26
27
28
  /*! \brief virtual destructor */
  virtual ~Boosting() {}

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
29
30
  * \brief Initialization logic
  * \param config Configs for boosting
Guolin Ke's avatar
Guolin Ke committed
31
  * \param train_data Training data
32
  * \param objective_function Training objective function
Guolin Ke's avatar
Guolin Ke committed
33
34
  * \param training_metrics Training metric
  */
35
  virtual void Init(
Guolin Ke's avatar
Guolin Ke committed
36
    const Config* config,
37
    const Dataset* train_data,
38
    const ObjectiveFunction* objective_function,
39
    const std::vector<const Metric*>& training_metrics) = 0;
Guolin Ke's avatar
Guolin Ke committed
40

wxchan's avatar
wxchan committed
41
42
  /*!
  * \brief Merge model from other boosting object
Guolin Ke's avatar
Guolin Ke committed
43
  Will insert to the front of current boosting object
wxchan's avatar
wxchan committed
44
45
46
47
  * \param other
  */
  virtual void MergeFrom(const Boosting* other) = 0;

48
49
50
  /*!
  * \brief Shuffle Existing Models
  */
51
  virtual void ShuffleModels(int start_iter, int end_iter) = 0;
52

53
54
55
  virtual void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
                                 const std::vector<const Metric*>& training_metrics) = 0;

Guolin Ke's avatar
Guolin Ke committed
56
  virtual void ResetConfig(const Config* config) = 0;
wxchan's avatar
wxchan committed
57

58
59


Guolin Ke's avatar
Guolin Ke committed
60
61
62
63
64
  /*!
  * \brief Add a validation data
  * \param valid_data Validation data
  * \param valid_metrics Metric for validation data
  */
wxchan's avatar
wxchan committed
65
  virtual void AddValidDataset(const Dataset* valid_data,
Guolin Ke's avatar
Guolin Ke committed
66
                               const std::vector<const Metric*>& valid_metrics) = 0;
Guolin Ke's avatar
Guolin Ke committed
67

Guolin Ke's avatar
Guolin Ke committed
68
69
  virtual void Train(int snapshot_freq, const std::string& model_output_path) = 0;

70
71
72
73
74
  /*!
  * \brief Update the tree output by new training data
  */
  virtual void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) = 0;

Guolin Ke's avatar
Guolin Ke committed
75
76
  /*!
  * \brief Training logic
Guolin Ke's avatar
Guolin Ke committed
77
78
79
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
  * \return True if cannot train anymore
Guolin Ke's avatar
Guolin Ke committed
80
  */
Guolin Ke's avatar
Guolin Ke committed
81
  virtual bool TrainOneIter(const score_t* gradients, const score_t* hessians) = 0;
82

wxchan's avatar
wxchan committed
83
84
85
86
87
88
89
90
91
92
  /*!
  * \brief Rollback one iteration
  */
  virtual void RollbackOneIter() = 0;

  /*!
  * \brief return current iteration
  */
  virtual int GetCurrentIteration() const = 0;

Guolin Ke's avatar
Guolin Ke committed
93
94
95
96
97
  /*!
  * \brief Get evaluation result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \return evaluation result
  */
98
  virtual std::vector<double> GetEvalAt(int data_idx) const = 0;
99

Guolin Ke's avatar
Guolin Ke committed
100
101
  /*!
  * \brief Get current training score
Guolin Ke's avatar
Guolin Ke committed
102
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
103
104
  * \return training score
  */
105
  virtual const double* GetTrainingScore(int64_t* out_len) = 0;
Guolin Ke's avatar
Guolin Ke committed
106

Guolin Ke's avatar
Guolin Ke committed
107
108
109
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
110
  * \return out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
111
112
  */
  virtual int64_t GetNumPredictAt(int data_idx) const = 0;
Guolin Ke's avatar
Guolin Ke committed
113

Guolin Ke's avatar
Guolin Ke committed
114
115
116
117
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param result used to store prediction result, should allocate memory before call this function
118
  * \param out_len length of returned score
Guolin Ke's avatar
Guolin Ke committed
119
  */
Guolin Ke's avatar
Guolin Ke committed
120
  virtual void GetPredictAt(int data_idx, double* result, int64_t* out_len) = 0;
Guolin Ke's avatar
Guolin Ke committed
121

122
  virtual int NumPredictOneRow(int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const = 0;
Guolin Ke's avatar
Guolin Ke committed
123

Guolin Ke's avatar
Guolin Ke committed
124
  /*!
Hui Xue's avatar
Hui Xue committed
125
  * \brief Prediction for one record, not sigmoid transform
Guolin Ke's avatar
Guolin Ke committed
126
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
127
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
128
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
Guolin Ke's avatar
Guolin Ke committed
129
  */
cbecker's avatar
cbecker committed
130
  virtual void PredictRaw(const double* features, double* output,
131
                          const PredictionEarlyStopInstance* early_stop) const = 0;
Guolin Ke's avatar
Guolin Ke committed
132

Guolin Ke's avatar
Guolin Ke committed
133
134
  virtual void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
                               const PredictionEarlyStopInstance* early_stop) const = 0;
135
136


Guolin Ke's avatar
Guolin Ke committed
137
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
138
  * \brief Prediction for one record, sigmoid transformation will be used if needed
Guolin Ke's avatar
Guolin Ke committed
139
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
140
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
141
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
Guolin Ke's avatar
Guolin Ke committed
142
  */
cbecker's avatar
cbecker committed
143
  virtual void Predict(const double* features, double* output,
144
                       const PredictionEarlyStopInstance* early_stop) const = 0;
145

Guolin Ke's avatar
Guolin Ke committed
146
147
  virtual void PredictByMap(const std::unordered_map<int, double>& features, double* output,
                            const PredictionEarlyStopInstance* early_stop) const = 0;
148
149


wxchan's avatar
wxchan committed
150
  /*!
151
  * \brief Prediction for one record with leaf index
wxchan's avatar
wxchan committed
152
  * \param feature_values Feature value on this record
Guolin Ke's avatar
Guolin Ke committed
153
  * \param output Prediction result for this record
wxchan's avatar
wxchan committed
154
  */
Guolin Ke's avatar
Guolin Ke committed
155
  virtual void PredictLeafIndex(
156
    const double* features, double* output) const = 0;
157

158
159
160
  virtual void PredictLeafIndexByMap(
    const std::unordered_map<int, double>& features, double* output) const = 0;

Guolin Ke's avatar
Guolin Ke committed
161
  /*!
162
163
164
  * \brief Feature contributions for the model's prediction of one record
  * \param feature_values Feature value on this record
  * \param output Prediction result for this record
Guolin Ke's avatar
Guolin Ke committed
165
  * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated.
166
167
  */
  virtual void PredictContrib(const double* features, double* output,
Guolin Ke's avatar
Guolin Ke committed
168
                              const PredictionEarlyStopInstance* early_stop) const = 0;
169

Guolin Ke's avatar
Guolin Ke committed
170
  /*!
wxchan's avatar
wxchan committed
171
  * \brief Dump model to json format string
172
  * \param start_iteration The model will be saved start from
173
  * \param num_iteration Number of iterations that want to dump, -1 means dump all
wxchan's avatar
wxchan committed
174
175
  * \return Json format string of model
  */
176
  virtual std::string DumpModel(int start_iteration, int num_iteration) const = 0;
wxchan's avatar
wxchan committed
177

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \return if-else format codes of model
  */
  virtual std::string ModelToIfElse(int num_iteration) const = 0;

  /*!
  * \brief Translate model to if-else statement
  * \param num_iteration Number of iterations that want to translate, -1 means translate all
  * \param filename Filename that want to save to
  * \return is_finish Is training finished or not
  */
  virtual bool SaveModelToIfElse(int num_iteration, const char* filename) const = 0;

wxchan's avatar
wxchan committed
193
194
  /*!
  * \brief Save model to file
195
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
196
  * \param num_iterations Number of model that want to save, -1 means save all
wxchan's avatar
wxchan committed
197
198
  * \param is_finish Is training finished or not
  * \param filename Filename that want to save to
199
  * \return true if succeeded
Guolin Ke's avatar
Guolin Ke committed
200
  */
201
  virtual bool SaveModelToFile(int start_iteration, int num_iterations, const char* filename) const = 0;
Guolin Ke's avatar
Guolin Ke committed
202

203
204
  /*!
  * \brief Save model to string
205
  * \param start_iteration The model will be saved start from
wxchan's avatar
wxchan committed
206
  * \param num_iterations Number of model that want to save, -1 means save all
207
208
  * \return Non-empty string if succeeded
  */
209
  virtual std::string SaveModelToString(int start_iteration, int num_iterations) const = 0;
210

Guolin Ke's avatar
Guolin Ke committed
211
212
  /*!
  * \brief Restore from a serialized string
213
214
  * \param buffer The content of model
  * \param len The length of buffer
wxchan's avatar
wxchan committed
215
216
  * \return true if succeeded
  */
217
  virtual bool LoadModelFromString(const char* buffer, size_t len) = 0;
wxchan's avatar
wxchan committed
218

219
220
221
222
223
224
225
  /*!
  * \brief Calculate feature importances
  * \param num_iteration Number of model that want to use for feature importance, -1 means use all
  * \param importance_type: 0 for split, 1 for gain
  * \return vector of feature_importance
  */
  virtual std::vector<double> FeatureImportance(int num_iteration, int importance_type) const = 0;
Guolin Ke's avatar
Guolin Ke committed
226
227
228
229
230
231
232

  /*!
  * \brief Get max feature index of this model
  * \return Max feature index of this model
  */
  virtual int MaxFeatureIdx() const = 0;

wxchan's avatar
wxchan committed
233
234
235
236
237
238
  /*!
  * \brief Get feature names of this model
  * \return Feature names of this model
  */
  virtual std::vector<std::string> FeatureNames() const = 0;

Guolin Ke's avatar
Guolin Ke committed
239
240
241
242
243
244
  /*!
  * \brief Get index of label column
  * \return index of label column
  */
  virtual int LabelIdx() const = 0;

Guolin Ke's avatar
Guolin Ke committed
245
246
247
248
  /*!
  * \brief Get number of weak sub-models
  * \return Number of weak sub-models
  */
wxchan's avatar
wxchan committed
249
  virtual int NumberOfTotalModel() const = 0;
250

Guolin Ke's avatar
Guolin Ke committed
251
  /*!
Guolin Ke's avatar
Guolin Ke committed
252
253
  * \brief Get number of models per iteration
  * \return Number of models per iteration
Guolin Ke's avatar
Guolin Ke committed
254
  */
Guolin Ke's avatar
Guolin Ke committed
255
  virtual int NumModelPerIteration() const = 0;
Guolin Ke's avatar
Guolin Ke committed
256

257
258
259
260
  /*!
  * \brief Get number of classes
  * \return Number of classes
  */
Guolin Ke's avatar
Guolin Ke committed
261
  virtual int NumberOfClasses() const = 0;
262

263
264
265
  /*! \brief The prediction should be accurate or not. True will disable early stopping for prediction. */
  virtual bool NeedAccuratePrediction() const = 0;

266
  /*!
Guolin Ke's avatar
Guolin Ke committed
267
268
  * \brief Initial work for the prediction
  * \param num_iteration number of used iteration
269
  * \param is_pred_contrib
270
  */
271
  virtual void InitPredict(int num_iteration, bool is_pred_contrib) = 0;
272

273
  /*!
Guolin Ke's avatar
Guolin Ke committed
274
  * \brief Name of submodel
275
  */
Guolin Ke's avatar
Guolin Ke committed
276
  virtual const char* SubModelName() const = 0;
277

Guolin Ke's avatar
Guolin Ke committed
278
279
280
281
282
283
  Boosting() = default;
  /*! \brief Disable copy */
  Boosting& operator=(const Boosting&) = delete;
  /*! \brief Disable copy */
  Boosting(const Boosting&) = delete;

284
  static bool LoadFileToBoosting(Boosting* boosting, const char* filename);
wxchan's avatar
wxchan committed
285

Guolin Ke's avatar
Guolin Ke committed
286
287
288
  /*!
  * \brief Create boosting object
  * \param type Type of boosting
wxchan's avatar
wxchan committed
289
  * \param format Format of model
290
291
  * \param config config for boosting
  * \param filename name of model file, if existing will continue to train from this model
Guolin Ke's avatar
Guolin Ke committed
292
293
  * \return The boosting object
  */
294
  static Boosting* CreateBoosting(const std::string& type, const char* filename);
Guolin Ke's avatar
Guolin Ke committed
295
296
};

Guolin Ke's avatar
Guolin Ke committed
297
/*!
* \brief Boosting interface extended with direct access to individual leaf values
*/
class GBDTBase : public Boosting {
 public:
  /*!
  * \brief Get the value of one leaf
  * \param tree_idx Index of the tree
  * \param leaf_idx Index of the leaf within that tree
  * \return Value of the leaf
  */
  virtual double GetLeafValue(int tree_idx, int leaf_idx) const = 0;

  /*!
  * \brief Set the value of one leaf
  * \param tree_idx Index of the tree
  * \param leaf_idx Index of the leaf within that tree
  * \param val New value of the leaf
  */
  virtual void SetLeafValue(int tree_idx, int leaf_idx, double val) = 0;
};

Guolin Ke's avatar
Guolin Ke committed
303
304
}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
305
#endif   // LIGHTGBM_BOOSTING_H_