config.h 19.5 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
#ifndef LIGHTGBM_CONFIG_H_
#define LIGHTGBM_CONFIG_H_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

Guolin Ke's avatar
Guolin Ke committed
7
#include <LightGBM/meta.h>
8
#include <LightGBM/export.h>
Guolin Ke's avatar
Guolin Ke committed
9

Guolin Ke's avatar
Guolin Ke committed
10
11
12
#include <vector>
#include <string>
#include <unordered_map>
wxchan's avatar
wxchan committed
13
#include <unordered_set>
Guolin Ke's avatar
Guolin Ke committed
14
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
15
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
16
17
18

namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
19
20
21
22
/*! \brief Default value of BoostingConfig::tree_learner_type */
const std::string kDefaultTreeLearnerType("serial");
/*! \brief Default value of IOConfig::device_type and BoostingConfig::device_type */
const std::string kDefaultDevice("cpu");
/*! \brief Default value of OverallConfig::boosting_type */
const std::string kDefaultBoostingType("gbdt");
/*! \brief Default value of OverallConfig::objective_type */
const std::string kDefaultObjectiveType("regression");
/*! \brief Default value of TreeConfig::num_leaves */
const int kDefaultNumLeaves = 31;
Guolin Ke's avatar
Guolin Ke committed
24

Guolin Ke's avatar
Guolin Ke committed
25
26
27
28
29
30
31
32
33
/*!
* \brief Abstract base of every config section; also hosts the typed lookup helpers
*/
struct ConfigBase {
  /*! \brief Virtual destructor, so derived configs can be destroyed through a base pointer */
  virtual ~ConfigBase() = default;

  /*!
  * \brief Populate this config object from a parameter map
  * \param params Map from parameter name to its raw string value
  */
  virtual void Set(
    const std::unordered_map<std::string, std::string>& params) = 0;

  /*!
  * \brief Look up a string parameter
  * \param params Map from parameter name to its raw string value
  * \param name Key to look up
  * \param out Receives the value when the key exists, otherwise left untouched
  * \return True if the key exists
  */
  static inline bool GetString(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, std::string* out);

  /*!
  * \brief Look up an int parameter
  * \param params Map from parameter name to its raw string value
  * \param name Key to look up
  * \param out Receives the parsed value when the key exists
  * \return True if the key exists; a value that fails to parse is reported through Log::Fatal
  */
  static inline bool GetInt(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, int* out);

  /*!
  * \brief Look up a double parameter
  * \param params Map from parameter name to its raw string value
  * \param name Key to look up
  * \param out Receives the parsed value when the key exists
  * \return True if the key exists; a value that fails to parse is reported through Log::Fatal
  */
  static inline bool GetDouble(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, double* out);

  /*!
  * \brief Look up a bool parameter
  * \param params Map from parameter name to its raw string value
  * \param name Key to look up
  * \param out Receives the parsed value when the key exists
  * \return True if the key exists; an unrecognized value is reported through Log::Fatal
  */
  static inline bool GetBool(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, bool* out);

  // NOTE(review): both helpers below are only declared in this header; judging by
  // their names they parse "key=value" text into a parameter map - confirm against
  // the implementation file.
  static void KV2Map(std::unordered_map<std::string, std::string>& params, const char* kv);
  static std::unordered_map<std::string, std::string> Str2Map(const char* parameters);
};

/*! \brief Kinds of task that can be selected (see OverallConfig::task_type) */
enum TaskType {
  kTrain = 0,
  kPredict = 1,
  kConvertModel = 2
};

/*! \brief Config for input and output files */
struct IOConfig: public ConfigBase {
public:
96
  int max_bin = 255;
97
  int num_class = 1;
Guolin Ke's avatar
Guolin Ke committed
98
99
  int data_random_seed = 1;
  std::string data_filename = "";
100
  std::string initscore_filename = "";
Guolin Ke's avatar
Guolin Ke committed
101
  std::vector<std::string> valid_data_filenames;
102
  std::vector<std::string> valid_data_initscores;
103
  int snapshot_freq = -1;
Guolin Ke's avatar
Guolin Ke committed
104
105
  std::string output_model = "LightGBM_model.txt";
  std::string output_result = "LightGBM_predict_result.txt";
106
  std::string convert_model = "gbdt_prediction.cpp";
Guolin Ke's avatar
Guolin Ke committed
107
  std::string input_model = "";
Guolin Ke's avatar
Guolin Ke committed
108
  int verbosity = 1;
109
  int num_iteration_predict = -1;
Guolin Ke's avatar
Guolin Ke committed
110
111
  bool is_pre_partition = false;
  bool is_enable_sparse = true;
112
113
114
115
116
  /*! \brief The threshold of zero elements precentage for treating a feature as a sparse feature.
   *  Default is 0.8, where a feature is treated as a sparse feature when there are over 80% zeros.
   *  When setting to 1.0, all features are processed as dense features.
   */
  double sparse_threshold = 0.8;
Guolin Ke's avatar
Guolin Ke committed
117
118
  bool use_two_round_loading = false;
  bool is_save_binary_file = false;
Guolin Ke's avatar
Guolin Ke committed
119
  bool enable_load_from_binary_file = true;
Guolin Ke's avatar
Guolin Ke committed
120
  int bin_construct_sample_cnt = 200000;
Guolin Ke's avatar
Guolin Ke committed
121
  bool is_predict_leaf_index = false;
122
  bool is_predict_contrib = false;
Guolin Ke's avatar
Guolin Ke committed
123
  bool is_predict_raw_score = false;
124
  int min_data_in_leaf = 20;
Guolin Ke's avatar
Guolin Ke committed
125
  int min_data_in_bin = 5;
126
  double max_conflict_rate = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
127
  bool enable_bundle = true;
Guolin Ke's avatar
Guolin Ke committed
128
129
130
131
132
  bool has_header = false;
  /*! \brief Index or column name of label, default is the first column
   * And add an prefix "name:" while using column name */
  std::string label_column = "";
  /*! \brief Index or column name of weight, < 0 means not used
133
  * And add an prefix "name:" while using column name
134
  * Note: when using Index, it doesn't count the label index */
Guolin Ke's avatar
Guolin Ke committed
135
  std::string weight_column = "";
Guolin Ke's avatar
Guolin Ke committed
136
137
  /*! \brief Index or column name of group/query id, < 0 means not used
  * And add an prefix "name:" while using column name
138
  * Note: when using Index, it doesn't count the label index */
Guolin Ke's avatar
Guolin Ke committed
139
140
  std::string group_column = "";
  /*! \brief ignored features, separate by ','
Guolin Ke's avatar
Guolin Ke committed
141
  * And add an prefix "name:" while using column name
142
  * Note: when using Index, it doesn't count the label index */
Guolin Ke's avatar
Guolin Ke committed
143
  std::string ignore_column = "";
144
145
  /*! \brief specific categorical columns, Note:only support for integer type categorical
  * And add an prefix "name:" while using column name
146
  * Note: when using Index, it doesn't count the label index */
147
  std::string categorical_column = "";
Guolin Ke's avatar
Guolin Ke committed
148
  std::string device_type = kDefaultDevice;
149
150
151
152
153
154
155

  /*! \brief Set to true if want to use early stop for the prediction */
  bool pred_early_stop = false;
  /*! \brief Frequency of checking the pred_early_stop */
  int pred_early_stop_freq = 10;
  /*! \brief Threshold of margin of pred_early_stop */
  double pred_early_stop_margin = 10.0f;
Guolin Ke's avatar
Guolin Ke committed
156
157
  bool zero_as_missing = false;
  bool use_missing = true;
158
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
163
164
};

/*! \brief Config for objective function */
struct ObjectiveConfig: public ConfigBase {
public:
  virtual ~ObjectiveConfig() {}
165
  double sigmoid = 1.0f;
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
166
  double huber_delta = 1.0f;
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
167
  double fair_c = 1.0f;
168
  // for Approximate Hessian With Gaussian
169
  double gaussian_eta = 1.0f;
170
  double poisson_max_delta_step = 0.7f;
Guolin Ke's avatar
Guolin Ke committed
171
  // for lambdarank
172
  std::vector<double> label_gain;
Guolin Ke's avatar
Guolin Ke committed
173
174
175
176
  // for lambdarank
  int max_position = 20;
  // for binary
  bool is_unbalance = false;
177
178
  // for multiclass
  int num_class = 1;
Guolin Ke's avatar
Guolin Ke committed
179
180
  // Balancing of positive and negative weights
  double scale_pos_weight = 1.0f;
181
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
182
183
184
185
186
187
};

/*! \brief Config for metrics interface*/
struct MetricConfig: public ConfigBase {
public:
  virtual ~MetricConfig() {}
188
  int num_class = 1;
189
  double sigmoid = 1.0f;
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
190
  double huber_delta = 1.0f;
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
191
  double fair_c = 1.0f;
192
  std::vector<double> label_gain;
Guolin Ke's avatar
Guolin Ke committed
193
  std::vector<int> eval_at;
194
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
195
196
197
198
199
200
};


/*! \brief Config for tree model */
struct TreeConfig: public ConfigBase {
public:
201
202
  int min_data_in_leaf = 20;
  double min_sum_hessian_in_leaf = 1e-3f;
203
204
205
  double lambda_l1 = 0.0f;
  double lambda_l2 = 0.0f;
  double min_gain_to_split = 0.0f;
206
  // should > 1
207
  int num_leaves = kDefaultNumLeaves;
Guolin Ke's avatar
Guolin Ke committed
208
  int feature_fraction_seed = 2;
209
  double feature_fraction = 1.0f;
tks's avatar
tks committed
210
  // max cache size(unit:MB) for historical histogram. < 0 means no limit
211
  double histogram_pool_size = -1.0f;
212
  // max depth of tree model.
Guolin Ke's avatar
Guolin Ke committed
213
  // Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting
tks's avatar
tks committed
214
215
  // And the max leaves will be min(num_leaves, pow(2, max_depth))
  // max_depth < 0 means no limit
216
  int max_depth = -1;
Guolin Ke's avatar
Guolin Ke committed
217
  int top_k = 20;
218
219
220
221
222
223
224
225
226
227
  /*! \brief OpenCL platform ID. Usually each GPU vendor exposes one OpenCL platform.
   *  Default value is -1, using the system-wide default platform
   */
  int gpu_platform_id = -1;
  /*! \brief OpenCL device ID in the specified platform. Each GPU in the selected platform has a
   *  unique device ID. Default value is -1, using the default device in the selected platform
   */
  int gpu_device_id = -1;
  /*! \brief Set to true to use double precision math on GPU (default using single precision) */
  bool gpu_use_dp = false;
ChenZhiyong's avatar
ChenZhiyong committed
228
229
  int max_cat_group = 64;
  int min_data_per_group = 10;
230
  int max_cat_threshold = 256;
ChenZhiyong's avatar
ChenZhiyong committed
231
232
233
  double cat_smooth_ratio = 0.01;
  double min_cat_smooth = 5;
  double max_cat_smooth = 100;
234
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
235
236
237
238
239
240
};

/*! \brief Config for Boosting */
struct BoostingConfig: public ConfigBase {
public:
  virtual ~BoostingConfig() {}
Guolin Ke's avatar
Guolin Ke committed
241
  double sigmoid = 1.0f;
242
243
  int output_freq = 1;
  bool is_provide_training_metric = false;
244
  int num_iterations = 100;
245
246
  double learning_rate = 0.1f;
  double bagging_fraction = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
247
248
  int bagging_seed = 3;
  int bagging_freq = 0;
wxchan's avatar
wxchan committed
249
  int early_stopping_round = 0;
250
  int num_class = 1;
251
252
253
254
255
  double drop_rate = 0.1;
  int max_drop = 50;
  double skip_drop = 0.5;
  bool xgboost_dart_mode = false;
  bool uniform_drop = false;
Guolin Ke's avatar
Guolin Ke committed
256
  int drop_seed = 4;
Guolin Ke's avatar
Guolin Ke committed
257
258
  double top_rate = 0.2f;
  double other_rate = 0.1f;
259
  // only used for the regression. Will boost from the average labels.
260
  bool boost_from_average = true;
Guolin Ke's avatar
Guolin Ke committed
261
262
  std::string tree_learner_type = kDefaultTreeLearnerType;
  std::string device_type = kDefaultDevice;
Guolin Ke's avatar
Guolin Ke committed
263
  TreeConfig tree_config;
264
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
265
266
267
268
269
270
271
272
273
};

/*! \brief Config for Network */
struct NetworkConfig: public ConfigBase {
public:
  int num_machines = 1;
  int local_listen_port = 12400;
  int time_out = 120;  // in minutes
  std::string machine_list_filename = "";
274
  std::string machines = "";
275
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
276
277
278
279
280
281
282
283
};


/*! \brief Overall config, all configs will put on this class */
struct OverallConfig: public ConfigBase {
public:
  TaskType task_type = TaskType::kTrain;
  NetworkConfig network_config;
Guolin Ke's avatar
Guolin Ke committed
284
  int seed = 0;
Guolin Ke's avatar
Guolin Ke committed
285
286
287
288
  int num_threads = 0;
  bool is_parallel = false;
  bool is_parallel_find_bin = false;
  IOConfig io_config;
Guolin Ke's avatar
Guolin Ke committed
289
  std::string boosting_type = kDefaultBoostingType;
Guolin Ke's avatar
Guolin Ke committed
290
  BoostingConfig boosting_config;
Guolin Ke's avatar
Guolin Ke committed
291
  std::string objective_type =  kDefaultObjectiveType;
Guolin Ke's avatar
Guolin Ke committed
292
293
294
  ObjectiveConfig objective_config;
  std::vector<std::string> metric_types;
  MetricConfig metric_config;
295
  std::string convert_model_language = "";
296
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
297

Guolin Ke's avatar
Guolin Ke committed
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
private:
  void CheckParamConflict();
};


/*! \brief Copy the value stored under name into out; returns false (out untouched) when the key is absent */
inline bool ConfigBase::GetString(
  const std::unordered_map<std::string, std::string>& params,
  const std::string& name, std::string* out) {
  const auto entry = params.find(name);
  if (entry == params.end()) {
    return false;
  }
  *out = entry->second;
  return true;
}

inline bool ConfigBase::GetInt(
  const std::unordered_map<std::string, std::string>& params,
  const std::string& name, int* out) {
  if (params.count(name) > 0) {
317
    if (!Common::AtoiAndCheck(params.at(name).c_str(), out)) {
318
      Log::Fatal("Parameter %s should be of type int, got \"%s\"",
319
320
        name.c_str(), params.at(name).c_str());
    }
Guolin Ke's avatar
Guolin Ke committed
321
322
323
324
325
    return true;
  }
  return false;
}

326
inline bool ConfigBase::GetDouble(
Guolin Ke's avatar
Guolin Ke committed
327
  const std::unordered_map<std::string, std::string>& params,
328
  const std::string& name, double* out) {
Guolin Ke's avatar
Guolin Ke committed
329
  if (params.count(name) > 0) {
330
    if (!Common::AtofAndCheck(params.at(name).c_str(), out)) {
331
      Log::Fatal("Parameter %s should be of type double, got \"%s\"",
332
333
        name.c_str(), params.at(name).c_str());
    }
Guolin Ke's avatar
Guolin Ke committed
334
335
336
337
338
339
340
341
342
343
    return true;
  }
  return false;
}

inline bool ConfigBase::GetBool(
  const std::unordered_map<std::string, std::string>& params,
  const std::string& name, bool* out) {
  if (params.count(name) > 0) {
    std::string value = params.at(name);
Guolin Ke's avatar
Guolin Ke committed
344
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
345
    if (value == std::string("false") || value == std::string("-")) {
Guolin Ke's avatar
Guolin Ke committed
346
      *out = false;
347
    } else if (value == std::string("true") || value == std::string("+")) {
Guolin Ke's avatar
Guolin Ke committed
348
      *out = true;
349
    } else {
350
      Log::Fatal("Parameter %s should be \"true\"/\"+\" or \"false\"/\"-\", got \"%s\"",
351
        name.c_str(), params.at(name).c_str());
Guolin Ke's avatar
Guolin Ke committed
352
353
354
355
356
357
358
359
    }
    return true;
  }
  return false;
}

struct ParameterAlias {
  static void KeyAliasTransform(std::unordered_map<std::string, std::string>* params) {
Guolin Ke's avatar
Guolin Ke committed
360
    const std::unordered_map<std::string, std::string> alias_table(
Guolin Ke's avatar
Guolin Ke committed
361
362
363
    {
      { "config", "config_file" },
      { "nthread", "num_threads" },
Guolin Ke's avatar
Guolin Ke committed
364
      { "random_seed", "seed" },
Guolin Ke's avatar
Guolin Ke committed
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
      { "num_thread", "num_threads" },
      { "boosting", "boosting_type" },
      { "boost", "boosting_type" },
      { "application", "objective" },
      { "app", "objective" },
      { "train_data", "data" },
      { "train", "data" },
      { "model_output", "output_model" },
      { "model_out", "output_model" },
      { "model_input", "input_model" },
      { "model_in", "input_model" },
      { "predict_result", "output_result" },
      { "prediction_result", "output_result" },
      { "valid", "valid_data" },
      { "test_data", "valid_data" },
      { "test", "valid_data" },
      { "is_sparse", "is_enable_sparse" },
Guolin Ke's avatar
Guolin Ke committed
382
383
      { "enable_sparse", "is_enable_sparse" },
      { "pre_partition", "is_pre_partition" },
384
      { "training_metric", "is_training_metric" },
Guolin Ke's avatar
Guolin Ke committed
385
386
      { "train_metric", "is_training_metric" },
      { "ndcg_at", "ndcg_eval_at" },
Guolin Ke's avatar
Guolin Ke committed
387
      { "eval_at", "ndcg_eval_at" },
Guolin Ke's avatar
Guolin Ke committed
388
389
      { "min_data_per_leaf", "min_data_in_leaf" },
      { "min_data", "min_data_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
390
      { "min_child_samples", "min_data_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
391
392
393
      { "min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf" },
      { "min_sum_hessian", "min_sum_hessian_in_leaf" },
      { "min_hessian", "min_sum_hessian_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
394
      { "min_child_weight", "min_sum_hessian_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
395
396
      { "num_leaf", "num_leaves" },
      { "sub_feature", "feature_fraction" },
Guolin Ke's avatar
Guolin Ke committed
397
      { "colsample_bytree", "feature_fraction" },
Guolin Ke's avatar
Guolin Ke committed
398
399
400
401
402
403
      { "num_iteration", "num_iterations" },
      { "num_tree", "num_iterations" },
      { "num_round", "num_iterations" },
      { "num_trees", "num_iterations" },
      { "num_rounds", "num_iterations" },
      { "sub_row", "bagging_fraction" },
Guolin Ke's avatar
Guolin Ke committed
404
405
      { "subsample", "bagging_fraction" },
      { "subsample_freq", "bagging_freq" },
Guolin Ke's avatar
Guolin Ke committed
406
407
408
409
410
411
412
413
      { "shrinkage_rate", "learning_rate" },
      { "tree", "tree_learner" },
      { "num_machine", "num_machines" },
      { "local_port", "local_listen_port" },
      { "two_round_loading", "use_two_round_loading"},
      { "two_round", "use_two_round_loading" },
      { "mlist", "machine_list_file" },
      { "is_save_binary", "is_save_binary_file" },
Qiwei Ye's avatar
Qiwei Ye committed
414
      { "save_binary", "is_save_binary_file" },
wxchan's avatar
wxchan committed
415
      { "early_stopping_rounds", "early_stopping_round"},
416
      { "early_stopping", "early_stopping_round"},
Guolin Ke's avatar
Guolin Ke committed
417
418
419
420
421
422
423
424
      { "verbosity", "verbose" },
      { "header", "has_header" },
      { "label", "label_column" },
      { "weight", "weight_column" },
      { "group", "group_column" },
      { "query", "group_column" },
      { "query_column", "group_column" },
      { "ignore_feature", "ignore_column" },
Guolin Ke's avatar
Guolin Ke committed
425
      { "blacklist", "ignore_column" },
426
427
428
      { "categorical_feature", "categorical_column" },
      { "cat_column", "categorical_column" },
      { "cat_feature", "categorical_column" },
Guolin Ke's avatar
Guolin Ke committed
429
      { "predict_raw_score", "is_predict_raw_score" },
430
      { "predict_leaf_index", "is_predict_leaf_index" },
Guolin Ke's avatar
Guolin Ke committed
431
432
      { "raw_score", "is_predict_raw_score" },
      { "leaf_index", "is_predict_leaf_index" },
433
434
      { "contrib", "is_predict_contrib" },
      { "predict_contrib", "is_predict_contrib" },
Guolin Ke's avatar
Guolin Ke committed
435
      { "min_split_gain", "min_gain_to_split" },
Guolin Ke's avatar
Guolin Ke committed
436
      { "topk", "top_k" },
Guolin Ke's avatar
Guolin Ke committed
437
438
      { "reg_alpha", "lambda_l1" },
      { "reg_lambda", "lambda_l2" },
439
      { "num_classes", "num_class" },
Guolin Ke's avatar
Guolin Ke committed
440
      { "unbalanced_sets", "is_unbalance" },
441
      { "bagging_fraction_seed", "bagging_seed" },
442
443
444
      { "num_boost_round", "num_iterations" },
      { "workers", "machines" },
      { "nodes", "machines" },
Guolin Ke's avatar
Guolin Ke committed
445
    });
Guolin Ke's avatar
Guolin Ke committed
446
    const std::unordered_set<std::string> parameter_set({
wxchan's avatar
wxchan committed
447
448
449
450
451
452
453
454
455
      "config", "config_file", "task", "device",
      "num_threads", "seed", "boosting_type", "objective", "data",
      "output_model", "input_model", "output_result", "valid_data",
      "is_enable_sparse", "is_pre_partition", "is_training_metric",
      "ndcg_eval_at", "min_data_in_leaf", "min_sum_hessian_in_leaf",
      "num_leaves", "feature_fraction", "num_iterations",
      "bagging_fraction", "bagging_freq", "learning_rate", "tree_learner",
      "num_machines", "local_listen_port", "use_two_round_loading",
      "machine_list_file", "is_save_binary_file", "early_stopping_round",
Guolin Ke's avatar
Guolin Ke committed
456
      "verbose", "has_header", "label_column", "weight_column", "group_column",
wxchan's avatar
wxchan committed
457
458
459
460
461
462
463
464
465
466
467
468
469
      "ignore_column", "categorical_column", "is_predict_raw_score",
      "is_predict_leaf_index", "min_gain_to_split", "top_k",
      "lambda_l1", "lambda_l2", "num_class", "is_unbalance",
      "max_depth", "subsample_for_bin", "max_bin", "bagging_seed",
      "drop_rate", "skip_drop", "max_drop", "uniform_drop",
      "xgboost_dart_mode", "drop_seed", "top_rate", "other_rate",
      "min_data_in_bin", "data_random_seed", "bin_construct_sample_cnt",
      "num_iteration_predict", "pred_early_stop", "pred_early_stop_freq",
      "pred_early_stop_margin", "use_missing", "sigmoid", "huber_delta",
      "fair_c", "poission_max_delta_step", "scale_pos_weight",
      "boost_from_average", "max_position", "label_gain",
      "metric", "metric_freq", "time_out",
      "gpu_platform_id", "gpu_device_id", "gpu_use_dp",
470
      "convert_model", "convert_model_language",
Guolin Ke's avatar
Guolin Ke committed
471
472
473
      "feature_fraction_seed", "enable_bundle", "data_filename", "valid_data_filenames",
      "snapshot_freq", "verbosity", "sparse_threshold", "enable_load_from_binary_file",
      "max_conflict_rate", "poisson_max_delta_step", "gaussian_eta",
474
      "histogram_pool_size", "output_freq", "is_provide_training_metric", "machine_list_filename", "machines",
ChenZhiyong's avatar
ChenZhiyong committed
475
476
      "zero_as_missing", "init_score_file", "valid_init_score_file", "is_predict_contrib",
      "max_cat_threshold", "max_cat_group", "cat_smooth_ratio", "min_cat_smooth", "max_cat_smooth", "min_data_per_group"
wxchan's avatar
wxchan committed
477
    });
Guolin Ke's avatar
Guolin Ke committed
478
479
    std::unordered_map<std::string, std::string> tmp_map;
    for (const auto& pair : *params) {
wxchan's avatar
wxchan committed
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
      auto alias = alias_table.find(pair.first);
      if (alias != alias_table.end()) { // found alias
        auto alias_set = tmp_map.find(alias->second); 
        if (alias_set != tmp_map.end()) { // alias already set
          // set priority by length & alphabetically to ensure reproducible behavior
          if (alias_set->second.size() < pair.first.size() ||
            (alias_set->second.size() == pair.first.size() && alias_set->second < pair.first)) {
            Log::Warning("%s is set with %s=%s, %s=%s will be ignored. Current value: %s=%s.",
              alias->second.c_str(), alias_set->second.c_str(), params->at(alias_set->second).c_str(),
              pair.first.c_str(), pair.second.c_str(), alias->second.c_str(), params->at(alias_set->second).c_str());
          } else {
            Log::Warning("%s is set with %s=%s, will be overrided by %s=%s. Current value: %s=%s.",
              alias->second.c_str(), alias_set->second.c_str(), params->at(alias_set->second).c_str(),
              pair.first.c_str(), pair.second.c_str(), alias->second.c_str(), pair.second.c_str());
            tmp_map[alias->second] = pair.first;
          }
        } else { // alias not set
          tmp_map.emplace(alias->second, pair.first);
        }
      } else if (parameter_set.find(pair.first) == parameter_set.end()) {
        Log::Warning("Unknown parameter: %s", pair.first.c_str());
Guolin Ke's avatar
Guolin Ke committed
501
502
503
      }
    }
    for (const auto& pair : tmp_map) {
wxchan's avatar
wxchan committed
504
505
506
507
508
509
510
511
      auto alias = params->find(pair.first);
      if (alias == params->end()) { // not find
        params->emplace(pair.first, params->at(pair.second));
        params->erase(pair.second);
      } else {
        Log::Warning("%s is set=%s, %s=%s will be ignored. Current value: %s=%s.", 
          pair.first.c_str(), alias->second.c_str(), pair.second.c_str(), params->at(pair.second).c_str(),
          pair.first.c_str(), alias->second.c_str());
Guolin Ke's avatar
Guolin Ke committed
512
513
514
515
516
517
518
      }
    }
  }
};

}   // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
519
#endif   // LIGHTGBM_CONFIG_H_