config.h 19.9 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
#ifndef LIGHTGBM_CONFIG_H_
#define LIGHTGBM_CONFIG_H_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

Guolin Ke's avatar
Guolin Ke committed
7
#include <LightGBM/meta.h>
8
#include <LightGBM/export.h>
Guolin Ke's avatar
Guolin Ke committed
9

Guolin Ke's avatar
Guolin Ke committed
10
11
12
#include <vector>
#include <string>
#include <unordered_map>
wxchan's avatar
wxchan committed
13
#include <unordered_set>
Guolin Ke's avatar
Guolin Ke committed
14
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
15
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
16
17
18

namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
19
20
21
22
/*! \brief Default values used as in-class initializers by the config structs in this header */
const std::string kDefaultTreeLearnerType = "serial";
const std::string kDefaultDevice = "cpu";
const std::string kDefaultBoostingType = "gbdt";
const std::string kDefaultObjectiveType = "regression";
const int kDefaultNumLeaves = 31;
Guolin Ke's avatar
Guolin Ke committed
24

Guolin Ke's avatar
Guolin Ke committed
25
26
27
28
29
30
31
32
33
/*!
* \brief The interface for Config
*
* Every config struct in this header derives from ConfigBase and overrides Set().
* The static Get* helpers are declared here and defined inline further down in
* this header; KV2Map and Str2Map are only declared here.
*/
struct ConfigBase {
public:
  /*! \brief virtual destructor */
  virtual ~ConfigBase() {}

  /*!
  * \brief Set current config object by params
  * \param params Store the key and value for params
  */
  virtual void Set(
    const std::unordered_map<std::string, std::string>& params) = 0;

  /*!
  * \brief Get string value by specific name of key
  * \param params Store the key and value for params
  * \param name Name of key
  * \param out Value will be assigned to out if key exists
  * \return True if key exists
  */
  inline static bool GetString(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, std::string* out);

  /*!
  * \brief Get int value by specific name of key
  * \param params Store the key and value for params
  * \param name Name of key
  * \param out Value will be assigned to out if key exists
  * \return True if key exists
  */
  inline static bool GetInt(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, int* out);

  /*!
  * \brief Get double value by specific name of key
  * \param params Store the key and value for params
  * \param name Name of key
  * \param out Value will be assigned to out if key exists
  * \return True if key exists
  */
  inline static bool GetDouble(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, double* out);

  /*!
  * \brief Get bool value by specific name of key
  * \param params Store the key and value for params
  * \param name Name of key
  * \param out Value will be assigned to out if key exists
  * \return True if key exists
  */
  inline static bool GetBool(
    const std::unordered_map<std::string, std::string>& params,
    const std::string& name, bool* out);

  // NOTE(review): the two helpers below are implemented outside this header.
  // Judging by their names they parse "key=value" text into a parameter map;
  // confirm the exact accepted syntax against their definitions.
  static void KV2Map(std::unordered_map<std::string, std::string>& params, const char* kv);
  static std::unordered_map<std::string, std::string> Str2Map(const char* parameters);
};

/*!
* \brief Types of tasks
*
* Unscoped enum; enumerators take the implicit values 0..3 in declaration order.
* NOTE(review): KRefitTree starts with an upper-case 'K', unlike its siblings.
* The spelling is kept as-is because callers refer to it by this name.
*/
enum TaskType {
  kTrain, kPredict, kConvertModel, KRefitTree
};

/*! \brief Config for input and output files */
struct IOConfig: public ConfigBase {
public:
96
  int max_bin = 255;
97
  int num_class = 1;
Guolin Ke's avatar
Guolin Ke committed
98
99
  int data_random_seed = 1;
  std::string data_filename = "";
100
  std::string initscore_filename = "";
Guolin Ke's avatar
Guolin Ke committed
101
  std::vector<std::string> valid_data_filenames;
102
  std::vector<std::string> valid_data_initscores;
103
  int snapshot_freq = -1;
Guolin Ke's avatar
Guolin Ke committed
104
105
  std::string output_model = "LightGBM_model.txt";
  std::string output_result = "LightGBM_predict_result.txt";
106
  std::string convert_model = "gbdt_prediction.cpp";
Guolin Ke's avatar
Guolin Ke committed
107
  std::string input_model = "";
108

Guolin Ke's avatar
Guolin Ke committed
109
  int verbosity = 1;
110
  int num_iteration_predict = -1;
Guolin Ke's avatar
Guolin Ke committed
111
112
  bool is_pre_partition = false;
  bool is_enable_sparse = true;
113
114
115
116
117
  /*! \brief The threshold of zero elements precentage for treating a feature as a sparse feature.
   *  Default is 0.8, where a feature is treated as a sparse feature when there are over 80% zeros.
   *  When setting to 1.0, all features are processed as dense features.
   */
  double sparse_threshold = 0.8;
Guolin Ke's avatar
Guolin Ke committed
118
119
  bool use_two_round_loading = false;
  bool is_save_binary_file = false;
Guolin Ke's avatar
Guolin Ke committed
120
  bool enable_load_from_binary_file = true;
Guolin Ke's avatar
Guolin Ke committed
121
  int bin_construct_sample_cnt = 200000;
Guolin Ke's avatar
Guolin Ke committed
122
  bool is_predict_leaf_index = false;
123
  bool is_predict_contrib = false;
Guolin Ke's avatar
Guolin Ke committed
124
  bool is_predict_raw_score = false;
125
  int min_data_in_leaf = 20;
126
  int min_data_in_bin = 3;
Guolin Ke's avatar
Guolin Ke committed
127
  double max_conflict_rate = 0.0;
Guolin Ke's avatar
Guolin Ke committed
128
  bool enable_bundle = true;
Guolin Ke's avatar
Guolin Ke committed
129
  bool has_header = false;
Guolin Ke's avatar
Guolin Ke committed
130
  std::vector<int8_t> monotone_constraints;
Guolin Ke's avatar
Guolin Ke committed
131
132
133
134
  /*! \brief Index or column name of label, default is the first column
   * And add an prefix "name:" while using column name */
  std::string label_column = "";
  /*! \brief Index or column name of weight, < 0 means not used
135
  * And add an prefix "name:" while using column name
136
  * Note: when using Index, it doesn't count the label index */
Guolin Ke's avatar
Guolin Ke committed
137
  std::string weight_column = "";
Guolin Ke's avatar
Guolin Ke committed
138
139
  /*! \brief Index or column name of group/query id, < 0 means not used
  * And add an prefix "name:" while using column name
140
  * Note: when using Index, it doesn't count the label index */
Guolin Ke's avatar
Guolin Ke committed
141
142
  std::string group_column = "";
  /*! \brief ignored features, separate by ','
Guolin Ke's avatar
Guolin Ke committed
143
  * And add an prefix "name:" while using column name
144
  * Note: when using Index, it doesn't count the label index */
Guolin Ke's avatar
Guolin Ke committed
145
  std::string ignore_column = "";
146
147
  /*! \brief specific categorical columns, Note:only support for integer type categorical
  * And add an prefix "name:" while using column name
148
  * Note: when using Index, it doesn't count the label index */
149
  std::string categorical_column = "";
Guolin Ke's avatar
Guolin Ke committed
150
  std::string device_type = kDefaultDevice;
151
152
153
154
155
156

  /*! \brief Set to true if want to use early stop for the prediction */
  bool pred_early_stop = false;
  /*! \brief Frequency of checking the pred_early_stop */
  int pred_early_stop_freq = 10;
  /*! \brief Threshold of margin of pred_early_stop */
Guolin Ke's avatar
Guolin Ke committed
157
  double pred_early_stop_margin = 10.0;
Guolin Ke's avatar
Guolin Ke committed
158
159
  bool zero_as_missing = false;
  bool use_missing = true;
160
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
161
162
163
164
165
166
};

/*! \brief Config for objective function */
struct ObjectiveConfig: public ConfigBase {
public:
  virtual ~ObjectiveConfig() {}
Guolin Ke's avatar
Guolin Ke committed
167
168
169
  double sigmoid = 1.0;
  double fair_c = 1.0;
  double poisson_max_delta_step = 0.7;
Guolin Ke's avatar
Guolin Ke committed
170
  // for lambdarank
171
  std::vector<double> label_gain;
Guolin Ke's avatar
Guolin Ke committed
172
173
174
175
  // for lambdarank
  int max_position = 20;
  // for binary
  bool is_unbalance = false;
176
177
  // for multiclass
  int num_class = 1;
Guolin Ke's avatar
Guolin Ke committed
178
  // Balancing of positive and negative weights
Guolin Ke's avatar
Guolin Ke committed
179
  double scale_pos_weight = 1.0;
180
181
  // True will sqrt fit the sqrt(label)
  bool reg_sqrt = false;
Guolin Ke's avatar
Guolin Ke committed
182
183
  double alpha = 0.9;
  double tweedie_variance_power = 1.5;
184
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
185
186
187
188
189
190
};

/*! \brief Config for metrics interface*/
struct MetricConfig: public ConfigBase {
public:
  virtual ~MetricConfig() {}
191
  int num_class = 1;
Guolin Ke's avatar
Guolin Ke committed
192
193
194
195
  double sigmoid = 1.0;
  double fair_c = 1.0;
  double alpha = 0.9;
  double tweedie_variance_power = 1.5;
196
  std::vector<double> label_gain;
Guolin Ke's avatar
Guolin Ke committed
197
  std::vector<int> eval_at;
198
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
199
200
201
202
203
204
};


/*! \brief Config for tree model */
struct TreeConfig: public ConfigBase {
public:
205
  int min_data_in_leaf = 20;
Guolin Ke's avatar
Guolin Ke committed
206
207
208
209
210
  double min_sum_hessian_in_leaf = 1e-3;
  double max_delta_step = 0.0;
  double lambda_l1 = 0.0;
  double lambda_l2 = 0.0;
  double min_gain_to_split = 0.0;
211
  // should > 1
212
  int num_leaves = kDefaultNumLeaves;
Guolin Ke's avatar
Guolin Ke committed
213
  int feature_fraction_seed = 2;
Guolin Ke's avatar
Guolin Ke committed
214
  double feature_fraction = 1.0;
tks's avatar
tks committed
215
  // max cache size(unit:MB) for historical histogram. < 0 means no limit
Guolin Ke's avatar
Guolin Ke committed
216
  double histogram_pool_size = -1.0;
217
  // max depth of tree model.
Guolin Ke's avatar
Guolin Ke committed
218
  // Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting
tks's avatar
tks committed
219
220
  // And the max leaves will be min(num_leaves, pow(2, max_depth))
  // max_depth < 0 means no limit
221
  int max_depth = -1;
Guolin Ke's avatar
Guolin Ke committed
222
  int top_k = 20;
223
224
225
226
227
228
229
230
231
232
  /*! \brief OpenCL platform ID. Usually each GPU vendor exposes one OpenCL platform.
   *  Default value is -1, using the system-wide default platform
   */
  int gpu_platform_id = -1;
  /*! \brief OpenCL device ID in the specified platform. Each GPU in the selected platform has a
   *  unique device ID. Default value is -1, using the default device in the selected platform
   */
  int gpu_device_id = -1;
  /*! \brief Set to true to use double precision math on GPU (default using single precision) */
  bool gpu_use_dp = false;
Guolin Ke's avatar
Guolin Ke committed
233
  int min_data_per_group = 100;
234
235
236
  int max_cat_threshold = 32;
  double cat_l2 = 10;
  double cat_smooth = 10;
237
  int max_cat_to_onehot = 4;
238
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
239
240
241
242
243
244
};

/*! \brief Config for Boosting */
struct BoostingConfig: public ConfigBase {
public:
  virtual ~BoostingConfig() {}
245
246
  int output_freq = 1;
  bool is_provide_training_metric = false;
247
  int num_iterations = 100;
Guolin Ke's avatar
Guolin Ke committed
248
249
  double learning_rate = 0.1;
  double bagging_fraction = 1.0;
Guolin Ke's avatar
Guolin Ke committed
250
251
  int bagging_seed = 3;
  int bagging_freq = 0;
wxchan's avatar
wxchan committed
252
  int early_stopping_round = 0;
253
  int num_class = 1;
254
255
256
257
258
  double drop_rate = 0.1;
  int max_drop = 50;
  double skip_drop = 0.5;
  bool xgboost_dart_mode = false;
  bool uniform_drop = false;
Guolin Ke's avatar
Guolin Ke committed
259
  int drop_seed = 4;
Guolin Ke's avatar
Guolin Ke committed
260
261
  double top_rate = 0.2;
  double other_rate = 0.1;
262
  // only used for the regression. Will boost from the average labels.
263
  bool boost_from_average = true;
Guolin Ke's avatar
Guolin Ke committed
264
265
  std::string tree_learner_type = kDefaultTreeLearnerType;
  std::string device_type = kDefaultDevice;
Guolin Ke's avatar
Guolin Ke committed
266
  TreeConfig tree_config;
267
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
268
269
270

  /* filename of forced splits */
  std::string forcedsplits_filename = "";
Guolin Ke's avatar
Guolin Ke committed
271
272
273
274
275
276
277
278
279
};

/*! \brief Config for Network */
struct NetworkConfig: public ConfigBase {
public:
  int num_machines = 1;
  int local_listen_port = 12400;
  int time_out = 120;  // in minutes
  std::string machine_list_filename = "";
280
  std::string machines = "";
281
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
Guolin Ke's avatar
Guolin Ke committed
282
283
284
285
286
287
288
289
};


/*! \brief Overall config, all configs will put on this class */
struct OverallConfig: public ConfigBase {
public:
  TaskType task_type = TaskType::kTrain;
  NetworkConfig network_config;
Guolin Ke's avatar
Guolin Ke committed
290
  int seed = 0;
Guolin Ke's avatar
Guolin Ke committed
291
292
293
294
  int num_threads = 0;
  bool is_parallel = false;
  bool is_parallel_find_bin = false;
  IOConfig io_config;
Guolin Ke's avatar
Guolin Ke committed
295
  std::string boosting_type = kDefaultBoostingType;
Guolin Ke's avatar
Guolin Ke committed
296
  BoostingConfig boosting_config;
Guolin Ke's avatar
Guolin Ke committed
297
  std::string objective_type =  kDefaultObjectiveType;
Guolin Ke's avatar
Guolin Ke committed
298
299
300
  ObjectiveConfig objective_config;
  std::vector<std::string> metric_types;
  MetricConfig metric_config;
301
  std::string convert_model_language = "";
302
  LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
303

Guolin Ke's avatar
Guolin Ke committed
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
private:
  void CheckParamConflict();
};


/*!
* \brief Copy the value stored under `name` into `*out`.
* \param params Key/value parameter map to search
* \param name Key to look up
* \param out Receives the stored string; left untouched when the key is absent
* \return True if the key exists, false otherwise
*/
inline bool ConfigBase::GetString(
  const std::unordered_map<std::string, std::string>& params,
  const std::string& name, std::string* out) {
  const auto entry = params.find(name);
  if (entry == params.end()) {
    return false;
  }
  *out = entry->second;
  return true;
}

inline bool ConfigBase::GetInt(
  const std::unordered_map<std::string, std::string>& params,
  const std::string& name, int* out) {
  if (params.count(name) > 0) {
323
    if (!Common::AtoiAndCheck(params.at(name).c_str(), out)) {
324
      Log::Fatal("Parameter %s should be of type int, got \"%s\"",
325
326
        name.c_str(), params.at(name).c_str());
    }
Guolin Ke's avatar
Guolin Ke committed
327
328
329
330
331
    return true;
  }
  return false;
}

332
inline bool ConfigBase::GetDouble(
Guolin Ke's avatar
Guolin Ke committed
333
  const std::unordered_map<std::string, std::string>& params,
334
  const std::string& name, double* out) {
Guolin Ke's avatar
Guolin Ke committed
335
  if (params.count(name) > 0) {
336
    if (!Common::AtofAndCheck(params.at(name).c_str(), out)) {
337
      Log::Fatal("Parameter %s should be of type double, got \"%s\"",
338
339
        name.c_str(), params.at(name).c_str());
    }
Guolin Ke's avatar
Guolin Ke committed
340
341
342
343
344
345
346
347
348
349
    return true;
  }
  return false;
}

inline bool ConfigBase::GetBool(
  const std::unordered_map<std::string, std::string>& params,
  const std::string& name, bool* out) {
  if (params.count(name) > 0) {
    std::string value = params.at(name);
Guolin Ke's avatar
Guolin Ke committed
350
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
351
    if (value == std::string("false") || value == std::string("-")) {
Guolin Ke's avatar
Guolin Ke committed
352
      *out = false;
353
    } else if (value == std::string("true") || value == std::string("+")) {
Guolin Ke's avatar
Guolin Ke committed
354
      *out = true;
355
    } else {
356
      Log::Fatal("Parameter %s should be \"true\"/\"+\" or \"false\"/\"-\", got \"%s\"",
357
        name.c_str(), params.at(name).c_str());
Guolin Ke's avatar
Guolin Ke committed
358
359
360
361
362
363
364
365
    }
    return true;
  }
  return false;
}

struct ParameterAlias {
  static void KeyAliasTransform(std::unordered_map<std::string, std::string>* params) {
Guolin Ke's avatar
Guolin Ke committed
366
    const std::unordered_map<std::string, std::string> alias_table(
Guolin Ke's avatar
Guolin Ke committed
367
368
369
    {
      { "config", "config_file" },
      { "nthread", "num_threads" },
370
      { "num_thread", "num_threads" },
Guolin Ke's avatar
Guolin Ke committed
371
      { "random_seed", "seed" },
Guolin Ke's avatar
Guolin Ke committed
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
      { "boosting", "boosting_type" },
      { "boost", "boosting_type" },
      { "application", "objective" },
      { "app", "objective" },
      { "train_data", "data" },
      { "train", "data" },
      { "model_output", "output_model" },
      { "model_out", "output_model" },
      { "model_input", "input_model" },
      { "model_in", "input_model" },
      { "predict_result", "output_result" },
      { "prediction_result", "output_result" },
      { "valid", "valid_data" },
      { "test_data", "valid_data" },
      { "test", "valid_data" },
      { "is_sparse", "is_enable_sparse" },
Guolin Ke's avatar
Guolin Ke committed
388
389
      { "enable_sparse", "is_enable_sparse" },
      { "pre_partition", "is_pre_partition" },
390
      { "training_metric", "is_training_metric" },
Guolin Ke's avatar
Guolin Ke committed
391
392
      { "train_metric", "is_training_metric" },
      { "ndcg_at", "ndcg_eval_at" },
Guolin Ke's avatar
Guolin Ke committed
393
      { "eval_at", "ndcg_eval_at" },
Guolin Ke's avatar
Guolin Ke committed
394
395
      { "min_data_per_leaf", "min_data_in_leaf" },
      { "min_data", "min_data_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
396
      { "min_child_samples", "min_data_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
397
398
399
      { "min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf" },
      { "min_sum_hessian", "min_sum_hessian_in_leaf" },
      { "min_hessian", "min_sum_hessian_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
400
      { "min_child_weight", "min_sum_hessian_in_leaf" },
Guolin Ke's avatar
Guolin Ke committed
401
402
      { "num_leaf", "num_leaves" },
      { "sub_feature", "feature_fraction" },
Guolin Ke's avatar
Guolin Ke committed
403
      { "colsample_bytree", "feature_fraction" },
Guolin Ke's avatar
Guolin Ke committed
404
405
406
407
408
      { "num_iteration", "num_iterations" },
      { "num_tree", "num_iterations" },
      { "num_round", "num_iterations" },
      { "num_trees", "num_iterations" },
      { "num_rounds", "num_iterations" },
409
      { "num_boost_round", "num_iterations" },
410
      { "n_estimators", "num_iterations"},
Guolin Ke's avatar
Guolin Ke committed
411
      { "sub_row", "bagging_fraction" },
Guolin Ke's avatar
Guolin Ke committed
412
413
      { "subsample", "bagging_fraction" },
      { "subsample_freq", "bagging_freq" },
Guolin Ke's avatar
Guolin Ke committed
414
415
416
417
418
419
420
421
      { "shrinkage_rate", "learning_rate" },
      { "tree", "tree_learner" },
      { "num_machine", "num_machines" },
      { "local_port", "local_listen_port" },
      { "two_round_loading", "use_two_round_loading"},
      { "two_round", "use_two_round_loading" },
      { "mlist", "machine_list_file" },
      { "is_save_binary", "is_save_binary_file" },
Qiwei Ye's avatar
Qiwei Ye committed
422
      { "save_binary", "is_save_binary_file" },
wxchan's avatar
wxchan committed
423
      { "early_stopping_rounds", "early_stopping_round"},
424
      { "early_stopping", "early_stopping_round"},
Guolin Ke's avatar
Guolin Ke committed
425
426
427
428
429
430
431
432
      { "verbosity", "verbose" },
      { "header", "has_header" },
      { "label", "label_column" },
      { "weight", "weight_column" },
      { "group", "group_column" },
      { "query", "group_column" },
      { "query_column", "group_column" },
      { "ignore_feature", "ignore_column" },
Guolin Ke's avatar
Guolin Ke committed
433
      { "blacklist", "ignore_column" },
434
435
436
      { "categorical_feature", "categorical_column" },
      { "cat_column", "categorical_column" },
      { "cat_feature", "categorical_column" },
Guolin Ke's avatar
Guolin Ke committed
437
      { "predict_raw_score", "is_predict_raw_score" },
Guolin Ke's avatar
Guolin Ke committed
438
439
      { "raw_score", "is_predict_raw_score" },
      { "leaf_index", "is_predict_leaf_index" },
440
      { "predict_leaf_index", "is_predict_leaf_index" },
441
442
      { "contrib", "is_predict_contrib" },
      { "predict_contrib", "is_predict_contrib" },
Guolin Ke's avatar
Guolin Ke committed
443
      { "min_split_gain", "min_gain_to_split" },
Guolin Ke's avatar
Guolin Ke committed
444
      { "topk", "top_k" },
Guolin Ke's avatar
Guolin Ke committed
445
446
      { "reg_alpha", "lambda_l1" },
      { "reg_lambda", "lambda_l2" },
447
      { "num_classes", "num_class" },
Guolin Ke's avatar
Guolin Ke committed
448
      { "unbalanced_sets", "is_unbalance" },
449
      { "bagging_fraction_seed", "bagging_seed" },
450
451
      { "workers", "machines" },
      { "nodes", "machines" },
452
      { "subsample_for_bin", "bin_construct_sample_cnt" },
Guolin Ke's avatar
Guolin Ke committed
453
      { "metric_freq", "output_freq" },
454
455
456
      { "mc", "monotone_constraints" },
      { "max_tree_output", "max_delta_step" },
      { "max_leaf_output", "max_delta_step" }
Guolin Ke's avatar
Guolin Ke committed
457
    });
Guolin Ke's avatar
Guolin Ke committed
458
    const std::unordered_set<std::string> parameter_set({
wxchan's avatar
wxchan committed
459
460
      "config", "config_file", "task", "device",
      "num_threads", "seed", "boosting_type", "objective", "data",
461
      "output_model", "input_model", "output_result", "valid_data",
wxchan's avatar
wxchan committed
462
463
464
465
466
467
      "is_enable_sparse", "is_pre_partition", "is_training_metric",
      "ndcg_eval_at", "min_data_in_leaf", "min_sum_hessian_in_leaf",
      "num_leaves", "feature_fraction", "num_iterations",
      "bagging_fraction", "bagging_freq", "learning_rate", "tree_learner",
      "num_machines", "local_listen_port", "use_two_round_loading",
      "machine_list_file", "is_save_binary_file", "early_stopping_round",
Guolin Ke's avatar
Guolin Ke committed
468
      "verbose", "has_header", "label_column", "weight_column", "group_column",
wxchan's avatar
wxchan committed
469
470
471
      "ignore_column", "categorical_column", "is_predict_raw_score",
      "is_predict_leaf_index", "min_gain_to_split", "top_k",
      "lambda_l1", "lambda_l2", "num_class", "is_unbalance",
472
      "max_depth", "max_bin", "bagging_seed",
wxchan's avatar
wxchan committed
473
474
475
476
      "drop_rate", "skip_drop", "max_drop", "uniform_drop",
      "xgboost_dart_mode", "drop_seed", "top_rate", "other_rate",
      "min_data_in_bin", "data_random_seed", "bin_construct_sample_cnt",
      "num_iteration_predict", "pred_early_stop", "pred_early_stop_freq",
477
      "pred_early_stop_margin", "use_missing", "sigmoid",
wxchan's avatar
wxchan committed
478
479
      "fair_c", "poission_max_delta_step", "scale_pos_weight",
      "boost_from_average", "max_position", "label_gain",
480
      "metric", "output_freq", "time_out",
wxchan's avatar
wxchan committed
481
      "gpu_platform_id", "gpu_device_id", "gpu_use_dp",
482
      "convert_model", "convert_model_language",
Guolin Ke's avatar
Guolin Ke committed
483
484
      "feature_fraction_seed", "enable_bundle", "data_filename", "valid_data_filenames",
      "snapshot_freq", "verbosity", "sparse_threshold", "enable_load_from_binary_file",
485
      "max_conflict_rate", "poisson_max_delta_step",
486
      "histogram_pool_size", "is_provide_training_metric", "machine_list_filename", "machines",
ChenZhiyong's avatar
ChenZhiyong committed
487
      "zero_as_missing", "init_score_file", "valid_init_score_file", "is_predict_contrib",
488
      "max_cat_threshold",  "cat_smooth", "min_data_per_group", "cat_l2", "max_cat_to_onehot",
489
490
      "alpha", "reg_sqrt", "tweedie_variance_power", "monotone_constraints", "max_delta_step",
      "forced_splits"
wxchan's avatar
wxchan committed
491
    });
Guolin Ke's avatar
Guolin Ke committed
492
493
    std::unordered_map<std::string, std::string> tmp_map;
    for (const auto& pair : *params) {
wxchan's avatar
wxchan committed
494
495
496
497
498
499
500
501
502
503
504
      auto alias = alias_table.find(pair.first);
      if (alias != alias_table.end()) { // found alias
        auto alias_set = tmp_map.find(alias->second); 
        if (alias_set != tmp_map.end()) { // alias already set
          // set priority by length & alphabetically to ensure reproducible behavior
          if (alias_set->second.size() < pair.first.size() ||
            (alias_set->second.size() == pair.first.size() && alias_set->second < pair.first)) {
            Log::Warning("%s is set with %s=%s, %s=%s will be ignored. Current value: %s=%s.",
              alias->second.c_str(), alias_set->second.c_str(), params->at(alias_set->second).c_str(),
              pair.first.c_str(), pair.second.c_str(), alias->second.c_str(), params->at(alias_set->second).c_str());
          } else {
505
            Log::Warning("%s is set with %s=%s, will be overridden by %s=%s. Current value: %s=%s.",
wxchan's avatar
wxchan committed
506
507
508
509
510
511
512
513
514
              alias->second.c_str(), alias_set->second.c_str(), params->at(alias_set->second).c_str(),
              pair.first.c_str(), pair.second.c_str(), alias->second.c_str(), pair.second.c_str());
            tmp_map[alias->second] = pair.first;
          }
        } else { // alias not set
          tmp_map.emplace(alias->second, pair.first);
        }
      } else if (parameter_set.find(pair.first) == parameter_set.end()) {
        Log::Warning("Unknown parameter: %s", pair.first.c_str());
Guolin Ke's avatar
Guolin Ke committed
515
516
517
      }
    }
    for (const auto& pair : tmp_map) {
wxchan's avatar
wxchan committed
518
519
520
521
522
523
524
525
      auto alias = params->find(pair.first);
      if (alias == params->end()) { // not find
        params->emplace(pair.first, params->at(pair.second));
        params->erase(pair.second);
      } else {
        Log::Warning("%s is set=%s, %s=%s will be ignored. Current value: %s=%s.", 
          pair.first.c_str(), alias->second.c_str(), pair.second.c_str(), params->at(pair.second).c_str(),
          pair.first.c_str(), alias->second.c_str());
Guolin Ke's avatar
Guolin Ke committed
526
527
528
529
530
531
532
      }
    }
  }
};

}   // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
533
#endif   // LIGHTGBM_CONFIG_H_