config.cpp 14 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
#include <LightGBM/config.h>

#include <LightGBM/utils/common.h>
Guolin Ke's avatar
Guolin Ke committed
4
#include <LightGBM/utils/random.h>
Guolin Ke's avatar
Guolin Ke committed
5
6
7
8
#include <LightGBM/utils/log.h>

#include <vector>
#include <string>
Guolin Ke's avatar
Guolin Ke committed
9
#include <unordered_set>
Guolin Ke's avatar
Guolin Ke committed
10
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
11
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
12
13
14

namespace LightGBM {

15
std::unordered_map<std::string, std::string> ConfigBase::Str2Map(const char* parameters) {
16
  std::unordered_map<std::string, std::string> params;
17
  auto args = Common::Split(parameters, " \t\n\r");
18
19
20
21
22
23
24
25
26
  for (auto arg : args) {
    std::vector<std::string> tmp_strs = Common::Split(arg.c_str(), '=');
    if (tmp_strs.size() == 2) {
      std::string key = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[0]));
      std::string value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1]));
      if (key.size() <= 0) {
        continue;
      }
      params[key] = value;
27
    } else if (Common::Trim(arg).size() > 0) {
Qiwei Ye's avatar
Qiwei Ye committed
28
      Log::Warning("Unknown parameter %s", arg.c_str());
29
30
31
    }
  }
  ParameterAlias::KeyAliasTransform(&params);
32
  return params;
33
34
}

Guolin Ke's avatar
Guolin Ke committed
35
36
37
void OverallConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  // load main config types
  GetInt(params, "num_threads", &num_threads);
Guolin Ke's avatar
Guolin Ke committed
38
39
40
41

  // generate seeds by seed.
  if (GetInt(params, "seed", &seed)) {
    Random rand(seed);
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
46
    int int_max = std::numeric_limits<short>::max();
    io_config.data_random_seed = static_cast<int>(rand.NextShort(0, int_max));
    boosting_config.bagging_seed = static_cast<int>(rand.NextShort(0, int_max));
    boosting_config.drop_seed = static_cast<int>(rand.NextShort(0, int_max));
    boosting_config.tree_config.feature_fraction_seed = static_cast<int>(rand.NextShort(0, int_max));
Guolin Ke's avatar
Guolin Ke committed
47
  }
Guolin Ke's avatar
Guolin Ke committed
48
49
50
51
52
53
54
55
56
  GetTaskType(params);
  GetBoostingType(params);
  GetObjectiveType(params);
  GetMetricType(params);

  // sub-config setup
  network_config.Set(params);
  io_config.Set(params);

Guolin Ke's avatar
Guolin Ke committed
57
  boosting_config.Set(params);
Guolin Ke's avatar
Guolin Ke committed
58
59
60
61
  objective_config.Set(params);
  metric_config.Set(params);
  // check for conflicts
  CheckParamConflict();
Qiwei Ye's avatar
Qiwei Ye committed
62

Guolin Ke's avatar
Guolin Ke committed
63
  if (io_config.verbosity == 1) {
Qiwei Ye's avatar
Qiwei Ye committed
64
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Info);
65
  } else if (io_config.verbosity == 0) {
Qiwei Ye's avatar
Qiwei Ye committed
66
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Warning);
67
  } else if (io_config.verbosity >= 2) {
Qiwei Ye's avatar
Qiwei Ye committed
68
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Debug);
69
  } else {
Qiwei Ye's avatar
Qiwei Ye committed
70
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Fatal);
Guolin Ke's avatar
Guolin Ke committed
71
  }
Guolin Ke's avatar
Guolin Ke committed
72
73
74
75
76
}

void OverallConfig::GetBoostingType(const std::unordered_map<std::string, std::string>& params) {
  std::string value;
  if (GetString(params, "boosting_type", &value)) {
Guolin Ke's avatar
Guolin Ke committed
77
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
78
    if (value == std::string("gbdt") || value == std::string("gbrt")) {
Guolin Ke's avatar
Guolin Ke committed
79
      boosting_type = "gbdt";
80
    } else if (value == std::string("dart")) {
Guolin Ke's avatar
Guolin Ke committed
81
      boosting_type = "dart";
Guolin Ke's avatar
Guolin Ke committed
82
83
    } else if (value == std::string("goss")) {
      boosting_type = "goss";
Guolin Ke's avatar
Guolin Ke committed
84
    } else {
85
      Log::Fatal("Unknown boosting type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
86
87
88
89
90
91
92
    }
  }
}

void OverallConfig::GetObjectiveType(const std::unordered_map<std::string, std::string>& params) {
  std::string value;
  if (GetString(params, "objective", &value)) {
Guolin Ke's avatar
Guolin Ke committed
93
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
94
95
96
97
98
99
100
101
102
103
    objective_type = value;
  }
}

void OverallConfig::GetMetricType(const std::unordered_map<std::string, std::string>& params) {
  std::string value;
  if (GetString(params, "metric", &value)) {
    // clear old metrics
    metric_types.clear();
    // to lower
Guolin Ke's avatar
Guolin Ke committed
104
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
105
106
    // split
    std::vector<std::string> metrics = Common::Split(value.c_str(), ',');
107
    // remove duplicate
Guolin Ke's avatar
Guolin Ke committed
108
    std::unordered_set<std::string> metric_sets;
Guolin Ke's avatar
Guolin Ke committed
109
    for (auto& metric : metrics) {
Guolin Ke's avatar
Guolin Ke committed
110
      std::transform(metric.begin(), metric.end(), metric.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
111
112
      if (metric_sets.count(metric) <= 0) {
        metric_sets.insert(metric);
Guolin Ke's avatar
Guolin Ke committed
113
114
      }
    }
Guolin Ke's avatar
Guolin Ke committed
115
116
    for (auto& metric : metric_sets) {
      metric_types.push_back(metric);
Guolin Ke's avatar
Guolin Ke committed
117
    }
Guolin Ke's avatar
Guolin Ke committed
118
    metric_types.shrink_to_fit();
Guolin Ke's avatar
Guolin Ke committed
119
120
121
122
123
124
125
  }
}


void OverallConfig::GetTaskType(const std::unordered_map<std::string, std::string>& params) {
  std::string value;
  if (GetString(params, "task", &value)) {
Guolin Ke's avatar
Guolin Ke committed
126
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
127
128
129
130
131
132
    if (value == std::string("train") || value == std::string("training")) {
      task_type = TaskType::kTrain;
    } else if (value == std::string("predict") || value == std::string("prediction")
      || value == std::string("test")) {
      task_type = TaskType::kPredict;
    } else {
133
      Log::Fatal("Unknown task type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
134
135
136
137
138
    }
  }
}

void OverallConfig::CheckParamConflict() {
139

140
141
  // check if objective_type, metric_type, and num_class match
  bool objective_type_multiclass = (objective_type == std::string("multiclass"));
Guolin Ke's avatar
Guolin Ke committed
142
  int num_class_check = boosting_config.num_class;
143
144
145
146
147
148
149
150
  if (objective_type_multiclass) {
    if (num_class_check <= 2) {
      Log::Fatal("Number of classes should be specified and greater than 2 for multiclass training");
    }
  } else {
    if (task_type == TaskType::kTrain && num_class_check != 1) {
      Log::Fatal("Number of classes must be 1 for non-multiclass training");
    }
151
  }
wxchan's avatar
wxchan committed
152
153
154
155
156
157
158
  if (boosting_config.is_provide_training_metric || !io_config.valid_data_filenames.empty()) {
    for (std::string metric_type : metric_types) {
      bool metric_type_multiclass = (metric_type == std::string("multi_logloss") || metric_type == std::string("multi_error"));
      if ((objective_type_multiclass && !metric_type_multiclass)
        || (!objective_type_multiclass && metric_type_multiclass)) {
        Log::Fatal("Objective and metrics don't match");
      }
159
    }
160
  }
161

Guolin Ke's avatar
Guolin Ke committed
162
163
164
165
  if (network_config.num_machines > 1) {
    is_parallel = true;
  } else {
    is_parallel = false;
166
    boosting_config.tree_learner_type = "serial";
Guolin Ke's avatar
Guolin Ke committed
167
168
  }

169
  if (boosting_config.tree_learner_type == std::string("serial")) {
Guolin Ke's avatar
Guolin Ke committed
170
171
172
173
    is_parallel = false;
    network_config.num_machines = 1;
  }

174
175
  if (boosting_config.tree_learner_type == std::string("serial") 
      || boosting_config.tree_learner_type == std::string("feature")) {
Guolin Ke's avatar
Guolin Ke committed
176
    is_parallel_find_bin = false;
177
178
  } else if (boosting_config.tree_learner_type == std::string("data")
             || boosting_config.tree_learner_type == std::string("voting")) {
Guolin Ke's avatar
Guolin Ke committed
179
    is_parallel_find_bin = true;
180
181
    if (boosting_config.tree_config.histogram_pool_size >= 0 
        && boosting_config.tree_learner_type == std::string("data")) {
182
      Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f). Will disable this to reduce communication costs"
183
        , boosting_config.tree_config.histogram_pool_size);
184
      // Change pool size to -1 (not limit) when using data parallel to reduce communication costs
Guolin Ke's avatar
Guolin Ke committed
185
      boosting_config.tree_config.histogram_pool_size = -1;
186
187
    }

Guolin Ke's avatar
Guolin Ke committed
188
189
190
191
192
193
  }
}

void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetInt(params, "max_bin", &max_bin);
  CHECK(max_bin > 0);
194
  GetInt(params, "num_class", &num_class);
Guolin Ke's avatar
Guolin Ke committed
195
  GetInt(params, "data_random_seed", &data_random_seed);
196
  GetString(params, "data", &data_filename);
Qiwei Ye's avatar
Qiwei Ye committed
197
  GetInt(params, "verbose", &verbosity);
Guolin Ke's avatar
Guolin Ke committed
198
  GetInt(params, "num_iteration_predict", &num_iteration_predict);
Guolin Ke's avatar
Guolin Ke committed
199
  GetInt(params, "bin_construct_sample_cnt", &bin_construct_sample_cnt);
Guolin Ke's avatar
Guolin Ke committed
200
201
202
203
  GetBool(params, "is_pre_partition", &is_pre_partition);
  GetBool(params, "is_enable_sparse", &is_enable_sparse);
  GetBool(params, "use_two_round_loading", &use_two_round_loading);
  GetBool(params, "is_save_binary_file", &is_save_binary_file);
Guolin Ke's avatar
Guolin Ke committed
204
  GetBool(params, "enable_load_from_binary_file", &enable_load_from_binary_file);
Guolin Ke's avatar
Guolin Ke committed
205
206
  GetBool(params, "is_predict_raw_score", &is_predict_raw_score);
  GetBool(params, "is_predict_leaf_index", &is_predict_leaf_index);
Guolin Ke's avatar
Guolin Ke committed
207
208
209
210
211
212
213
  GetString(params, "output_model", &output_model);
  GetString(params, "input_model", &input_model);
  GetString(params, "output_result", &output_result);
  std::string tmp_str = "";
  if (GetString(params, "valid_data", &tmp_str)) {
    valid_data_filenames = Common::Split(tmp_str.c_str(), ',');
  }
Guolin Ke's avatar
Guolin Ke committed
214
215
216
217
218
  GetBool(params, "has_header", &has_header);
  GetString(params, "label_column", &label_column);
  GetString(params, "weight_column", &weight_column);
  GetString(params, "group_column", &group_column);
  GetString(params, "ignore_column", &ignore_column);
219
  GetString(params, "categorical_column", &categorical_column);
Guolin Ke's avatar
Guolin Ke committed
220
221
222
223
224
  GetInt(params, "min_data_in_leaf", &min_data_in_leaf);
  GetInt(params, "min_dato_in_bin", &min_data_in_bin);
  GetDouble(params, "max_conflict_rate", &max_conflict_rate);
  GetBool(params, "enable_bundle", &enable_bundle);
  GetBool(params, "adjacent_bundle", &adjacent_bundle);
Guolin Ke's avatar
Guolin Ke committed
225
226
227
228
229
}


void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetBool(params, "is_unbalance", &is_unbalance);
230
  GetDouble(params, "sigmoid", &sigmoid);
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
231
  GetDouble(params, "huber_delta", &huber_delta);
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
232
  GetDouble(params, "fair_c", &fair_c);
233
  GetDouble(params, "gaussian_eta", &gaussian_eta);
234
  GetDouble(params, "poisson_max_delta_step", &poisson_max_delta_step);
Guolin Ke's avatar
Guolin Ke committed
235
236
  GetInt(params, "max_position", &max_position);
  CHECK(max_position > 0);
237
238
  GetInt(params, "num_class", &num_class);
  CHECK(num_class >= 1);
Guolin Ke's avatar
Guolin Ke committed
239
  GetDouble(params, "scale_pos_weight", &scale_pos_weight);
Guolin Ke's avatar
Guolin Ke committed
240
241
  std::string tmp_str = "";
  if (GetString(params, "label_gain", &tmp_str)) {
Guolin Ke's avatar
Guolin Ke committed
242
    label_gain = Common::StringToArray<double>(tmp_str, ',');
Guolin Ke's avatar
Guolin Ke committed
243
244
245
  } else {
    // label_gain = 2^i - 1, may overflow, so we use 31 here
    const int max_label = 31;
246
    label_gain.push_back(0.0f);
Guolin Ke's avatar
Guolin Ke committed
247
    for (int i = 1; i < max_label; ++i) {
248
      label_gain.push_back(static_cast<double>((1 << i) - 1));
Guolin Ke's avatar
Guolin Ke committed
249
250
    }
  }
Guolin Ke's avatar
Guolin Ke committed
251
  label_gain.shrink_to_fit();
Guolin Ke's avatar
Guolin Ke committed
252
253
254
255
}


void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
256
  GetDouble(params, "sigmoid", &sigmoid);
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
257
  GetDouble(params, "huber_delta", &huber_delta);
Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
258
  GetDouble(params, "fair_c", &fair_c);
259
  GetInt(params, "num_class", &num_class);
Guolin Ke's avatar
Guolin Ke committed
260
261
  std::string tmp_str = "";
  if (GetString(params, "label_gain", &tmp_str)) {
Guolin Ke's avatar
Guolin Ke committed
262
    label_gain = Common::StringToArray<double>(tmp_str, ',');
Guolin Ke's avatar
Guolin Ke committed
263
264
265
  } else {
    // label_gain = 2^i - 1, may overflow, so we use 31 here
    const int max_label = 31;
266
    label_gain.push_back(0.0f);
Guolin Ke's avatar
Guolin Ke committed
267
    for (int i = 1; i < max_label; ++i) {
268
      label_gain.push_back(static_cast<double>((1 << i) - 1));
Guolin Ke's avatar
Guolin Ke committed
269
270
    }
  }
Guolin Ke's avatar
Guolin Ke committed
271
  label_gain.shrink_to_fit();
Guolin Ke's avatar
Guolin Ke committed
272
  if (GetString(params, "ndcg_eval_at", &tmp_str)) {
Guolin Ke's avatar
Guolin Ke committed
273
    eval_at = Common::StringToArray<int>(tmp_str, ',');
Guolin Ke's avatar
Guolin Ke committed
274
275
276
277
278
279
280
281
282
283
    std::sort(eval_at.begin(), eval_at.end());
    for (size_t i = 0; i < eval_at.size(); ++i) {
      CHECK(eval_at[i] > 0);
    }
  } else {
    // default eval ndcg @[1-5]
    for (int i = 1; i <= 5; ++i) {
      eval_at.push_back(i);
    }
  }
Guolin Ke's avatar
Guolin Ke committed
284
  eval_at.shrink_to_fit();
Guolin Ke's avatar
Guolin Ke committed
285
286
287
288
289
}


void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetInt(params, "min_data_in_leaf", &min_data_in_leaf);
290
  GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
291
  CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0);
292
  GetDouble(params, "lambda_l1", &lambda_l1);
293
294
295
296
297
298
  CHECK(lambda_l1 >= 0.0f);
  GetDouble(params, "lambda_l2", &lambda_l2);
  CHECK(lambda_l2 >= 0.0f);
  GetDouble(params, "min_gain_to_split", &min_gain_to_split);
  CHECK(min_gain_to_split >= 0.0f);
  GetInt(params, "num_leaves", &num_leaves);
299
  CHECK(num_leaves > 1);
Guolin Ke's avatar
Guolin Ke committed
300
  GetInt(params, "feature_fraction_seed", &feature_fraction_seed);
301
  GetDouble(params, "feature_fraction", &feature_fraction);
302
  CHECK(feature_fraction > 0.0f && feature_fraction <= 1.0f);
303
  GetDouble(params, "histogram_pool_size", &histogram_pool_size);
Guolin Ke's avatar
Guolin Ke committed
304
  GetInt(params, "max_depth", &max_depth);
Guolin Ke's avatar
Guolin Ke committed
305
  GetInt(params, "top_k", &top_k);
Guolin Ke's avatar
Guolin Ke committed
306
  CHECK(max_depth > 1 || max_depth < 0);
Guolin Ke's avatar
Guolin Ke committed
307
308
309
310
311
}


void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetInt(params, "num_iterations", &num_iterations);
Guolin Ke's avatar
Guolin Ke committed
312
  GetDouble(params, "sigmoid", &sigmoid);
Guolin Ke's avatar
Guolin Ke committed
313
314
315
316
  CHECK(num_iterations >= 0);
  GetInt(params, "bagging_seed", &bagging_seed);
  GetInt(params, "bagging_freq", &bagging_freq);
  CHECK(bagging_freq >= 0);
317
  GetDouble(params, "bagging_fraction", &bagging_fraction);
318
  CHECK(bagging_fraction > 0.0f && bagging_fraction <= 1.0f);
319
  GetDouble(params, "learning_rate", &learning_rate);
320
  CHECK(learning_rate > 0.0f);
wxchan's avatar
wxchan committed
321
322
  GetInt(params, "early_stopping_round", &early_stopping_round);
  CHECK(early_stopping_round >= 0);
323
324
325
  GetInt(params, "metric_freq", &output_freq);
  CHECK(output_freq >= 0);
  GetBool(params, "is_training_metric", &is_provide_training_metric);
326
  GetInt(params, "num_class", &num_class);
Guolin Ke's avatar
Guolin Ke committed
327
  GetInt(params, "drop_seed", &drop_seed);
328
  GetDouble(params, "drop_rate", &drop_rate);
329
330
331
332
  GetDouble(params, "skip_drop", &skip_drop);
  GetInt(params, "max_drop", &max_drop);
  GetBool(params, "xgboost_dart_mode", &xgboost_dart_mode);
  GetBool(params, "uniform_drop", &uniform_drop);
Guolin Ke's avatar
Guolin Ke committed
333
334
  GetDouble(params, "top_rate", &top_rate);
  GetDouble(params, "other_rate", &other_rate);
335
  CHECK(drop_rate <= 1.0 && drop_rate >= 0.0);
336
  CHECK(skip_drop <= 1.0 && skip_drop >= 0.0);
Guolin Ke's avatar
Guolin Ke committed
337
338
  GetTreeLearnerType(params);
  tree_config.Set(params);
Guolin Ke's avatar
Guolin Ke committed
339
340
}

Guolin Ke's avatar
Guolin Ke committed
341
void BoostingConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::string>& params) {
Guolin Ke's avatar
Guolin Ke committed
342
343
  std::string value;
  if (GetString(params, "tree_learner", &value)) {
Guolin Ke's avatar
Guolin Ke committed
344
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
345
    if (value == std::string("serial")) {
346
      tree_learner_type = "serial";
Guolin Ke's avatar
Guolin Ke committed
347
    } else if (value == std::string("feature") || value == std::string("feature_parallel")) {
348
      tree_learner_type = "feature";
Guolin Ke's avatar
Guolin Ke committed
349
    } else if (value == std::string("data") || value == std::string("data_parallel")) {
350
      tree_learner_type = "data";
Guolin Ke's avatar
Guolin Ke committed
351
    } else if (value == std::string("voting") || value == std::string("voting_parallel")) {
352
      tree_learner_type = "voting";
Guolin Ke's avatar
Guolin Ke committed
353
    } else {
354
      Log::Fatal("Unknown tree learner type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
    }
  }
}

// Loads the network options from `params` and range-checks them with CHECK:
// at least one machine, a positive listen port and a positive time-out.
void NetworkConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  // cluster size
  GetInt(params, "num_machines", &num_machines);
  CHECK(num_machines >= 1);

  // local socket settings
  GetInt(params, "local_listen_port", &local_listen_port);
  CHECK(local_listen_port > 0);
  GetInt(params, "time_out", &time_out);
  CHECK(time_out > 0);

  // the "machine_list_file" key is stored in machine_list_filename
  GetString(params, "machine_list_file", &machine_list_filename);
}

}  // namespace LightGBM