"vscode:/vscode.git/clone" did not exist on "441f38bb8140cab9ea3076f903e66541a5c48785"
config.cpp 13.8 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
5
6
7

#include <limits>

Guolin Ke's avatar
Guolin Ke committed
8
9
10
11
#include <LightGBM/config.h>

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
12
#include <LightGBM/utils/random.h>
Guolin Ke's avatar
Guolin Ke committed
13
14
15

namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
16
void Config::KV2Map(std::unordered_map<std::string, std::string>* params, const char* kv) {
wxchan's avatar
wxchan committed
17
  std::vector<std::string> tmp_strs = Common::Split(kv, '=');
18
  if (tmp_strs.size() == 2 || tmp_strs.size() == 1) {
wxchan's avatar
wxchan committed
19
    std::string key = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[0]));
20
21
22
23
    std::string value = "";
    if (tmp_strs.size() == 2) {
      value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1]));
    }
wxchan's avatar
wxchan committed
24
    if (key.size() > 0) {
Guolin Ke's avatar
Guolin Ke committed
25
26
27
      auto value_search = params->find(key);
      if (value_search == params->end()) {  // not set
        params->emplace(key, value);
wxchan's avatar
wxchan committed
28
      } else {
29
        Log::Warning("%s is set=%s, %s=%s will be ignored. Current value: %s=%s",
wxchan's avatar
wxchan committed
30
31
32
33
34
35
36
37
38
          key.c_str(), value_search->second.c_str(), key.c_str(), value.c_str(),
          key.c_str(), value_search->second.c_str());
      }
    }
  } else {
    Log::Warning("Unknown parameter %s", kv);
  }
}

Guolin Ke's avatar
Guolin Ke committed
39
std::unordered_map<std::string, std::string> Config::Str2Map(const char* parameters) {
40
  std::unordered_map<std::string, std::string> params;
41
  auto args = Common::Split(parameters, " \t\n\r");
42
  for (auto arg : args) {
Guolin Ke's avatar
Guolin Ke committed
43
    KV2Map(&params, Common::Trim(arg).c_str());
44
45
  }
  ParameterAlias::KeyAliasTransform(&params);
46
  return params;
47
48
}

Guolin Ke's avatar
Guolin Ke committed
49
void GetBoostingType(const std::unordered_map<std::string, std::string>& params, std::string* boosting) {
Guolin Ke's avatar
Guolin Ke committed
50
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
51
  if (Config::GetString(params, "boosting", &value)) {
Guolin Ke's avatar
Guolin Ke committed
52
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
53
    if (value == std::string("gbdt") || value == std::string("gbrt")) {
Guolin Ke's avatar
Guolin Ke committed
54
      *boosting = "gbdt";
55
    } else if (value == std::string("dart")) {
Guolin Ke's avatar
Guolin Ke committed
56
      *boosting = "dart";
Guolin Ke's avatar
Guolin Ke committed
57
    } else if (value == std::string("goss")) {
Guolin Ke's avatar
Guolin Ke committed
58
      *boosting = "goss";
59
    } else if (value == std::string("rf") || value == std::string("random_forest")) {
Guolin Ke's avatar
Guolin Ke committed
60
      *boosting = "rf";
Guolin Ke's avatar
Guolin Ke committed
61
    } else {
62
      Log::Fatal("Unknown boosting type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
67
68
69
70
71
72
73
74
75
76
77
78
79
/*!
 * \brief Split a comma-separated metric list and store the de-duplicated result.
 *
 * Each entry is mapped through ParseMetricAlias. The first occurrence of every
 * resulting name is kept, in input order. out_metric is cleared first.
 *
 * \param value      Comma-separated metric names
 * \param out_metric Output: unique metric names
 */
void ParseMetrics(const std::string& value, std::vector<std::string>* out_metric) {
  out_metric->clear();
  std::unordered_set<std::string> seen;
  std::vector<std::string> entries = Common::Split(value.c_str(), ',');
  for (auto& entry : entries) {
    auto resolved = ParseMetricAlias(entry);
    // insert() reports whether the name was new, so one lookup is enough
    if (seen.insert(resolved).second) {
      out_metric->push_back(resolved);
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
80
void GetObjectiveType(const std::unordered_map<std::string, std::string>& params, std::string* objective) {
Guolin Ke's avatar
Guolin Ke committed
81
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
82
  if (Config::GetString(params, "objective", &value)) {
Guolin Ke's avatar
Guolin Ke committed
83
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
84
    *objective = ParseObjectiveAlias(value);
Guolin Ke's avatar
Guolin Ke committed
85
86
87
  }
}

Guolin Ke's avatar
Guolin Ke committed
88
void GetMetricType(const std::unordered_map<std::string, std::string>& params, std::vector<std::string>* metric) {
Guolin Ke's avatar
Guolin Ke committed
89
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
90
  if (Config::GetString(params, "metric", &value)) {
Guolin Ke's avatar
Guolin Ke committed
91
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
92
    ParseMetrics(value, metric);
Guolin Ke's avatar
Guolin Ke committed
93
  }
94
  // add names of objective function if not providing metric
Guolin Ke's avatar
Guolin Ke committed
95
96
  if (metric->empty() && value.size() == 0) {
    if (Config::GetString(params, "objective", &value)) {
97
      std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
98
      ParseMetrics(value, metric);
99
100
    }
  }
Guolin Ke's avatar
Guolin Ke committed
101
102
}

Guolin Ke's avatar
Guolin Ke committed
103
void GetTaskType(const std::unordered_map<std::string, std::string>& params, TaskType* task) {
Guolin Ke's avatar
Guolin Ke committed
104
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
105
  if (Config::GetString(params, "task", &value)) {
Guolin Ke's avatar
Guolin Ke committed
106
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
107
    if (value == std::string("train") || value == std::string("training")) {
Guolin Ke's avatar
Guolin Ke committed
108
      *task = TaskType::kTrain;
Guolin Ke's avatar
Guolin Ke committed
109
    } else if (value == std::string("predict") || value == std::string("prediction")
Guolin Ke's avatar
Guolin Ke committed
110
               || value == std::string("test")) {
Guolin Ke's avatar
Guolin Ke committed
111
      *task = TaskType::kPredict;
112
    } else if (value == std::string("convert_model")) {
Guolin Ke's avatar
Guolin Ke committed
113
      *task = TaskType::kConvertModel;
114
    } else if (value == std::string("refit") || value == std::string("refit_tree")) {
Guolin Ke's avatar
Guolin Ke committed
115
      *task = TaskType::KRefitTree;
Guolin Ke's avatar
Guolin Ke committed
116
    } else {
117
      Log::Fatal("Unknown task type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
    }
  }
}

wxchan's avatar
wxchan committed
122
void GetDeviceType(const std::unordered_map<std::string, std::string>& params, std::string* device_type) {
Guolin Ke's avatar
Guolin Ke committed
123
  std::string value;
124
  if (Config::GetString(params, "device_type", &value)) {
Guolin Ke's avatar
Guolin Ke committed
125
126
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
    if (value == std::string("cpu")) {
wxchan's avatar
wxchan committed
127
      *device_type = "cpu";
Guolin Ke's avatar
Guolin Ke committed
128
    } else if (value == std::string("gpu")) {
wxchan's avatar
wxchan committed
129
      *device_type = "gpu";
Guolin Ke's avatar
Guolin Ke committed
130
131
132
133
134
135
    } else {
      Log::Fatal("Unknown device type %s", value.c_str());
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
136
void GetTreeLearnerType(const std::unordered_map<std::string, std::string>& params, std::string* tree_learner) {
Guolin Ke's avatar
Guolin Ke committed
137
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
138
  if (Config::GetString(params, "tree_learner", &value)) {
Guolin Ke's avatar
Guolin Ke committed
139
140
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
    if (value == std::string("serial")) {
Guolin Ke's avatar
Guolin Ke committed
141
      *tree_learner = "serial";
Guolin Ke's avatar
Guolin Ke committed
142
    } else if (value == std::string("feature") || value == std::string("feature_parallel")) {
Guolin Ke's avatar
Guolin Ke committed
143
      *tree_learner = "feature";
Guolin Ke's avatar
Guolin Ke committed
144
    } else if (value == std::string("data") || value == std::string("data_parallel")) {
Guolin Ke's avatar
Guolin Ke committed
145
      *tree_learner = "data";
Guolin Ke's avatar
Guolin Ke committed
146
    } else if (value == std::string("voting") || value == std::string("voting_parallel")) {
Guolin Ke's avatar
Guolin Ke committed
147
      *tree_learner = "voting";
Guolin Ke's avatar
Guolin Ke committed
148
149
150
151
152
153
    } else {
      Log::Fatal("Unknown tree learner type %s", value.c_str());
    }
  }
}

Belinda Trotta's avatar
Belinda Trotta committed
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*!
 * \brief Build auc_mu_weights_matrix (num_class x num_class) from auc_mu_weights.
 *
 * - If auc_mu_weights is empty, every off-diagonal entry is 1 and the diagonal is 0.
 * - Otherwise auc_mu_weights is read as a row-major num_class x num_class matrix.
 *   Its length must be num_class * num_class, otherwise Log::Fatal is called.
 *   Diagonal entries are forced to 0 (an Info message is logged when the supplied
 *   value exceeded kZeroThreshold in magnitude). Off-diagonal entries whose
 *   magnitude is below kZeroThreshold are rejected through Log::Fatal.
 *
 * All index arithmetic is done in size_t and logged with %zu, so the printf-style
 * arguments match their conversion specifiers.
 */
void Config::GetAucMuWeights() {
  const size_t n = static_cast<size_t>(num_class);
  if (auc_mu_weights.empty()) {
    // equal weights for all classes
    auc_mu_weights_matrix = std::vector<std::vector<double>>(n, std::vector<double>(n, 1));
    for (size_t i = 0; i < n; ++i) {
      auc_mu_weights_matrix[i][i] = 0;
    }
    return;
  }

  auc_mu_weights_matrix = std::vector<std::vector<double>>(n, std::vector<double>(n, 0));
  // n * n is computed in size_t to avoid int overflow of num_class * num_class
  const size_t expected_size = n * n;
  if (auc_mu_weights.size() != expected_size) {
    Log::Fatal("auc_mu_weights must have %zu elements, but found %zu", expected_size, auc_mu_weights.size());
  }
  for (size_t i = 0; i < n; ++i) {
    for (size_t j = 0; j < n; ++j) {
      const size_t pos = i * n + j;  // row-major position inside auc_mu_weights
      if (i == j) {
        auc_mu_weights_matrix[i][j] = 0;
        if (std::fabs(auc_mu_weights[pos]) > kZeroThreshold) {
          Log::Info("AUC-mu matrix must have zeros on diagonal. Overwriting value in position %zu of auc_mu_weights with 0.", pos);
        }
      } else {
        if (std::fabs(auc_mu_weights[pos]) < kZeroThreshold) {
          Log::Fatal("AUC-mu matrix must have non-zero values for non-diagonal entries. Found zero value in position %zu of auc_mu_weights.", pos);
        }
        auc_mu_weights_matrix[i][j] = auc_mu_weights[pos];
      }
    }
  }
}
Belinda Trotta's avatar
Belinda Trotta committed
183

Guolin Ke's avatar
Guolin Ke committed
184
void Config::Set(const std::unordered_map<std::string, std::string>& params) {
Guolin Ke's avatar
Guolin Ke committed
185
186
187
  // generate seeds by seed.
  if (GetInt(params, "seed", &seed)) {
    Random rand(seed);
Guolin Ke's avatar
Guolin Ke committed
188
    int int_max = std::numeric_limits<int16_t>::max();
Guolin Ke's avatar
Guolin Ke committed
189
190
191
192
    data_random_seed = static_cast<int>(rand.NextShort(0, int_max));
    bagging_seed = static_cast<int>(rand.NextShort(0, int_max));
    drop_seed = static_cast<int>(rand.NextShort(0, int_max));
    feature_fraction_seed = static_cast<int>(rand.NextShort(0, int_max));
193
    objective_seed = static_cast<int>(rand.NextShort(0, int_max));
194
    extra_seed = static_cast<int>(rand.NextShort(0, int_max));
Guolin Ke's avatar
Guolin Ke committed
195
196
  }

Guolin Ke's avatar
Guolin Ke committed
197
198
199
200
201
202
  GetTaskType(params, &task);
  GetBoostingType(params, &boosting);
  GetMetricType(params, &metric);
  GetObjectiveType(params, &objective);
  GetDeviceType(params, &device_type);
  GetTreeLearnerType(params, &tree_learner);
Guolin Ke's avatar
Guolin Ke committed
203

Guolin Ke's avatar
Guolin Ke committed
204
  GetMembersFromString(params);
205

Belinda Trotta's avatar
Belinda Trotta committed
206
207
  GetAucMuWeights();

Guolin Ke's avatar
Guolin Ke committed
208
209
  // sort eval_at
  std::sort(eval_at.begin(), eval_at.end());
Guolin Ke's avatar
Guolin Ke committed
210

211
212
213
214
215
216
217
  std::vector<std::string> new_valid;
  for (size_t i = 0; i < valid.size(); ++i) {
    if (valid[i] != data) {
      // Only push the non-training data
      new_valid.push_back(valid[i]);
    } else {
      is_provide_training_metric = true;
218
219
    }
  }
220
  valid = new_valid;
221

Guolin Ke's avatar
Guolin Ke committed
222
223
  // check for conflicts
  CheckParamConflict();
224

Guolin Ke's avatar
Guolin Ke committed
225
  if (verbosity == 1) {
Guolin Ke's avatar
Guolin Ke committed
226
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Info);
Guolin Ke's avatar
Guolin Ke committed
227
  } else if (verbosity == 0) {
Guolin Ke's avatar
Guolin Ke committed
228
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Warning);
Guolin Ke's avatar
Guolin Ke committed
229
  } else if (verbosity >= 2) {
Guolin Ke's avatar
Guolin Ke committed
230
231
232
233
234
235
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Debug);
  } else {
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Fatal);
  }
}

Guolin Ke's avatar
Guolin Ke committed
236
/*!
 * \brief Tell whether a name is one of the multiclass objectives.
 * \param objective Name to test (compared exactly, case-sensitive)
 * \return true for "multiclass" or "multiclassova", false otherwise
 */
bool CheckMultiClassObjective(const std::string& objective) {
  if (objective == "multiclass") {
    return true;
  }
  return objective == "multiclassova";
}

Guolin Ke's avatar
Guolin Ke committed
240
241
242
void Config::CheckParamConflict() {
  // check if objective, metric, and num_class match
  int num_class_check = num_class;
Guolin Ke's avatar
Guolin Ke committed
243
  bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1);
244

245
  if (objective_type_multiclass) {
Guolin Ke's avatar
Guolin Ke committed
246
247
    if (num_class_check <= 1) {
      Log::Fatal("Number of classes should be specified and greater than 1 for multiclass training");
248
249
    }
  } else {
Guolin Ke's avatar
Guolin Ke committed
250
    if (task == TaskType::kTrain && num_class_check != 1) {
251
252
      Log::Fatal("Number of classes must be 1 for non-multiclass training");
    }
253
  }
Guolin Ke's avatar
Guolin Ke committed
254
  for (std::string metric_type : metric) {
255
256
257
    bool metric_type_multiclass = (CheckMultiClassObjective(metric_type)
                                   || metric_type == std::string("multi_logloss")
                                   || metric_type == std::string("multi_error")
Belinda Trotta's avatar
Belinda Trotta committed
258
                                   || metric_type == std::string("auc_mu")
Guolin Ke's avatar
Guolin Ke committed
259
                                   || (metric_type == std::string("custom") && num_class_check > 1));
Guolin Ke's avatar
Guolin Ke committed
260
    if ((objective_type_multiclass && !metric_type_multiclass)
261
262
        || (!objective_type_multiclass && metric_type_multiclass)) {
      Log::Fatal("Multiclass objective and metrics don't match");
263
    }
264
  }
265

Guolin Ke's avatar
Guolin Ke committed
266
  if (num_machines > 1) {
Guolin Ke's avatar
Guolin Ke committed
267
268
269
    is_parallel = true;
  } else {
    is_parallel = false;
Guolin Ke's avatar
Guolin Ke committed
270
    tree_learner = "serial";
Guolin Ke's avatar
Guolin Ke committed
271
272
  }

Guolin Ke's avatar
Guolin Ke committed
273
  bool is_single_tree_learner = tree_learner == std::string("serial");
Guolin Ke's avatar
Guolin Ke committed
274
275

  if (is_single_tree_learner) {
Guolin Ke's avatar
Guolin Ke committed
276
    is_parallel = false;
Guolin Ke's avatar
Guolin Ke committed
277
    num_machines = 1;
Guolin Ke's avatar
Guolin Ke committed
278
279
  }

Guolin Ke's avatar
Guolin Ke committed
280
  if (is_single_tree_learner || tree_learner == std::string("feature")) {
281
    is_data_based_parallel = false;
Guolin Ke's avatar
Guolin Ke committed
282
283
  } else if (tree_learner == std::string("data")
             || tree_learner == std::string("voting")) {
284
    is_data_based_parallel = true;
Guolin Ke's avatar
Guolin Ke committed
285
286
    if (histogram_pool_size >= 0
        && tree_learner == std::string("data")) {
287
288
      Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f).\n"
                   "Will disable this to reduce communication costs",
Guolin Ke's avatar
Guolin Ke committed
289
                   histogram_pool_size);
tks's avatar
tks committed
290
      // Change pool size to -1 (no limit) when using data parallel to reduce communication costs
Guolin Ke's avatar
Guolin Ke committed
291
      histogram_pool_size = -1;
292
    }
Guolin Ke's avatar
Guolin Ke committed
293
  }
294
295
296
297
298
299
  if (is_data_based_parallel) {
    if (!forcedsplits_filename.empty()) {
      Log::Fatal("Don't support forcedsplits in %s tree learner",
                 tree_learner.c_str());
    }
  }
300
  // Check max_depth and num_leaves
Guolin Ke's avatar
Guolin Ke committed
301
  if (max_depth > 0) {
302
    double full_num_leaves = std::pow(2, max_depth);
303
    if (full_num_leaves > num_leaves
Guolin Ke's avatar
Guolin Ke committed
304
        && num_leaves == kDefaultNumLeaves) {
305
      Log::Warning("Accuracy may be bad since you didn't set num_leaves and 2^max_depth > num_leaves");
306
    }
307
308
309
310
311

    if (full_num_leaves < num_leaves) {
      // Fits in an int, and is more restrictive than the current num_leaves
      num_leaves = static_cast<int>(full_num_leaves);
    }
312
  }
313
314
315
316
317
  // force col-wise for gpu
  if (device_type == std::string("gpu")) {
    force_col_wise = true;
    force_row_wise = false;
  }
Belinda Trotta's avatar
Belinda Trotta committed
318
319
320
321
322
323
324
325
  // min_data_in_leaf must be at least 2 if path smoothing is active. This is because when the split is calculated
  // the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int, so it can
  // be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path smoothing the
  // calculated split gain can be positive even with zero gradient and hessian.
  if (path_smooth > kEpsilon && min_data_in_leaf < 2) {
    min_data_in_leaf = 2;
    Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active.");
  }
326
327
328
329
330
331
332
333
334
335
336
  if (is_parallel && monotone_constraints_method == std::string("intermediate")) {
    // In distributed mode, local node doesn't have histograms on all features, cannot perform "intermediate" monotone constraints.
    Log::Warning("Cannot use \"intermediate\" monotone constraints in parallel learning, auto set to \"basic\" method.");
    monotone_constraints_method = "basic";
  }
  if (feature_fraction_bynode != 1.0 && monotone_constraints_method == std::string("intermediate")) {
    // "intermediate" monotone constraints need to recompute splits. If the features are sampled when computing the
    // split initially, then the sampling needs to be recorded or done once again, which is currently not supported
    Log::Warning("Cannot use \"intermediate\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method.");
    monotone_constraints_method = "basic";
  }
337
338
339
  if (max_depth > 0 && monotone_penalty >= max_depth) {
    Log::Warning("Monotone penalty greater than tree depth. Monotone features won't be used.");
  }
Guolin Ke's avatar
Guolin Ke committed
340
341
}

Guolin Ke's avatar
Guolin Ke committed
342
343
344
345
346
347
348
349
350
/*!
 * \brief Serialize the configuration as text.
 *
 * Emits one "[name: value]" line each for boosting, objective, metric
 * (comma-joined), tree_learner and device_type, followed by the output of
 * SaveMembersToString().
 *
 * \return Text form of the configuration
 */
std::string Config::ToString() const {
  std::stringstream out;
  out << "[boosting: " << boosting << "]\n"
      << "[objective: " << objective << "]\n"
      << "[metric: " << Common::Join(metric, ",") << "]\n"
      << "[tree_learner: " << tree_learner << "]\n"
      << "[device_type: " << device_type << "]\n"
      << SaveMembersToString();
  return out.str();
}

}  // namespace LightGBM