config.cpp 20.5 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
#include <LightGBM/config.h>

7
#include <LightGBM/cuda/vector_cudahost.h>
Guolin Ke's avatar
Guolin Ke committed
8
9
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
10
#include <LightGBM/utils/random.h>
Guolin Ke's avatar
Guolin Ke committed
11

12
#include <algorithm>
13
#include <limits>
14
15
16
17
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
18

Guolin Ke's avatar
Guolin Ke committed
19
20
namespace LightGBM {

21
void Config::KV2Map(std::unordered_map<std::string, std::vector<std::string>>* params, const char* kv) {
wxchan's avatar
wxchan committed
22
  std::vector<std::string> tmp_strs = Common::Split(kv, '=');
23
  if (tmp_strs.size() == 2 || tmp_strs.size() == 1) {
wxchan's avatar
wxchan committed
24
    std::string key = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[0]));
25
26
27
28
    std::string value = "";
    if (tmp_strs.size() == 2) {
      value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1]));
    }
wxchan's avatar
wxchan committed
29
    if (key.size() > 0) {
30
      params->operator[](key).emplace_back(value);
wxchan's avatar
wxchan committed
31
32
33
34
35
36
    }
  } else {
    Log::Warning("Unknown parameter %s", kv);
  }
}

37
38
39
40
41
42
43
44
45
46
47
/*!
 * \brief Read the first value stored under \a key and parse it as an int.
 *
 * Does nothing when \a key is not present. When the first value cannot be parsed by
 * Common::AtoiAndCheck, the problem is reported through Log::Fatal.
 *
 * \param params Map from parameter name to all values supplied for it
 * \param key Parameter name to look up
 * \param out Destination handed to Common::AtoiAndCheck
 */
void GetFirstValueAsInt(const std::unordered_map<std::string, std::vector<std::string>>& params, std::string key, int* out) {
  const auto found = params.find(key);
  if (found == params.end()) {
    return;
  }
  auto first_value = found->second[0].c_str();
  if (Common::AtoiAndCheck(first_value, out)) {
    return;
  }
  Log::Fatal("Parameter %s should be of type int, got \"%s\"", key.c_str(), first_value);
}

void Config::SetVerbosity(const std::unordered_map<std::string, std::vector<std::string>>& params) {
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
  int verbosity = 1;

  // if "verbosity" was found in params, prefer that to any other aliases
  const auto verbosity_iter = params.find("verbosity");
  if (verbosity_iter != params.end()) {
    GetFirstValueAsInt(params, "verbosity", &verbosity);
  } else {
    // if "verbose" was found in params and "verbosity" was not, use that value
    const auto verbose_iter = params.find("verbose");
    if (verbose_iter != params.end()) {
      GetFirstValueAsInt(params, "verbose", &verbosity);
    } else {
      // if "verbosity" and "verbose" were both missing from params, don't modify LightGBM's log level
      return;
    }
  }

  // otherwise, update LightGBM's log level based on the passed-in value
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  if (verbosity < 0) {
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Fatal);
  } else if (verbosity == 0) {
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Warning);
  } else if (verbosity == 1) {
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Info);
  } else {
    LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Debug);
  }
}

/*!
 * \brief Collapse a multi-valued parameter map to one value per key, keeping the first.
 *
 * For every key, the first value is inserted into \a out (an existing entry in \a out
 * is not overwritten, because emplace is used). Each additional value for the same key
 * triggers a warning saying that it is ignored. Keys with an empty value list are
 * skipped, since there is no first value to keep.
 *
 * \param params Map from parameter name to all values supplied for it
 * \param out Destination map from parameter name to the single retained value
 */
void Config::KeepFirstValues(const std::unordered_map<std::string, std::vector<std::string>>& params, std::unordered_map<std::string, std::string>* out) {
  for (const auto& pair : params) {
    const char* name = pair.first.c_str();
    // bind by reference: copying the whole value list per key is unnecessary
    const std::vector<std::string>& values = pair.second;
    if (values.empty()) {
      // indexing values[0] would be undefined behavior
      continue;
    }
    out->emplace(name, values[0]);
    for (size_t i = 1; i < values.size(); ++i) {
      Log::Warning("%s is set=%s, %s=%s will be ignored. Current value: %s=%s",
        name, values[0].c_str(),
        name, values[i].c_str(),
        name, values[0].c_str());
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
91
std::unordered_map<std::string, std::string> Config::Str2Map(const char* parameters) {
92
  std::unordered_map<std::string, std::vector<std::string>> all_params;
93
  std::unordered_map<std::string, std::string> params;
94
  auto args = Common::Split(parameters, " \t\n\r");
95
  for (auto arg : args) {
96
    KV2Map(&all_params, Common::Trim(arg).c_str());
97
  }
98
99
  SetVerbosity(all_params);
  KeepFirstValues(all_params, &params);
100
  ParameterAlias::KeyAliasTransform(&params);
101
  return params;
102
103
}

Guolin Ke's avatar
Guolin Ke committed
104
void GetBoostingType(const std::unordered_map<std::string, std::string>& params, std::string* boosting) {
Guolin Ke's avatar
Guolin Ke committed
105
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
106
  if (Config::GetString(params, "boosting", &value)) {
Guolin Ke's avatar
Guolin Ke committed
107
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
108
    if (value == std::string("gbdt") || value == std::string("gbrt")) {
Guolin Ke's avatar
Guolin Ke committed
109
      *boosting = "gbdt";
110
    } else if (value == std::string("dart")) {
Guolin Ke's avatar
Guolin Ke committed
111
      *boosting = "dart";
Guolin Ke's avatar
Guolin Ke committed
112
    } else if (value == std::string("goss")) {
Guolin Ke's avatar
Guolin Ke committed
113
      *boosting = "goss";
114
    } else if (value == std::string("rf") || value == std::string("random_forest")) {
Guolin Ke's avatar
Guolin Ke committed
115
      *boosting = "rf";
Guolin Ke's avatar
Guolin Ke committed
116
    } else {
117
      Log::Fatal("Unknown boosting type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
    }
  }
}

122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*!
 * \brief Read the "data_sample_strategy" parameter and store its canonical name.
 *
 * The lookup is case-insensitive. Accepted values are "goss" and "bagging"; any other
 * value is reported through Log::Fatal. \a strategy is left untouched when the
 * parameter is not found by Config::GetString.
 *
 * \param params Map from parameter name to value
 * \param strategy Destination for the canonical sampling strategy
 */
void GetDataSampleStrategy(const std::unordered_map<std::string, std::string>& params, std::string* strategy) {
  std::string requested;
  if (!Config::GetString(params, "data_sample_strategy", &requested)) {
    return;
  }
  std::transform(requested.begin(), requested.end(), requested.begin(), Common::tolower);
  if (requested == "goss") {
    *strategy = "goss";
  } else if (requested == "bagging") {
    *strategy = "bagging";
  } else {
    Log::Fatal("Unknown sample strategy %s", requested.c_str());
  }
}

Guolin Ke's avatar
Guolin Ke committed
136
137
138
139
140
141
142
143
144
145
146
147
148
/*!
 * \brief Split a comma-separated metric list into de-duplicated canonical names.
 *
 * Every entry is passed through ParseMetricAlias. The first occurrence of each
 * resulting name is kept, in input order; later repeats are dropped.
 *
 * \param value Comma-separated metric names
 * \param out_metric Destination vector; cleared before being filled
 */
void ParseMetrics(const std::string& value, std::vector<std::string>* out_metric) {
  out_metric->clear();
  std::unordered_set<std::string> seen;
  std::vector<std::string> raw_names = Common::Split(value.c_str(), ',');
  for (auto& raw_name : raw_names) {
    auto canonical = ParseMetricAlias(raw_name);
    // insert() reports whether the name was new, which doubles as the duplicate check
    if (seen.insert(canonical).second) {
      out_metric->push_back(canonical);
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
149
void GetObjectiveType(const std::unordered_map<std::string, std::string>& params, std::string* objective) {
Guolin Ke's avatar
Guolin Ke committed
150
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
151
  if (Config::GetString(params, "objective", &value)) {
Guolin Ke's avatar
Guolin Ke committed
152
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
153
    *objective = ParseObjectiveAlias(value);
Guolin Ke's avatar
Guolin Ke committed
154
155
156
  }
}

157
void GetMetricType(const std::unordered_map<std::string, std::string>& params, const std::string& objective, std::vector<std::string>* metric) {
Guolin Ke's avatar
Guolin Ke committed
158
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
159
  if (Config::GetString(params, "metric", &value)) {
Guolin Ke's avatar
Guolin Ke committed
160
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
161
    ParseMetrics(value, metric);
Guolin Ke's avatar
Guolin Ke committed
162
  }
163
  // add names of objective function if not providing metric
Guolin Ke's avatar
Guolin Ke committed
164
  if (metric->empty() && value.size() == 0) {
165
    ParseMetrics(objective, metric);
166
  }
Guolin Ke's avatar
Guolin Ke committed
167
168
}

Guolin Ke's avatar
Guolin Ke committed
169
void GetTaskType(const std::unordered_map<std::string, std::string>& params, TaskType* task) {
Guolin Ke's avatar
Guolin Ke committed
170
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
171
  if (Config::GetString(params, "task", &value)) {
Guolin Ke's avatar
Guolin Ke committed
172
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
Guolin Ke's avatar
Guolin Ke committed
173
    if (value == std::string("train") || value == std::string("training")) {
Guolin Ke's avatar
Guolin Ke committed
174
      *task = TaskType::kTrain;
Guolin Ke's avatar
Guolin Ke committed
175
    } else if (value == std::string("predict") || value == std::string("prediction")
Guolin Ke's avatar
Guolin Ke committed
176
               || value == std::string("test")) {
Guolin Ke's avatar
Guolin Ke committed
177
      *task = TaskType::kPredict;
178
    } else if (value == std::string("convert_model")) {
Guolin Ke's avatar
Guolin Ke committed
179
      *task = TaskType::kConvertModel;
180
    } else if (value == std::string("refit") || value == std::string("refit_tree")) {
Guolin Ke's avatar
Guolin Ke committed
181
      *task = TaskType::KRefitTree;
182
183
    } else if (value == std::string("save_binary")) {
      *task = TaskType::kSaveBinary;
Guolin Ke's avatar
Guolin Ke committed
184
    } else {
185
      Log::Fatal("Unknown task type %s", value.c_str());
Guolin Ke's avatar
Guolin Ke committed
186
187
188
189
    }
  }
}

wxchan's avatar
wxchan committed
190
void GetDeviceType(const std::unordered_map<std::string, std::string>& params, std::string* device_type) {
Guolin Ke's avatar
Guolin Ke committed
191
  std::string value;
192
  if (Config::GetString(params, "device_type", &value)) {
Guolin Ke's avatar
Guolin Ke committed
193
194
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
    if (value == std::string("cpu")) {
wxchan's avatar
wxchan committed
195
      *device_type = "cpu";
Guolin Ke's avatar
Guolin Ke committed
196
    } else if (value == std::string("gpu")) {
wxchan's avatar
wxchan committed
197
      *device_type = "gpu";
198
199
    } else if (value == std::string("cuda")) {
      *device_type = "cuda";
Guolin Ke's avatar
Guolin Ke committed
200
201
202
203
204
205
    } else {
      Log::Fatal("Unknown device type %s", value.c_str());
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
206
void GetTreeLearnerType(const std::unordered_map<std::string, std::string>& params, std::string* tree_learner) {
Guolin Ke's avatar
Guolin Ke committed
207
  std::string value;
Guolin Ke's avatar
Guolin Ke committed
208
  if (Config::GetString(params, "tree_learner", &value)) {
Guolin Ke's avatar
Guolin Ke committed
209
210
    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
    if (value == std::string("serial")) {
Guolin Ke's avatar
Guolin Ke committed
211
      *tree_learner = "serial";
Guolin Ke's avatar
Guolin Ke committed
212
    } else if (value == std::string("feature") || value == std::string("feature_parallel")) {
Guolin Ke's avatar
Guolin Ke committed
213
      *tree_learner = "feature";
Guolin Ke's avatar
Guolin Ke committed
214
    } else if (value == std::string("data") || value == std::string("data_parallel")) {
Guolin Ke's avatar
Guolin Ke committed
215
      *tree_learner = "data";
Guolin Ke's avatar
Guolin Ke committed
216
    } else if (value == std::string("voting") || value == std::string("voting_parallel")) {
Guolin Ke's avatar
Guolin Ke committed
217
      *tree_learner = "voting";
Guolin Ke's avatar
Guolin Ke committed
218
219
220
221
222
223
    } else {
      Log::Fatal("Unknown tree learner type %s", value.c_str());
    }
  }
}

Belinda Trotta's avatar
Belinda Trotta committed
224
225
226
227
228
229
230
231
232
233
void Config::GetAucMuWeights() {
  if (auc_mu_weights.empty()) {
    // equal weights for all classes
    auc_mu_weights_matrix = std::vector<std::vector<double>> (num_class, std::vector<double>(num_class, 1));
    for (size_t i = 0; i < static_cast<size_t>(num_class); ++i) {
      auc_mu_weights_matrix[i][i] = 0;
    }
  } else {
    auc_mu_weights_matrix = std::vector<std::vector<double>> (num_class, std::vector<double>(num_class, 0));
    if (auc_mu_weights.size() != static_cast<size_t>(num_class * num_class)) {
234
      Log::Fatal("auc_mu_weights must have %d elements, but found %zu", num_class * num_class, auc_mu_weights.size());
Belinda Trotta's avatar
Belinda Trotta committed
235
236
237
238
239
240
    }
    for (size_t i = 0; i < static_cast<size_t>(num_class); ++i) {
      for (size_t j = 0; j < static_cast<size_t>(num_class); ++j) {
        if (i == j) {
          auc_mu_weights_matrix[i][j] = 0;
          if (std::fabs(auc_mu_weights[i * num_class + j]) > kZeroThreshold) {
241
            Log::Info("AUC-mu matrix must have zeros on diagonal. Overwriting value in position %zu of auc_mu_weights with 0.", i * num_class + j);
Belinda Trotta's avatar
Belinda Trotta committed
242
243
244
          }
        } else {
          if (std::fabs(auc_mu_weights[i * num_class + j]) < kZeroThreshold) {
245
            Log::Fatal("AUC-mu matrix must have non-zero values for non-diagonal entries. Found zero value in position %zu of auc_mu_weights.", i * num_class + j);
Belinda Trotta's avatar
Belinda Trotta committed
246
247
248
249
250
251
          }
          auc_mu_weights_matrix[i][j] = auc_mu_weights[i * num_class + j];
        }
      }
    }
  }
252
}
Belinda Trotta's avatar
Belinda Trotta committed
253

254
255
256
257
258
259
260
261
/*!
 * \brief Fill interaction_constraints_vector from the interaction_constraints string.
 *
 * An empty string yields an empty vector; otherwise the string is parsed with
 * Common::StringToArrayofArrays using '[' and ']' as group delimiters and ',' as
 * the separator.
 */
void Config::GetInteractionConstraints() {
  if (interaction_constraints.empty()) {
    interaction_constraints_vector = std::vector<std::vector<int>>();
    return;
  }
  interaction_constraints_vector = Common::StringToArrayofArrays<int>(interaction_constraints, '[', ']', ',');
}

Guolin Ke's avatar
Guolin Ke committed
262
void Config::Set(const std::unordered_map<std::string, std::string>& params) {
Guolin Ke's avatar
Guolin Ke committed
263
264
265
  // generate seeds by seed.
  if (GetInt(params, "seed", &seed)) {
    Random rand(seed);
Guolin Ke's avatar
Guolin Ke committed
266
    int int_max = std::numeric_limits<int16_t>::max();
Guolin Ke's avatar
Guolin Ke committed
267
268
269
270
    data_random_seed = static_cast<int>(rand.NextShort(0, int_max));
    bagging_seed = static_cast<int>(rand.NextShort(0, int_max));
    drop_seed = static_cast<int>(rand.NextShort(0, int_max));
    feature_fraction_seed = static_cast<int>(rand.NextShort(0, int_max));
271
    objective_seed = static_cast<int>(rand.NextShort(0, int_max));
272
    extra_seed = static_cast<int>(rand.NextShort(0, int_max));
Guolin Ke's avatar
Guolin Ke committed
273
274
  }

Guolin Ke's avatar
Guolin Ke committed
275
276
  GetTaskType(params, &task);
  GetBoostingType(params, &boosting);
277
  GetDataSampleStrategy(params, &data_sample_strategy);
Guolin Ke's avatar
Guolin Ke committed
278
  GetObjectiveType(params, &objective);
279
  GetMetricType(params, objective, &metric);
Guolin Ke's avatar
Guolin Ke committed
280
  GetDeviceType(params, &device_type);
281
  if (device_type == std::string("cuda")) {
282
283
    LGBM_config_::current_device = lgbm_device_cuda;
  }
Guolin Ke's avatar
Guolin Ke committed
284
  GetTreeLearnerType(params, &tree_learner);
Guolin Ke's avatar
Guolin Ke committed
285

Guolin Ke's avatar
Guolin Ke committed
286
  GetMembersFromString(params);
287

Belinda Trotta's avatar
Belinda Trotta committed
288
289
  GetAucMuWeights();

290
291
  GetInteractionConstraints();

Guolin Ke's avatar
Guolin Ke committed
292
293
  // sort eval_at
  std::sort(eval_at.begin(), eval_at.end());
Guolin Ke's avatar
Guolin Ke committed
294

295
296
297
298
299
300
301
  std::vector<std::string> new_valid;
  for (size_t i = 0; i < valid.size(); ++i) {
    if (valid[i] != data) {
      // Only push the non-training data
      new_valid.push_back(valid[i]);
    } else {
      is_provide_training_metric = true;
302
303
    }
  }
304
  valid = new_valid;
305

306
307
308
309
310
  if ((task == TaskType::kSaveBinary) && !save_binary) {
    Log::Info("save_binary parameter set to true because task is save_binary");
    save_binary = true;
  }

311
  // check for conflicts
312
  CheckParamConflict(params);
Guolin Ke's avatar
Guolin Ke committed
313
314
}

Guolin Ke's avatar
Guolin Ke committed
315
/*!
 * \brief Tell whether a name is one of the multiclass objectives.
 * \param objective Objective (or metric) name to test; the comparison is case-sensitive
 * \return true for exactly "multiclass" or "multiclassova", false otherwise
 */
bool CheckMultiClassObjective(const std::string& objective) {
  if (objective == "multiclass") {
    return true;
  }
  return objective == "multiclassova";
}

319
void Config::CheckParamConflict(const std::unordered_map<std::string, std::string>& params) {
Guolin Ke's avatar
Guolin Ke committed
320
321
  // check if objective, metric, and num_class match
  int num_class_check = num_class;
Guolin Ke's avatar
Guolin Ke committed
322
  bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1);
323

324
  if (objective_type_multiclass) {
Guolin Ke's avatar
Guolin Ke committed
325
326
    if (num_class_check <= 1) {
      Log::Fatal("Number of classes should be specified and greater than 1 for multiclass training");
327
328
    }
  } else {
Guolin Ke's avatar
Guolin Ke committed
329
    if (task == TaskType::kTrain && num_class_check != 1) {
330
331
      Log::Fatal("Number of classes must be 1 for non-multiclass training");
    }
332
  }
Guolin Ke's avatar
Guolin Ke committed
333
  for (std::string metric_type : metric) {
334
335
336
    bool metric_type_multiclass = (CheckMultiClassObjective(metric_type)
                                   || metric_type == std::string("multi_logloss")
                                   || metric_type == std::string("multi_error")
Belinda Trotta's avatar
Belinda Trotta committed
337
                                   || metric_type == std::string("auc_mu")
Guolin Ke's avatar
Guolin Ke committed
338
                                   || (metric_type == std::string("custom") && num_class_check > 1));
Guolin Ke's avatar
Guolin Ke committed
339
    if ((objective_type_multiclass && !metric_type_multiclass)
340
341
        || (!objective_type_multiclass && metric_type_multiclass)) {
      Log::Fatal("Multiclass objective and metrics don't match");
342
    }
343
  }
344

Guolin Ke's avatar
Guolin Ke committed
345
  if (num_machines > 1) {
Guolin Ke's avatar
Guolin Ke committed
346
347
348
    is_parallel = true;
  } else {
    is_parallel = false;
Guolin Ke's avatar
Guolin Ke committed
349
    tree_learner = "serial";
Guolin Ke's avatar
Guolin Ke committed
350
351
  }

Guolin Ke's avatar
Guolin Ke committed
352
  bool is_single_tree_learner = tree_learner == std::string("serial");
Guolin Ke's avatar
Guolin Ke committed
353
354

  if (is_single_tree_learner) {
Guolin Ke's avatar
Guolin Ke committed
355
    is_parallel = false;
Guolin Ke's avatar
Guolin Ke committed
356
    num_machines = 1;
Guolin Ke's avatar
Guolin Ke committed
357
358
  }

Guolin Ke's avatar
Guolin Ke committed
359
  if (is_single_tree_learner || tree_learner == std::string("feature")) {
360
    is_data_based_parallel = false;
Guolin Ke's avatar
Guolin Ke committed
361
362
  } else if (tree_learner == std::string("data")
             || tree_learner == std::string("voting")) {
363
    is_data_based_parallel = true;
Guolin Ke's avatar
Guolin Ke committed
364
365
    if (histogram_pool_size >= 0
        && tree_learner == std::string("data")) {
366
367
      Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f).\n"
                   "Will disable this to reduce communication costs",
Guolin Ke's avatar
Guolin Ke committed
368
                   histogram_pool_size);
tks's avatar
tks committed
369
      // Change pool size to -1 (no limit) when using data parallel to reduce communication costs
Guolin Ke's avatar
Guolin Ke committed
370
      histogram_pool_size = -1;
371
    }
Guolin Ke's avatar
Guolin Ke committed
372
  }
373
374
375
376
377
378
  if (is_data_based_parallel) {
    if (!forcedsplits_filename.empty()) {
      Log::Fatal("Don't support forcedsplits in %s tree learner",
                 tree_learner.c_str());
    }
  }
379
380
381
382
383
384
385
386
387
388
389

  // max_depth defaults to -1, so max_depth>0 implies "you explicitly overrode the default"
  //
  // Changing max_depth while leaving num_leaves at its default (31) can lead to 2 undesirable situations:
  //
  //   * (0 <= max_depth <= 4) it's not possible to produce a tree with 31 leaves
  //     - this block reduces num_leaves to 2^max_depth
  //   * (max_depth > 4) 31 leaves is less than a full depth-wise tree, which might lead to underfitting
  //     - this block warns about that
  // ref: https://github.com/microsoft/LightGBM/issues/2898#issuecomment-1002860601
  if (max_depth > 0 && (params.count("num_leaves") == 0 || params.at("num_leaves").empty())) {
390
    double full_num_leaves = std::pow(2, max_depth);
391
392
393
394
395
396
    if (full_num_leaves > num_leaves) {
      Log::Warning("Provided parameters constrain tree depth (max_depth=%d) without explicitly setting 'num_leaves'. "
                   "This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<=%.0f) in params. "
                   "Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity.",
                   max_depth,
                   full_num_leaves);
397
    }
398
399
400
401
402

    if (full_num_leaves < num_leaves) {
      // Fits in an int, and is more restrictive than the current num_leaves
      num_leaves = static_cast<int>(full_num_leaves);
    }
403
  }
404
  if (device_type == std::string("gpu")) {
405
    // force col-wise for gpu version
406
407
    force_col_wise = true;
    force_row_wise = false;
Guolin Ke's avatar
Guolin Ke committed
408
409
410
    if (deterministic) {
      Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
    }
411
412
413
414
    if (use_quantized_grad) {
      Log::Warning("Quantized training is not supported by GPU tree learner. Switch to full precision training.");
      use_quantized_grad = false;
    }
415
416
  } else if (device_type == std::string("cuda")) {
    // force row-wise for cuda version
417
418
419
420
421
    force_col_wise = false;
    force_row_wise = true;
    if (deterministic) {
      Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
    }
422
  }
Andrew Ziem's avatar
Andrew Ziem committed
423
  // linear tree learner must be serial type and run on CPU device
424
  if (linear_tree) {
425
    if (device_type != std::string("cpu") && device_type != std::string("gpu")) {
426
      device_type = "cpu";
427
      Log::Warning("Linear tree learner only works with CPU and GPU. Falling back to CPU now.");
428
429
430
431
432
433
434
435
    }
    if (tree_learner != std::string("serial")) {
      tree_learner = "serial";
      Log::Warning("Linear tree learner must be serial.");
    }
    if (zero_as_missing) {
      Log::Fatal("zero_as_missing must be false when fitting linear trees.");
    }
436
    if (objective == std::string("regression_l1")) {
437
438
439
      Log::Fatal("Cannot use regression_l1 objective when fitting linear trees.");
    }
  }
Belinda Trotta's avatar
Belinda Trotta committed
440
441
442
443
444
445
446
447
  // min_data_in_leaf must be at least 2 if path smoothing is active. This is because when the split is calculated
  // the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int, so it can
  // be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path smoothing the
  // calculated split gain can be positive even with zero gradient and hessian.
  if (path_smooth > kEpsilon && min_data_in_leaf < 2) {
    min_data_in_leaf = 2;
    Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active.");
  }
448
  if (is_parallel && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) {
449
    // In distributed mode, local node doesn't have histograms on all features, cannot perform "intermediate" monotone constraints.
450
    Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in distributed learning, auto set to \"basic\" method.");
451
452
    monotone_constraints_method = "basic";
  }
453
  if (feature_fraction_bynode != 1.0 && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) {
454
455
    // "intermediate" monotone constraints need to recompute splits. If the features are sampled when computing the
    // split initially, then the sampling needs to be recorded or done once again, which is currently not supported
456
    Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method.");
457
458
    monotone_constraints_method = "basic";
  }
459
460
461
  if (max_depth > 0 && monotone_penalty >= max_depth) {
    Log::Warning("Monotone penalty greater than tree depth. Monotone features won't be used.");
  }
462
463
464
465
466
467
  if (min_data_in_leaf <= 0 && min_sum_hessian_in_leaf <= kEpsilon) {
    Log::Warning(
        "Cannot set both min_data_in_leaf and min_sum_hessian_in_leaf to 0. "
        "Will set min_data_in_leaf to 1.");
    min_data_in_leaf = 1;
  }
468
469
470
471
472
473
  if (boosting == std::string("goss")) {
    boosting = std::string("gbdt");
    data_sample_strategy = std::string("goss");
    Log::Warning("Found boosting=goss. For backwards compatibility reasons, LightGBM interprets this as boosting=gbdt, data_sample_strategy=goss."
                 "To suppress this warning, set data_sample_strategy=goss instead.");
  }
474
475
476
477
478

  if (bagging_by_query && data_sample_strategy != std::string("bagging")) {
    Log::Warning("bagging_by_query=true is only compatible with data_sample_strategy=bagging. Setting bagging_by_query=false.");
    bagging_by_query = false;
  }
Guolin Ke's avatar
Guolin Ke committed
479
480
}

Guolin Ke's avatar
Guolin Ke committed
481
482
483
484
485
486
487
488
489
/*!
 * \brief Render the configuration as text, one "[name: value]" entry per line.
 *
 * The boosting, objective, metric (comma-joined), tree_learner and device_type
 * entries come first, followed by whatever SaveMembersToString returns.
 *
 * \return The rendered configuration
 */
std::string Config::ToString() const {
  std::stringstream rendered;
  rendered << "[boosting: " << boosting << "]\n"
           << "[objective: " << objective << "]\n"
           << "[metric: " << Common::Join(metric, ",") << "]\n"
           << "[tree_learner: " << tree_learner << "]\n"
           << "[device_type: " << device_type << "]\n"
           << SaveMembersToString();
  return rendered.str();
}

492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
/*!
 * \brief Dump the parameter-to-aliases table as JSON-like text.
 *
 * Each parameter is written on its own line as `"name": ["alias", ...]`; lines after
 * the first are prefixed with a comma. The aliases of each parameter are ordered
 * with SortAlias before being written.
 *
 * \return The rendered alias table
 */
const std::string Config::DumpAliases() {
  auto aliases_by_param = Config::parameter2aliases();
  for (auto& entry : aliases_by_param) {
    std::sort(entry.second.begin(), entry.second.end(), SortAlias);
  }

  std::stringstream rendered;
  rendered << "{\n";
  // the first entry opens without a comma; every later one is comma-prefixed
  const char* entry_prefix = "   \"";
  for (const auto& entry : aliases_by_param) {
    rendered << entry_prefix;
    entry_prefix = "   , \"";
    rendered << entry.first << "\": [";
    if (!entry.second.empty()) {
      rendered << "\"" << CommonC::Join(entry.second, "\", \"") << "\"";
    }
    rendered << "]\n";
  }
  rendered << "}\n";
  return rendered.str();
}

Guolin Ke's avatar
Guolin Ke committed
517
}  // namespace LightGBM