"vscode:/vscode.git/clone" did not exist on "b27d81ea411d04d8d071d4d4e75c19ffa15c5795"
/*!
 * Copyright (c) 2017 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#include <LightGBM/config.h>
#include <LightGBM/metric.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/common.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iterator>
#include <limits>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "gbdt.h"

namespace LightGBM {

/*! \brief Version tag written into the output of DumpModel ("version" field) and SaveModelToString ("version=" line) */
const char* kModelVersion = "v4";

std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_importance_type) const {
Guolin Ke's avatar
Guolin Ke committed
22
  std::stringstream str_buf;
23
  Common::C_stringstream(str_buf);
Guolin Ke's avatar
Guolin Ke committed
24
25

  str_buf << "{";
26
27
28
29
30
31
  str_buf << "\"name\":\"" << SubModelName() << "\"," << '\n';
  str_buf << "\"version\":\"" << kModelVersion << "\"," << '\n';
  str_buf << "\"num_class\":" << num_class_ << "," << '\n';
  str_buf << "\"num_tree_per_iteration\":" << num_tree_per_iteration_ << "," << '\n';
  str_buf << "\"label_index\":" << label_idx_ << "," << '\n';
  str_buf << "\"max_feature_idx\":" << max_feature_idx_ << "," << '\n';
32
33
34
  if (objective_function_ != nullptr) {
    str_buf << "\"objective\":\"" << objective_function_->ToString() << "\",\n";
  }
Guolin Ke's avatar
Guolin Ke committed
35

36
37
  str_buf << "\"average_output\":" << (average_output_ ? "true" : "false") << ",\n";

38
  str_buf << "\"feature_names\":[\"" << CommonC::Join(feature_names_, "\",\"")
39
40
41
          << "\"]," << '\n';

  str_buf << "\"monotone_constraints\":["
42
          << CommonC::Join(monotone_constraints_, ",") << "]," << '\n';
Guolin Ke's avatar
Guolin Ke committed
43

44
45
46
47
  str_buf << "\"feature_infos\":" << "{";
  bool first_obj = true;
  for (size_t i = 0; i < feature_infos_.size(); ++i) {
    std::stringstream json_str_buf;
48
    Common::C_stringstream(json_str_buf);
49
50
51
52
    auto strs = Common::Split(feature_infos_[i].c_str(), ":");
    if (strs[0][0] == '[') {
      strs[0].erase(0, 1);  // remove '['
      strs[1].erase(strs[1].size() - 1);  // remove ']'
53
54
55
56
57
58
      double max_, min_;
      Common::Atof(strs[0].c_str(), &min_);
      Common::Atof(strs[1].c_str(), &max_);
      json_str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
      json_str_buf << "{\"min_value\":" << Common::AvoidInf(min_) << ",";
      json_str_buf << "\"max_value\":" << Common::AvoidInf(max_) << ",";
59
60
      json_str_buf << "\"values\":[]}";
    } else if (strs[0] != "none") {  // categorical feature
61
      auto vals = CommonC::StringToArray<int>(feature_infos_[i], ':');
62
63
64
65
      auto max_idx = ArrayArgs<int>::ArgMax(vals);
      auto min_idx = ArrayArgs<int>::ArgMin(vals);
      json_str_buf << "{\"min_value\":" << vals[min_idx] << ",";
      json_str_buf << "\"max_value\":" << vals[max_idx] << ",";
66
      json_str_buf << "\"values\":[" << CommonC::Join(vals, ",") << "]}";
67
68
69
70
71
72
73
74
75
76
77
78
    } else {  // unused feature
      continue;
    }
    if (!first_obj) {
      str_buf << ",";
    }
    str_buf << "\"" << feature_names_[i] << "\":";
    str_buf << json_str_buf.str();
    first_obj = false;
  }
  str_buf << "}," << '\n';

Guolin Ke's avatar
Guolin Ke committed
79
80
  str_buf << "\"tree_info\":[";
  int num_used_model = static_cast<int>(models_.size());
81
82
83
  int total_iteration = num_used_model / num_tree_per_iteration_;
  start_iteration = std::max(start_iteration, 0);
  start_iteration = std::min(start_iteration, total_iteration);
Guolin Ke's avatar
Guolin Ke committed
84
  if (num_iteration > 0) {
85
86
    int end_iteration = start_iteration + num_iteration;
    num_used_model = std::min(end_iteration * num_tree_per_iteration_ , num_used_model);
Guolin Ke's avatar
Guolin Ke committed
87
  }
88
89
90
  int start_model = start_iteration * num_tree_per_iteration_;
  for (int i = start_model; i < num_used_model; ++i) {
    if (i > start_model) {
Guolin Ke's avatar
Guolin Ke committed
91
92
93
94
95
96
97
      str_buf << ",";
    }
    str_buf << "{";
    str_buf << "\"tree_index\":" << i << ",";
    str_buf << models_[i]->ToJSON();
    str_buf << "}";
  }
98
99
  str_buf << "]," << '\n';

100
101
  std::vector<double> feature_importances = FeatureImportance(
      num_iteration, feature_importance_type);
102
103
104
105
106
107
108
109
110
  // store the importance first
  std::vector<std::pair<size_t, std::string>> pairs;
  for (size_t i = 0; i < feature_importances.size(); ++i) {
    size_t feature_importances_int = static_cast<size_t>(feature_importances[i]);
    if (feature_importances_int > 0) {
      pairs.emplace_back(feature_importances_int, feature_names_[i]);
    }
  }
  str_buf << '\n' << "\"feature_importances\":" << "{";
111
112
  for (size_t i = 0; i < pairs.size(); ++i) {
    if (i > 0) {
113
114
      str_buf << ",";
    }
115
    str_buf << "\"" << pairs[i].second << "\":" << std::to_string(pairs[i].first);
116
117
  }
  str_buf << "}" << '\n';
Guolin Ke's avatar
Guolin Ke committed
118

119
  str_buf << "}" << '\n';
Guolin Ke's avatar
Guolin Ke committed
120
121
122
123
124
125

  return str_buf.str();
}

std::string GBDT::ModelToIfElse(int num_iteration) const {
  std::stringstream str_buf;
126
  Common::C_stringstream(str_buf);
Guolin Ke's avatar
Guolin Ke committed
127

128
129
130
131
132
133
134
135
136
137
138
139
  str_buf << "#include \"gbdt.h\"" << '\n';
  str_buf << "#include <LightGBM/utils/common.h>" << '\n';
  str_buf << "#include <LightGBM/objective_function.h>" << '\n';
  str_buf << "#include <LightGBM/metric.h>" << '\n';
  str_buf << "#include <LightGBM/prediction_early_stop.h>" << '\n';
  str_buf << "#include <ctime>" << '\n';
  str_buf << "#include <sstream>" << '\n';
  str_buf << "#include <chrono>" << '\n';
  str_buf << "#include <string>" << '\n';
  str_buf << "#include <vector>" << '\n';
  str_buf << "#include <utility>" << '\n';
  str_buf << "namespace LightGBM {" << '\n';
Guolin Ke's avatar
Guolin Ke committed
140
141
142
143
144
145
146
147

  int num_used_model = static_cast<int>(models_.size());
  if (num_iteration > 0) {
    num_used_model = std::min(num_iteration * num_tree_per_iteration_, num_used_model);
  }

  // PredictRaw
  for (int i = 0; i < num_used_model; ++i) {
148
    str_buf << models_[i]->ToIfElse(i, false) << '\n';
Guolin Ke's avatar
Guolin Ke committed
149
150
151
152
153
154
155
156
157
  }

  str_buf << "double (*PredictTreePtr[])(const double*) = { ";
  for (int i = 0; i < num_used_model; ++i) {
    if (i > 0) {
      str_buf << " , ";
    }
    str_buf << "PredictTree" << i;
  }
158
  str_buf << " };" << '\n' << '\n';
Guolin Ke's avatar
Guolin Ke committed
159
160

  std::stringstream pred_str_buf;
161
  Common::C_stringstream(pred_str_buf);
Guolin Ke's avatar
Guolin Ke committed
162

163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
  pred_str_buf << "\t" << "int early_stop_round_counter = 0;" << '\n';
  pred_str_buf << "\t" << "std::memset(output, 0, sizeof(double) * num_tree_per_iteration_);" << '\n';
  pred_str_buf << "\t" << "for (int i = 0; i < num_iteration_for_pred_; ++i) {" << '\n';
  pred_str_buf << "\t\t" << "for (int k = 0; k < num_tree_per_iteration_; ++k) {" << '\n';
  pred_str_buf << "\t\t\t" << "output[k] += (*PredictTreePtr[i * num_tree_per_iteration_ + k])(features);" << '\n';
  pred_str_buf << "\t\t" << "}" << '\n';
  pred_str_buf << "\t\t" << "++early_stop_round_counter;" << '\n';
  pred_str_buf << "\t\t" << "if (early_stop->round_period == early_stop_round_counter) {" << '\n';
  pred_str_buf << "\t\t\t" << "if (early_stop->callback_function(output, num_tree_per_iteration_))" << '\n';
  pred_str_buf << "\t\t\t\t" << "return;" << '\n';
  pred_str_buf << "\t\t\t" << "early_stop_round_counter = 0;" << '\n';
  pred_str_buf << "\t\t" << "}" << '\n';
  pred_str_buf << "\t" << "}" << '\n';

  str_buf << "void GBDT::PredictRaw(const double* features, double *output, const PredictionEarlyStopInstance* early_stop) const {" << '\n';
Guolin Ke's avatar
Guolin Ke committed
178
  str_buf << pred_str_buf.str();
179
180
  str_buf << "}" << '\n';
  str_buf << '\n';
Guolin Ke's avatar
Guolin Ke committed
181

182
183
184
  // PredictRawByMap
  str_buf << "double (*PredictTreeByMapPtr[])(const std::unordered_map<int, double>&) = { ";
  for (int i = 0; i < num_used_model; ++i) {
Guolin Ke's avatar
Guolin Ke committed
185
186
187
188
    if (i > 0) {
      str_buf << " , ";
    }
    str_buf << "PredictTree" << i << "ByMap";
189
  }
190
  str_buf << " };" << '\n' << '\n';
191
192

  std::stringstream pred_str_buf_map;
193
  Common::C_stringstream(pred_str_buf_map);
194

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  pred_str_buf_map << "\t" << "int early_stop_round_counter = 0;" << '\n';
  pred_str_buf_map << "\t" << "std::memset(output, 0, sizeof(double) * num_tree_per_iteration_);" << '\n';
  pred_str_buf_map << "\t" << "for (int i = 0; i < num_iteration_for_pred_; ++i) {" << '\n';
  pred_str_buf_map << "\t\t" << "for (int k = 0; k < num_tree_per_iteration_; ++k) {" << '\n';
  pred_str_buf_map << "\t\t\t" << "output[k] += (*PredictTreeByMapPtr[i * num_tree_per_iteration_ + k])(features);" << '\n';
  pred_str_buf_map << "\t\t" << "}" << '\n';
  pred_str_buf_map << "\t\t" << "++early_stop_round_counter;" << '\n';
  pred_str_buf_map << "\t\t" << "if (early_stop->round_period == early_stop_round_counter) {" << '\n';
  pred_str_buf_map << "\t\t\t" << "if (early_stop->callback_function(output, num_tree_per_iteration_))" << '\n';
  pred_str_buf_map << "\t\t\t\t" << "return;" << '\n';
  pred_str_buf_map << "\t\t\t" << "early_stop_round_counter = 0;" << '\n';
  pred_str_buf_map << "\t\t" << "}" << '\n';
  pred_str_buf_map << "\t" << "}" << '\n';

  str_buf << "void GBDT::PredictRawByMap(const std::unordered_map<int, double>& features, double* output, const PredictionEarlyStopInstance* early_stop) const {" << '\n';
210
  str_buf << pred_str_buf_map.str();
211
212
  str_buf << "}" << '\n';
  str_buf << '\n';
213

Guolin Ke's avatar
Guolin Ke committed
214
  // Predict
215
216
217
218
219
220
221
  str_buf << "void GBDT::Predict(const double* features, double *output, const PredictionEarlyStopInstance* early_stop) const {" << '\n';
  str_buf << "\t" << "PredictRaw(features, output, early_stop);" << '\n';
  str_buf << "\t" << "if (average_output_) {" << '\n';
  str_buf << "\t\t" << "for (int k = 0; k < num_tree_per_iteration_; ++k) {" << '\n';
  str_buf << "\t\t\t" << "output[k] /= num_iteration_for_pred_;" << '\n';
  str_buf << "\t\t" << "}" << '\n';
  str_buf << "\t" << "}" << '\n';
Guolin Ke's avatar
Guolin Ke committed
222
  str_buf << "\t" << "if (objective_function_ != nullptr) {" << '\n';
223
224
225
226
  str_buf << "\t\t" << "objective_function_->ConvertOutput(output, output);" << '\n';
  str_buf << "\t" << "}" << '\n';
  str_buf << "}" << '\n';
  str_buf << '\n';
Guolin Ke's avatar
Guolin Ke committed
227

228
  // PredictByMap
229
230
231
232
233
234
235
  str_buf << "void GBDT::PredictByMap(const std::unordered_map<int, double>& features, double* output, const PredictionEarlyStopInstance* early_stop) const {" << '\n';
  str_buf << "\t" << "PredictRawByMap(features, output, early_stop);" << '\n';
  str_buf << "\t" << "if (average_output_) {" << '\n';
  str_buf << "\t\t" << "for (int k = 0; k < num_tree_per_iteration_; ++k) {" << '\n';
  str_buf << "\t\t\t" << "output[k] /= num_iteration_for_pred_;" << '\n';
  str_buf << "\t\t" << "}" << '\n';
  str_buf << "\t" << "}" << '\n';
Guolin Ke's avatar
Guolin Ke committed
236
  str_buf << "\t" << "if (objective_function_ != nullptr) {" << '\n';
237
238
239
240
  str_buf << "\t\t" << "objective_function_->ConvertOutput(output, output);" << '\n';
  str_buf << "\t" << "}" << '\n';
  str_buf << "}" << '\n';
  str_buf << '\n';
241
242


Guolin Ke's avatar
Guolin Ke committed
243
244
  // PredictLeafIndex
  for (int i = 0; i < num_used_model; ++i) {
245
    str_buf << models_[i]->ToIfElse(i, true) << '\n';
Guolin Ke's avatar
Guolin Ke committed
246
247
248
249
250
251
252
253
254
  }

  str_buf << "double (*PredictTreeLeafPtr[])(const double*) = { ";
  for (int i = 0; i < num_used_model; ++i) {
    if (i > 0) {
      str_buf << " , ";
    }
    str_buf << "PredictTree" << i << "Leaf";
  }
255
  str_buf << " };" << '\n' << '\n';
Guolin Ke's avatar
Guolin Ke committed
256

257
258
259
260
261
262
  str_buf << "void GBDT::PredictLeafIndex(const double* features, double *output) const {" << '\n';
  str_buf << "\t" << "int total_tree = num_iteration_for_pred_ * num_tree_per_iteration_;" << '\n';
  str_buf << "\t" << "for (int i = 0; i < total_tree; ++i) {" << '\n';
  str_buf << "\t\t" << "output[i] = (*PredictTreeLeafPtr[i])(features);" << '\n';
  str_buf << "\t" << "}" << '\n';
  str_buf << "}" << '\n';
Guolin Ke's avatar
Guolin Ke committed
263

264
  // PredictLeafIndexByMap
265
266
  str_buf << "double (*PredictTreeLeafByMapPtr[])(const std::unordered_map<int, double>&) = { ";
  for (int i = 0; i < num_used_model; ++i) {
Guolin Ke's avatar
Guolin Ke committed
267
268
269
270
    if (i > 0) {
      str_buf << " , ";
    }
    str_buf << "PredictTree" << i << "LeafByMap";
271
  }
272
  str_buf << " };" << '\n' << '\n';
273

274
275
276
277
278
279
  str_buf << "void GBDT::PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const {" << '\n';
  str_buf << "\t" << "int total_tree = num_iteration_for_pred_ * num_tree_per_iteration_;" << '\n';
  str_buf << "\t" << "for (int i = 0; i < total_tree; ++i) {" << '\n';
  str_buf << "\t\t" << "output[i] = (*PredictTreeLeafByMapPtr[i])(features);" << '\n';
  str_buf << "\t" << "}" << '\n';
  str_buf << "}" << '\n';
280

281
  str_buf << "}  // namespace LightGBM" << '\n';
Guolin Ke's avatar
Guolin Ke committed
282
283
284
285
286
287
288
289
290
291
292
293

  return str_buf.str();
}

/*!
 * \brief Write the if-else C++ code produced by ModelToIfElse to a file
 *
 * If the file already exists and is readable, its previous content is preserved inside the new
 * file under "#ifndef USE_HARD_CODE" while the generated code goes under "#else". Because the
 * first emitted line defines USE_HARD_CODE, the preprocessor selects the generated code.
 * Otherwise the file contains the generated code only.
 *
 * \param num_iteration Forwarded to ModelToIfElse
 * \param filename Path of the file to (over)write
 * \return true if the output stream is in a good state after writing and closing
 */
bool GBDT::SaveModelToIfElse(int num_iteration, const char* filename) const {
  // Generate first, so that a failure here does not leave an already truncated file behind
  const std::string generated_code = ModelToIfElse(num_iteration);

  std::string origin;
  bool has_origin = false;
  {
    std::ifstream ifs(filename);
    if (ifs.good()) {
      origin.assign(std::istreambuf_iterator<char>(ifs),
                    std::istreambuf_iterator<char>());
      has_origin = true;
    }
  }  // the read handle is released here, before the same path is reopened for writing

  /*! \brief File to write models */
  std::ofstream output_file(filename);
  if (has_origin) {
    output_file << "#define USE_HARD_CODE 0" << '\n';
    output_file << "#ifndef USE_HARD_CODE" << '\n';
    output_file << origin << '\n';
    output_file << "#else" << '\n';
    output_file << generated_code;
    output_file << "#endif" << '\n';
  } else {
    output_file << generated_code;
  }

  output_file.close();

  return static_cast<bool>(output_file);
}

std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int feature_importance_type) const {
Guolin Ke's avatar
Guolin Ke committed
312
  std::stringstream ss;
313
  Common::C_stringstream(ss);
Guolin Ke's avatar
Guolin Ke committed
314
315

  // output model type
316
317
  ss << SubModelName() << '\n';
  ss << "version=" << kModelVersion << '\n';
Guolin Ke's avatar
Guolin Ke committed
318
  // output number of class
319
320
  ss << "num_class=" << num_class_ << '\n';
  ss << "num_tree_per_iteration=" << num_tree_per_iteration_ << '\n';
Guolin Ke's avatar
Guolin Ke committed
321
  // output label index
322
  ss << "label_index=" << label_idx_ << '\n';
Guolin Ke's avatar
Guolin Ke committed
323
  // output max_feature_idx
324
  ss << "max_feature_idx=" << max_feature_idx_ << '\n';
Guolin Ke's avatar
Guolin Ke committed
325
326
  // output objective
  if (objective_function_ != nullptr) {
327
    ss << "objective=" << objective_function_->ToString() << '\n';
Guolin Ke's avatar
Guolin Ke committed
328
329
330
  }

  if (average_output_) {
331
    ss << "average_output" << '\n';
Guolin Ke's avatar
Guolin Ke committed
332
333
  }

334
  ss << "feature_names=" << CommonC::Join(feature_names_, " ") << '\n';
Guolin Ke's avatar
Guolin Ke committed
335

336
  if (monotone_constraints_.size() != 0) {
337
    ss << "monotone_constraints=" << CommonC::Join(monotone_constraints_, " ")
338
339
340
       << '\n';
  }

341
  ss << "feature_infos=" << CommonC::Join(feature_infos_, " ") << '\n';
Guolin Ke's avatar
Guolin Ke committed
342
343

  int num_used_model = static_cast<int>(models_.size());
344
345
346
  int total_iteration = num_used_model / num_tree_per_iteration_;
  start_iteration = std::max(start_iteration, 0);
  start_iteration = std::min(start_iteration, total_iteration);
Guolin Ke's avatar
Guolin Ke committed
347
  if (num_iteration > 0) {
348
349
    int end_iteration = start_iteration + num_iteration;
    num_used_model = std::min(end_iteration * num_tree_per_iteration_, num_used_model);
Guolin Ke's avatar
Guolin Ke committed
350
  }
351

352
353
354
355
  int start_model = start_iteration * num_tree_per_iteration_;

  std::vector<std::string> tree_strs(num_used_model - start_model);
  std::vector<size_t> tree_sizes(num_used_model - start_model);
Guolin Ke's avatar
Guolin Ke committed
356
  // output tree models
357
  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
358
359
360
361
362
  for (int i = start_model; i < num_used_model; ++i) {
    const int idx = i - start_model;
    tree_strs[idx] = "Tree=" + std::to_string(idx) + '\n';
    tree_strs[idx] += models_[i]->ToString() + '\n';
    tree_sizes[idx] = tree_strs[idx].size();
363
364
  }

365
  ss << "tree_sizes=" << CommonC::Join(tree_sizes, " ") << '\n';
366
367
  ss << '\n';

368
  for (int i = 0; i < num_used_model - start_model; ++i) {
369
370
    ss << tree_strs[i];
    tree_strs[i].clear();
Guolin Ke's avatar
Guolin Ke committed
371
  }
Guolin Ke's avatar
Guolin Ke committed
372
  ss << "end of trees" << "\n";
373
374
  std::vector<double> feature_importances = FeatureImportance(
      num_iteration, feature_importance_type);
Guolin Ke's avatar
Guolin Ke committed
375
376
377
378
379
380
381
382
383
  // store the importance first
  std::vector<std::pair<size_t, std::string>> pairs;
  for (size_t i = 0; i < feature_importances.size(); ++i) {
    size_t feature_importances_int = static_cast<size_t>(feature_importances[i]);
    if (feature_importances_int > 0) {
      pairs.emplace_back(feature_importances_int, feature_names_[i]);
    }
  }
  // sort the importance
384
385
386
  std::stable_sort(pairs.begin(), pairs.end(),
                   [](const std::pair<size_t, std::string>& lhs,
                      const std::pair<size_t, std::string>& rhs) {
Guolin Ke's avatar
Guolin Ke committed
387
388
    return lhs.first > rhs.first;
  });
389
  ss << '\n' << "feature_importances:" << '\n';
Guolin Ke's avatar
Guolin Ke committed
390
  for (size_t i = 0; i < pairs.size(); ++i) {
391
    ss << pairs[i].second << "=" << std::to_string(pairs[i].first) << '\n';
Guolin Ke's avatar
Guolin Ke committed
392
  }
Guolin Ke's avatar
Guolin Ke committed
393
  if (config_ != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
394
    ss << "\nparameters:" << '\n';
Guolin Ke's avatar
Guolin Ke committed
395
    ss << config_->ToString() << "\n";
Guolin Ke's avatar
Guolin Ke committed
396
397
398
399
400
    ss << "end of parameters" << '\n';
  } else if (!loaded_parameter_.empty()) {
    ss << "\nparameters:" << '\n';
    ss << loaded_parameter_ << "\n";
    ss << "end of parameters" << '\n';
Guolin Ke's avatar
Guolin Ke committed
401
  }
402
403
404
405
406
  if (!parser_config_str_.empty()) {
    ss << "\nparser:" << '\n';
    ss << parser_config_str_ << "\n";
    ss << "end of parser" << '\n';
  }
Nikita Titov's avatar
Nikita Titov committed
407
  return ss.str();
Guolin Ke's avatar
Guolin Ke committed
408
409
}

410
bool GBDT::SaveModelToFile(int start_iteration, int num_iteration, int feature_importance_type, const char* filename) const {
Guolin Ke's avatar
Guolin Ke committed
411
  /*! \brief File to write models */
412
413
414
415
  auto writer = VirtualFileWriter::Make(filename);
  if (!writer->Init()) {
    Log::Fatal("Model file %s is not available for writes", filename);
  }
416
  std::string str_to_write = SaveModelToString(start_iteration, num_iteration, feature_importance_type);
417
418
  auto size = writer->Write(str_to_write.c_str(), str_to_write.size());
  return size > 0;
Guolin Ke's avatar
Guolin Ke committed
419
420
}

421
bool GBDT::LoadModelFromString(const char* buffer, size_t len) {
Guolin Ke's avatar
Guolin Ke committed
422
423
  // use serialized string to restore this object
  models_.clear();
424
425
426
427
428
429
430
  auto c_str = buffer;
  auto p = c_str;
  auto end = p + len;
  std::unordered_map<std::string, std::string> key_vals;
  while (p < end) {
    auto line_len = Common::GetLine(p);
    if (line_len > 0) {
431
      std::string cur_line(p, line_len);
432
433
434
435
      if (!Common::StartsWith(cur_line, "Tree=")) {
        auto strs = Common::Split(cur_line.c_str(), '=');
        if (strs.size() == 1) {
          key_vals[strs[0]] = "";
436
        } else if (strs.size() == 2) {
437
          key_vals[strs[0]] = strs[1];
438
        } else if (strs.size() > 2) {
Guolin Ke's avatar
Guolin Ke committed
439
          if (strs[0] == "feature_names") {
Guolin Ke's avatar
Guolin Ke committed
440
            key_vals[strs[0]] = cur_line.substr(std::strlen("feature_names="));
441
442
          } else if (strs[0] == "monotone_constraints") {
            key_vals[strs[0]] = cur_line.substr(std::strlen("monotone_constraints="));
443
          } else {
Guolin Ke's avatar
Guolin Ke committed
444
445
            // Use first 128 chars to avoid exceed the message buffer.
            Log::Fatal("Wrong line at model file: %s", cur_line.substr(0, std::min<size_t>(128, cur_line.size())).c_str());
446
          }
447
        }
448
      } else {
449
450
451
452
453
454
        break;
      }
    }
    p += line_len;
    p = Common::SkipNewLine(p);
  }
Guolin Ke's avatar
Guolin Ke committed
455
456

  // get number of classes
457
458
  if (key_vals.count("num_class")) {
    Common::Atoi(key_vals["num_class"].c_str(), &num_class_);
Guolin Ke's avatar
Guolin Ke committed
459
460
461
462
463
  } else {
    Log::Fatal("Model file doesn't specify the number of classes");
    return false;
  }

464
465
  if (key_vals.count("num_tree_per_iteration")) {
    Common::Atoi(key_vals["num_tree_per_iteration"].c_str(), &num_tree_per_iteration_);
Guolin Ke's avatar
Guolin Ke committed
466
467
468
469
470
  } else {
    num_tree_per_iteration_ = num_class_;
  }

  // get index of label
471
472
  if (key_vals.count("label_index")) {
    Common::Atoi(key_vals["label_index"].c_str(), &label_idx_);
Guolin Ke's avatar
Guolin Ke committed
473
474
475
476
  } else {
    Log::Fatal("Model file doesn't specify the label index");
    return false;
  }
477

Guolin Ke's avatar
Guolin Ke committed
478
  // get max_feature_idx first
479
480
  if (key_vals.count("max_feature_idx")) {
    Common::Atoi(key_vals["max_feature_idx"].c_str(), &max_feature_idx_);
Guolin Ke's avatar
Guolin Ke committed
481
482
483
484
  } else {
    Log::Fatal("Model file doesn't specify max_feature_idx");
    return false;
  }
485

Guolin Ke's avatar
Guolin Ke committed
486
  // get average_output
487
  if (key_vals.count("average_output")) {
Guolin Ke's avatar
Guolin Ke committed
488
489
    average_output_ = true;
  }
490

Guolin Ke's avatar
Guolin Ke committed
491
  // get feature names
492
493
  if (key_vals.count("feature_names")) {
    feature_names_ = Common::Split(key_vals["feature_names"].c_str(), ' ');
Guolin Ke's avatar
Guolin Ke committed
494
495
496
497
498
    if (feature_names_.size() != static_cast<size_t>(max_feature_idx_ + 1)) {
      Log::Fatal("Wrong size of feature_names");
      return false;
    }
  } else {
499
    Log::Fatal("Model file doesn't contain feature_names");
Guolin Ke's avatar
Guolin Ke committed
500
501
502
    return false;
  }

503
504
  // get monotone_constraints
  if (key_vals.count("monotone_constraints")) {
505
    monotone_constraints_ = CommonC::StringToArray<int8_t>(key_vals["monotone_constraints"].c_str(), ' ');
506
507
508
509
510
511
    if (monotone_constraints_.size() != static_cast<size_t>(max_feature_idx_ + 1)) {
      Log::Fatal("Wrong size of monotone_constraints");
      return false;
    }
  }

512
513
  if (key_vals.count("feature_infos")) {
    feature_infos_ = Common::Split(key_vals["feature_infos"].c_str(), ' ');
Guolin Ke's avatar
Guolin Ke committed
514
515
516
517
518
    if (feature_infos_.size() != static_cast<size_t>(max_feature_idx_ + 1)) {
      Log::Fatal("Wrong size of feature_infos");
      return false;
    }
  } else {
519
    Log::Fatal("Model file doesn't contain feature_infos");
Guolin Ke's avatar
Guolin Ke committed
520
521
522
    return false;
  }

523
524
  if (key_vals.count("objective")) {
    auto str = key_vals["objective"];
525
    loaded_objective_.reset(ObjectiveFunction::CreateObjectiveFunction(ParseObjectiveAlias(str)));
Guolin Ke's avatar
Guolin Ke committed
526
527
    objective_function_ = loaded_objective_.get();
  }
528

529
530
531
532
  if (!key_vals.count("tree_sizes")) {
    while (p < end) {
      auto line_len = Common::GetLine(p);
      if (line_len > 0) {
533
        std::string cur_line(p, line_len);
534
535
536
537
538
539
        if (Common::StartsWith(cur_line, "Tree=")) {
          p += line_len;
          p = Common::SkipNewLine(p);
          size_t used_len = 0;
          models_.emplace_back(new Tree(p, &used_len));
          p += used_len;
540
        } else {
541
542
543
544
545
546
          break;
        }
      }
      p = Common::SkipNewLine(p);
    }
  } else {
547
    std::vector<size_t> tree_sizes = CommonC::StringToArray<size_t>(key_vals["tree_sizes"].c_str(), ' ');
548
549
550
551
552
    std::vector<size_t> tree_boundries(tree_sizes.size() + 1, 0);
    int num_trees = static_cast<int>(tree_sizes.size());
    for (int i = 0; i < num_trees; ++i) {
      tree_boundries[i + 1] = tree_boundries[i] + tree_sizes[i];
      models_.emplace_back(nullptr);
Guolin Ke's avatar
Guolin Ke committed
553
    }
554
    OMP_INIT_EX();
555
    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
    for (int i = 0; i < num_trees; ++i) {
      OMP_LOOP_EX_BEGIN();
      auto cur_p = p + tree_boundries[i];
      auto line_len = Common::GetLine(cur_p);
      std::string cur_line(cur_p, line_len);
      if (Common::StartsWith(cur_line, "Tree=")) {
        cur_p += line_len;
        cur_p = Common::SkipNewLine(cur_p);
        size_t used_len = 0;
        models_[i].reset(new Tree(cur_p, &used_len));
      } else {
        Log::Fatal("Model format error, expect a tree here. met %s", cur_line.c_str());
      }
      OMP_LOOP_EX_END();
    }
    OMP_THROW_EX();
Guolin Ke's avatar
Guolin Ke committed
572
573
574
575
  }
  num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_tree_per_iteration_;
  num_init_iteration_ = num_iteration_for_pred_;
  iter_ = 0;
576
  bool is_inparameter = false, is_inparser = false;
Guolin Ke's avatar
Guolin Ke committed
577
  std::stringstream ss;
578
  Common::C_stringstream(ss);
Guolin Ke's avatar
Guolin Ke committed
579
580
581
  while (p < end) {
    auto line_len = Common::GetLine(p);
    if (line_len > 0) {
582
      std::string cur_line(p, line_len);
Guolin Ke's avatar
Guolin Ke committed
583
584
585
586
587
588
      if (cur_line == std::string("parameters:")) {
        is_inparameter = true;
      } else if (cur_line == std::string("end of parameters")) {
        break;
      } else if (is_inparameter) {
        ss << cur_line << "\n";
589
590
591
592
593
        if (Common::StartsWith(cur_line, "[linear_tree: ")) {
          int is_linear = 0;
          Common::Atoi(cur_line.substr(14, 1).c_str(), &is_linear);
          linear_tree_ = static_cast<bool>(is_linear);
        }
Guolin Ke's avatar
Guolin Ke committed
594
595
596
597
598
599
600
601
      }
    }
    p += line_len;
    p = Common::SkipNewLine(p);
  }
  if (!ss.str().empty()) {
    loaded_parameter_ = ss.str();
  }
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
  ss.clear();
  ss.str("");
  while (p < end) {
    auto line_len = Common::GetLine(p);
    if (line_len > 0) {
      std::string cur_line(p, line_len);
      if (cur_line == std::string("parser:")) {
        is_inparser = true;
      } else if (cur_line == std::string("end of parser")) {
        p += line_len;
        p = Common::SkipNewLine(p);
        break;
      } else if (is_inparser) {
        ss << cur_line << "\n";
      }
    }
    p += line_len;
    p = Common::SkipNewLine(p);
  }
  parser_config_str_ = ss.str();
  ss.clear();
  ss.str("");
Guolin Ke's avatar
Guolin Ke committed
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
  return true;
}

std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_type) const {
  int num_used_model = static_cast<int>(models_.size());
  if (num_iteration > 0) {
    num_iteration += 0;
    num_used_model = std::min(num_iteration * num_tree_per_iteration_, num_used_model);
  }

  std::vector<double> feature_importances(max_feature_idx_ + 1, 0.0);
  if (importance_type == 0) {
    for (int iter = 0; iter < num_used_model; ++iter) {
      for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
        if (models_[iter]->split_gain(split_idx) > 0) {
639
#ifdef DEBUG
640
          CHECK_GE(models_[iter]->split_feature(split_idx), 0);
641
#endif
Guolin Ke's avatar
Guolin Ke committed
642
643
644
645
646
647
648
649
          feature_importances[models_[iter]->split_feature(split_idx)] += 1.0;
        }
      }
    }
  } else if (importance_type == 1) {
    for (int iter = 0; iter < num_used_model; ++iter) {
      for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
        if (models_[iter]->split_gain(split_idx) > 0) {
650
#ifdef DEBUG
651
          CHECK_GE(models_[iter]->split_feature(split_idx), 0);
652
#endif
Guolin Ke's avatar
Guolin Ke committed
653
654
655
656
657
          feature_importances[models_[iter]->split_feature(split_idx)] += models_[iter]->split_gain(split_idx);
        }
      }
    }
  } else {
658
    Log::Fatal("Unknown importance type: only support split=0 and gain=1");
Guolin Ke's avatar
Guolin Ke committed
659
  }
Nikita Titov's avatar
Nikita Titov committed
660
  return feature_importances;
Guolin Ke's avatar
Guolin Ke committed
661
662
663
}

}  // namespace LightGBM