Unverified commit c4a7ab81, authored by Nikita Titov, committed by GitHub
Browse files

added feature infos to JSON dump (#2660)



* added feature infos to JSON dump

* slight json schema refactor

* simplified code

* refactor feature_infos

* refactoring

* Update src/boosting/gbdt.cpp

* Update dataset.h

* Update include/LightGBM/dataset.h

* simplify

* Apply suggestions from code review

* parse string and construct JSON objs
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
parent 3c394c8d
......@@ -178,7 +178,7 @@ class BinMapper {
/*!
* \brief Get bin info
*/
inline std::string bin_info() const {
inline std::string bin_info_string() const {
if (bin_type_ == BinType::CategoricalBin) {
return Common::Join(bin_2_categorical_, ":");
} else {
......
......@@ -616,13 +616,13 @@ class Dataset {
inline std::vector<std::string> feature_infos() const {
std::vector<std::string> bufs;
for (int i = 0; i < num_total_features_; i++) {
for (int i = 0; i < num_total_features_; ++i) {
int fidx = used_feature_map_[i];
if (fidx == -1) {
if (fidx < 0) {
bufs.push_back("none");
} else {
const auto bin_mapper = FeatureBinMapper(fidx);
bufs.push_back(bin_mapper->bin_info());
bufs.push_back(bin_mapper->bin_info_string());
}
}
return bufs;
......
......@@ -195,4 +195,3 @@ class ArrayArgs {
} // namespace LightGBM
#endif // LightGBM_UTILS_ARRAY_AGRS_H_
......@@ -5,6 +5,7 @@
#include <LightGBM/config.h>
#include <LightGBM/metric.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/common.h>
#include <string>
......@@ -39,6 +40,36 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const {
str_buf << "\"monotone_constraints\":["
<< Common::Join(monotone_constraints_, ",") << "]," << '\n';
str_buf << "\"feature_infos\":" << "{";
bool first_obj = true;
for (size_t i = 0; i < feature_infos_.size(); ++i) {
std::stringstream json_str_buf;
auto strs = Common::Split(feature_infos_[i].c_str(), ":");
if (strs[0][0] == '[') {
strs[0].erase(0, 1); // remove '['
strs[1].erase(strs[1].size() - 1); // remove ']'
json_str_buf << "{\"min_value\":" << strs[0] << ",";
json_str_buf << "\"max_value\":" << strs[1] << ",";
json_str_buf << "\"values\":[]}";
} else if (strs[0] != "none") { // categorical feature
auto vals = Common::StringToArray<int>(feature_infos_[i], ':');
auto max_idx = ArrayArgs<int>::ArgMax(vals);
auto min_idx = ArrayArgs<int>::ArgMin(vals);
json_str_buf << "{\"min_value\":" << vals[min_idx] << ",";
json_str_buf << "\"max_value\":" << vals[max_idx] << ",";
json_str_buf << "\"values\":[" << Common::Join(vals, ",") << "]}";
} else { // unused feature
continue;
}
if (!first_obj) {
str_buf << ",";
}
str_buf << "\"" << feature_names_[i] << "\":";
str_buf << json_str_buf.str();
first_obj = false;
}
str_buf << "}," << '\n';
str_buf << "\"tree_info\":[";
int num_used_model = static_cast<int>(models_.size());
int total_iteration = num_used_model / num_tree_per_iteration_;
......@@ -70,12 +101,11 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const {
}
}
str_buf << '\n' << "\"feature_importances\":" << "{";
if (!pairs.empty()) {
str_buf << "\"" << pairs[0].second << "\":" << std::to_string(pairs[0].first);
for (size_t i = 1; i < pairs.size(); ++i) {
for (size_t i = 0; i < pairs.size(); ++i) {
if (i > 0) {
str_buf << ",";
str_buf << "\"" << pairs[i].second << "\":" << std::to_string(pairs[i].first);
}
str_buf << "\"" << pairs[i].second << "\":" << std::to_string(pairs[i].first);
}
str_buf << "}" << '\n';
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment