Unverified commit c4a7ab81, authored by Nikita Titov, committed by GitHub
Browse files

added feature infos to JSON dump (#2660)



* added feature infos to JSON dump

* slight json schema refactor

* simplified code

* refactor feature_infos

* refactoring

* Update src/boosting/gbdt.cpp

* Update dataset.h

* Update include/LightGBM/dataset.h

* simplify

* Apply suggestions from code review

* parse string and construct JSON objs
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
parent 3c394c8d
...@@ -178,7 +178,7 @@ class BinMapper { ...@@ -178,7 +178,7 @@ class BinMapper {
/*! /*!
* \brief Get bin info * \brief Get bin info
*/ */
inline std::string bin_info() const { inline std::string bin_info_string() const {
if (bin_type_ == BinType::CategoricalBin) { if (bin_type_ == BinType::CategoricalBin) {
return Common::Join(bin_2_categorical_, ":"); return Common::Join(bin_2_categorical_, ":");
} else { } else {
......
...@@ -616,13 +616,13 @@ class Dataset { ...@@ -616,13 +616,13 @@ class Dataset {
inline std::vector<std::string> feature_infos() const { inline std::vector<std::string> feature_infos() const {
std::vector<std::string> bufs; std::vector<std::string> bufs;
for (int i = 0; i < num_total_features_; i++) { for (int i = 0; i < num_total_features_; ++i) {
int fidx = used_feature_map_[i]; int fidx = used_feature_map_[i];
if (fidx == -1) { if (fidx < 0) {
bufs.push_back("none"); bufs.push_back("none");
} else { } else {
const auto bin_mapper = FeatureBinMapper(fidx); const auto bin_mapper = FeatureBinMapper(fidx);
bufs.push_back(bin_mapper->bin_info()); bufs.push_back(bin_mapper->bin_info_string());
} }
} }
return bufs; return bufs;
......
...@@ -195,4 +195,3 @@ class ArrayArgs { ...@@ -195,4 +195,3 @@ class ArrayArgs {
} // namespace LightGBM } // namespace LightGBM
#endif // LightGBM_UTILS_ARRAY_AGRS_H_ #endif // LightGBM_UTILS_ARRAY_AGRS_H_
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <LightGBM/config.h> #include <LightGBM/config.h>
#include <LightGBM/metric.h> #include <LightGBM/metric.h>
#include <LightGBM/objective_function.h> #include <LightGBM/objective_function.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/common.h> #include <LightGBM/utils/common.h>
#include <string> #include <string>
...@@ -39,6 +40,36 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const { ...@@ -39,6 +40,36 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const {
str_buf << "\"monotone_constraints\":[" str_buf << "\"monotone_constraints\":["
<< Common::Join(monotone_constraints_, ",") << "]," << '\n'; << Common::Join(monotone_constraints_, ",") << "]," << '\n';
str_buf << "\"feature_infos\":" << "{";
bool first_obj = true;
for (size_t i = 0; i < feature_infos_.size(); ++i) {
std::stringstream json_str_buf;
auto strs = Common::Split(feature_infos_[i].c_str(), ":");
if (strs[0][0] == '[') {
strs[0].erase(0, 1); // remove '['
strs[1].erase(strs[1].size() - 1); // remove ']'
json_str_buf << "{\"min_value\":" << strs[0] << ",";
json_str_buf << "\"max_value\":" << strs[1] << ",";
json_str_buf << "\"values\":[]}";
} else if (strs[0] != "none") { // categorical feature
auto vals = Common::StringToArray<int>(feature_infos_[i], ':');
auto max_idx = ArrayArgs<int>::ArgMax(vals);
auto min_idx = ArrayArgs<int>::ArgMin(vals);
json_str_buf << "{\"min_value\":" << vals[min_idx] << ",";
json_str_buf << "\"max_value\":" << vals[max_idx] << ",";
json_str_buf << "\"values\":[" << Common::Join(vals, ",") << "]}";
} else { // unused feature
continue;
}
if (!first_obj) {
str_buf << ",";
}
str_buf << "\"" << feature_names_[i] << "\":";
str_buf << json_str_buf.str();
first_obj = false;
}
str_buf << "}," << '\n';
str_buf << "\"tree_info\":["; str_buf << "\"tree_info\":[";
int num_used_model = static_cast<int>(models_.size()); int num_used_model = static_cast<int>(models_.size());
int total_iteration = num_used_model / num_tree_per_iteration_; int total_iteration = num_used_model / num_tree_per_iteration_;
...@@ -70,12 +101,11 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const { ...@@ -70,12 +101,11 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const {
} }
} }
str_buf << '\n' << "\"feature_importances\":" << "{"; str_buf << '\n' << "\"feature_importances\":" << "{";
if (!pairs.empty()) { for (size_t i = 0; i < pairs.size(); ++i) {
str_buf << "\"" << pairs[0].second << "\":" << std::to_string(pairs[0].first); if (i > 0) {
for (size_t i = 1; i < pairs.size(); ++i) {
str_buf << ","; str_buf << ",";
str_buf << "\"" << pairs[i].second << "\":" << std::to_string(pairs[i].first);
} }
str_buf << "\"" << pairs[i].second << "\":" << std::to_string(pairs[i].first);
} }
str_buf << "}" << '\n'; str_buf << "}" << '\n';
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment