Commit 011fe024 authored by Guolin Ke's avatar Guolin Ke
Browse files

output more feature information in model file, #192

parent 9fd5bc25
#ifndef LIGHTGBM_BIN_H_ #ifndef LIGHTGBM_BIN_H_
#define LIGHTGBM_BIN_H_ #define LIGHTGBM_BIN_H_
#include <LightGBM/utils/common.h>
#include <LightGBM/meta.h> #include <LightGBM/meta.h>
#include <vector> #include <vector>
#include <functional> #include <functional>
#include <unordered_map> #include <unordered_map>
#include <sstream>
namespace LightGBM { namespace LightGBM {
...@@ -150,8 +153,22 @@ public: ...@@ -150,8 +153,22 @@ public:
* \param buffer The source * \param buffer The source
*/ */
void CopyFrom(const char* buffer); void CopyFrom(const char* buffer);
/*!
* \brief Get the bin type
*/
inline BinType bin_type() const { return bin_type_; } inline BinType bin_type() const { return bin_type_; }
/*!
* \brief Get a textual description of the bins of this mapper
* \return For a categorical bin type, the entries of bin_2_categorical_ joined by ',';
*         otherwise the value range formatted as "[min_val_,max_val_]"
*/
inline std::string bin_info() const {
  if (bin_type_ == BinType::CategoricalBin) {
    return Common::Join(bin_2_categorical_, ",");
  } else {
    std::stringstream str_buf;
    // The stream default of 6 significant digits would truncate the range
    // (e.g. 1234567.891 -> 1.23457e+06). 17 significant digits are enough to
    // round-trip a double (assuming the usual IEEE-754 64-bit representation).
    str_buf.precision(17);
    str_buf << '[' << min_val_ << ',' << max_val_ << ']';
    return str_buf.str();
  }
}
private: private:
/*! \brief Number of bins */ /*! \brief Number of bins */
int num_bin_; int num_bin_;
...@@ -167,6 +184,10 @@ private: ...@@ -167,6 +184,10 @@ private:
std::unordered_map<int, unsigned int> categorical_2_bin_; std::unordered_map<int, unsigned int> categorical_2_bin_;
/*! \brief Mapper from bin to categorical */ /*! \brief Mapper from bin to categorical */
std::vector<int> bin_2_categorical_; std::vector<int> bin_2_categorical_;
/*! \brief minimum feature value */
double min_val_;
/*! \brief maximum feature value */
double max_val_;
}; };
/*! /*!
......
...@@ -109,6 +109,16 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_ ...@@ -109,6 +109,16 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
label_idx_ = train_data->label_idx(); label_idx_ = train_data->label_idx();
// get feature names // get feature names
feature_names_ = train_data->feature_names(); feature_names_ = train_data->feature_names();
// get feature infos
feature_infos_.clear();
for (int i = 0; i < max_feature_idx_ + 1; ++i) {
int feature_idx = train_data->GetInnerFeatureIndex(i);
if (feature_idx < 0) {
feature_infos_.push_back("trival feature");
} else {
feature_infos_.push_back(train_data->FeatureAt(feature_idx)->bin_mapper()->bin_info());
}
}
} }
if ((train_data_ != train_data && train_data != nullptr) if ((train_data_ != train_data && train_data != nullptr)
...@@ -536,6 +546,12 @@ void GBDT::SaveModelToFile(int num_iteration, const char* filename) const { ...@@ -536,6 +546,12 @@ void GBDT::SaveModelToFile(int num_iteration, const char* filename) const {
for (size_t i = 0; i < pairs.size(); ++i) { for (size_t i = 0; i < pairs.size(); ++i) {
output_file << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl; output_file << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
} }
output_file << std::endl << "feature information:" << std::endl;
for (size_t i = 0; i < max_feature_idx_ + 1; ++i) {
output_file << feature_names_[i] << "=" << feature_infos_[i] << std::endl;
}
output_file.close(); output_file.close();
} }
......
...@@ -318,6 +318,8 @@ protected: ...@@ -318,6 +318,8 @@ protected:
int num_init_iteration_; int num_init_iteration_;
/*! \brief Feature names */ /*! \brief Feature names */
std::vector<std::string> feature_names_; std::vector<std::string> feature_names_;
/*! \brief Feature information strings, one per feature */
std::vector<std::string> feature_infos_;
/*! \brief number of threads */ /*! \brief number of threads */
int num_threads_; int num_threads_;
/*! \brief Buffer for multi-threading bagging */ /*! \brief Buffer for multi-threading bagging */
......
...@@ -83,7 +83,8 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in ...@@ -83,7 +83,8 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
distinct_values.push_back(0); distinct_values.push_back(0);
counts.push_back(zero_cnt); counts.push_back(zero_cnt);
} }
min_val_ = distinct_values.front();
max_val_ = distinct_values.back();
int num_values = static_cast<int>(distinct_values.size()); int num_values = static_cast<int>(distinct_values.size());
int cnt_in_bin0 = 0; int cnt_in_bin0 = 0;
if (bin_type_ == BinType::NumericalBin) { if (bin_type_ == BinType::NumericalBin) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment