"tests/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "5cdaf1bd1ef99be6fdbad90a5e67adf50be0a982"
Commit 2e83a1c9 authored by wxchan, committed by Guolin Ke
Browse files

fix model feature importances (#755)

* fix model feature importance

* fix appveyor test

* Revert "fix appveyor test"

This reverts commit 3a10a1723df5b8579e345d0da07638186257ec64.

* fix warning & boost_from_average_

* fix bug

* alter num_used_model
parent 04e0db00
...@@ -964,7 +964,7 @@ std::string GBDT::SaveModelToString(int num_iteration) const { ...@@ -964,7 +964,7 @@ std::string GBDT::SaveModelToString(int num_iteration) const {
ss << models_[i]->ToString() << std::endl; ss << models_[i]->ToString() << std::endl;
} }
std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance(); std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance(num_used_model);
ss << std::endl << "feature importances:" << std::endl; ss << std::endl << "feature importances:" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) { for (size_t i = 0; i < pairs.size(); ++i) {
ss << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl; ss << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
...@@ -1088,10 +1088,10 @@ bool GBDT::LoadModelFromString(const std::string& model_str) { ...@@ -1088,10 +1088,10 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
return true; return true;
} }
std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const { std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance(int num_used_model) const {
std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0); std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0);
for (size_t iter = 0; iter < models_.size(); ++iter) { for (int iter = 0; iter < num_used_model; ++iter) {
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) { for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
if (models_[iter]->split_gain(split_idx) > 0) { if (models_[iter]->split_gain(split_idx) > 0) {
++feature_importances[models_[iter]->split_feature(split_idx)]; ++feature_importances[models_[iter]->split_feature(split_idx)];
......
...@@ -175,7 +175,7 @@ public: ...@@ -175,7 +175,7 @@ public:
/*! /*!
* \brief Save model to file * \brief Save model to file
* \param num_used_model Number of model that want to save, -1 means save all * \param num_iterations Number of model that want to save, -1 means save all
* \param filename Filename that want to save to * \param filename Filename that want to save to
* \return is_finish Is training finished or not * \return is_finish Is training finished or not
*/ */
...@@ -183,7 +183,7 @@ public: ...@@ -183,7 +183,7 @@ public:
/*! /*!
* \brief Save model to string * \brief Save model to string
* \param num_used_model Number of model that want to save, -1 means save all * \param num_iterations Number of model that want to save, -1 means save all
* \return Non-empty string if succeeded * \return Non-empty string if succeeded
*/ */
virtual std::string SaveModelToString(int num_iterations) const override; virtual std::string SaveModelToString(int num_iterations) const override;
...@@ -296,8 +296,10 @@ protected: ...@@ -296,8 +296,10 @@ protected:
std::string OutputMetric(int iter); std::string OutputMetric(int iter);
/*! /*!
* \brief Calculate feature importances * \brief Calculate feature importances
* \param num_used_model Number of models to use for feature importance, -1 means use all
* \return sorted pairs of (feature_importance, feature_name)
*/ */
std::vector<std::pair<size_t, std::string>> FeatureImportance() const; std::vector<std::pair<size_t, std::string>> FeatureImportance(int num_used_model) const;
/*! \brief current iteration */ /*! \brief current iteration */
int iter_; int iter_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment