Commit c45d1d99 authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

add feature importances (#42)

add feature importances (#42)
parent eba54290
...@@ -83,6 +83,9 @@ public: ...@@ -83,6 +83,9 @@ public:
/*! \brief Get depth of specific leaf*/ /*! \brief Get depth of specific leaf*/
inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; } inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }
/*!
* \brief Get the feature index used by a specific split node
* \param split_idx Index of the split node; not bounds-checked, caller must pass a valid index
* \return Index of the feature this split tests
*/
inline int split_feature(int split_idx) const { return split_feature_[split_idx]; }
/*! /*!
* \brief Shrinkage for the tree's output * \brief Shrinkage for the tree's output
* shrinkage rate (a.k.a learning rate) is used to tune the traning process * shrinkage rate (a.k.a learning rate) is used to tune the traning process
......
...@@ -209,6 +209,7 @@ void GBDT::Train() { ...@@ -209,6 +209,7 @@ void GBDT::Train() {
if (is_early_stopping) { if (is_early_stopping) {
// close file with an early-stopping message // close file with an early-stopping message
Log::Info("Early stopping at iteration %d, the best iteration round is %d", iter + 1, iter + 1 - early_stopping_round_); Log::Info("Early stopping at iteration %d, the best iteration round is %d", iter + 1, iter + 1 - early_stopping_round_);
FeatureImportance(iter - early_stopping_round_ + 1);
fclose(output_model_file); fclose(output_model_file);
return; return;
} }
...@@ -222,6 +223,7 @@ void GBDT::Train() { ...@@ -222,6 +223,7 @@ void GBDT::Train() {
} }
fflush(output_model_file); fflush(output_model_file);
} }
FeatureImportance(models_.size());
fclose(output_model_file); fclose(output_model_file);
} }
...@@ -349,6 +351,19 @@ void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) { ...@@ -349,6 +351,19 @@ void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
Log::Info("%d models has been loaded\n", models_.size()); Log::Info("%d models has been loaded\n", models_.size());
} }
/*!
* \brief Count how many times each feature is used as a split across the
*        first last_iter trees, and append the counts to the model file.
* \param last_iter Number of trees (boosting iterations) to include
*/
void GBDT::FeatureImportance(const int last_iter) {
  // One counter per feature; std::vector gives RAII cleanup instead of the
  // raw new[]/delete[] pair (which would leak if formatting/printing threw).
  std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0);
  for (int iter = 0; iter < last_iter; ++iter) {
    // hoist the repeated bounds-checked lookup out of the inner loop
    const auto& tree = models_.at(iter);
    // a tree with k leaves has exactly k - 1 internal split nodes
    const int num_splits = tree->num_leaves() - 1;
    for (int split_idx = 0; split_idx < num_splits; ++split_idx) {
      ++feature_importances[tree->split_feature(split_idx)];
    }
  }
  std::string ret = Common::ArrayToString(feature_importances.data(), max_feature_idx_ + 1, ' ');
  fprintf(output_model_file, "feature importances=%s\n", ret.c_str());
  fflush(output_model_file);
}
double GBDT::PredictRaw(const double* value) const { double GBDT::PredictRaw(const double* value) const {
double ret = 0.0; double ret = 0.0;
for (size_t i = 0; i < models_.size(); ++i) { for (size_t i = 0; i < models_.size(); ++i) {
......
...@@ -119,8 +119,11 @@ private: ...@@ -119,8 +119,11 @@ private:
 * \param iter Current iteration * \param iter Current iteration
*/ */
bool OutputMetric(int iter); bool OutputMetric(int iter);
/*!
int early_stopping_round_; * \brief Calculate feature importances
* \param last_iter Last tree use to calculate
*/
void FeatureImportance(const int last_iter);
/*! \brief Pointer to training data */ /*! \brief Pointer to training data */
const Dataset* train_data_; const Dataset* train_data_;
...@@ -138,6 +141,8 @@ private: ...@@ -138,6 +141,8 @@ private:
std::vector<ScoreUpdater*> valid_score_updater_; std::vector<ScoreUpdater*> valid_score_updater_;
/*! \brief Metric for validation data */ /*! \brief Metric for validation data */
std::vector<std::vector<const Metric*>> valid_metrics_; std::vector<std::vector<const Metric*>> valid_metrics_;
/*! \brief Number of rounds for early stopping */
int early_stopping_round_;
/*! \brief Best score(s) for early stopping */ /*! \brief Best score(s) for early stopping */
std::vector<std::vector<int>> best_iter_; std::vector<std::vector<int>> best_iter_;
std::vector<std::vector<score_t>> best_score_; std::vector<std::vector<score_t>> best_score_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment