Commit ab4ed725 authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

add feature name (#280)

parent cba87c37
...@@ -345,9 +345,19 @@ The methods of each Class is in alphabetical order. ...@@ -345,9 +345,19 @@ The methods of each Class is in alphabetical order.
Evaluation result list. Evaluation result list.
####feature_name()
Get feature names.
Returns
-------
result : array
Array of feature names.
####feature_importance(importance_type="split") ####feature_importance(importance_type="split")
Feature importances. Get feature importances.
Parameters Parameters
---------- ----------
...@@ -359,7 +369,7 @@ The methods of each Class is in alphabetical order. ...@@ -359,7 +369,7 @@ The methods of each Class is in alphabetical order.
Returns Returns
------- -------
result : array result : array
Array of feature importances Array of feature importances.
####predict(data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True) ####predict(data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True)
......
...@@ -58,7 +58,8 @@ model_json = gbm.dump_model() ...@@ -58,7 +58,8 @@ model_json = gbm.dump_model()
with open('model.json', 'w+') as f: with open('model.json', 'w+') as f:
json.dump(model_json, f, indent=4) json.dump(model_json, f, indent=4)
print('Feature names:', gbm.feature_name())
print('Calculate feature importances...') print('Calculate feature importances...')
# feature importances # feature importances
print('Feature importances:', list(gbm.feature_importance())) print('Feature importances:', list(gbm.feature_importance()))
# print('Feature importances:', list(gbm.feature_importance("gain")))
...@@ -167,6 +167,12 @@ public: ...@@ -167,6 +167,12 @@ public:
*/ */
virtual int MaxFeatureIdx() const = 0; virtual int MaxFeatureIdx() const = 0;
/*!
* \brief Get feature names of this model
* \return Feature names of this model, returned by value (the caller receives its own vector)
*/
virtual std::vector<std::string> FeatureNames() const = 0;
/*! /*!
* \brief Get index of label column * \brief Get index of label column
* \return index of label column * \return index of label column
......
...@@ -365,13 +365,28 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int* ...@@ -365,13 +365,28 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int*
LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int* out_len); LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int* out_len);
/*! /*!
* \brief Get Name of eval * \brief Get name of eval
* \param out_len total number of eval results * \param out_len total number of eval results
* \param out_strs names of eval result, need to pre-allocate memory before call this * \param out_strs names of eval result, need to pre-allocate memory before call this
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
*/ */
LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_len, char** out_strs); LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_len, char** out_strs);
/*!
* \brief Get name of features
* \param handle handle of the booster to query
* \param out_len receives the number of feature names written to out_strs
* \param out_strs names of features; the caller must pre-allocate one buffer per
*        feature before calling this. The implementation copies each name with
*        strcpy and is given no buffer size, so every buffer must be large enough
*        for the name plus its terminating NUL.
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle, int* out_len, char** out_strs);
/*!
* \brief Get number of features
* \param handle handle of the booster to query
* \param out_len receives the total number of features (the model's max feature index + 1)
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len);
/*! /*!
* \brief get evaluation for training data and validation data * \brief get evaluation for training data and validation data
Note: 1. you should call LGBM_BoosterGetEvalNames first to get the name of evaluation results Note: 1. you should call LGBM_BoosterGetEvalNames first to get the name of evaluation results
......
...@@ -1582,15 +1582,41 @@ class Booster(object): ...@@ -1582,15 +1582,41 @@ class Booster(object):
return predictor.predict(data, num_iteration, raw_score, pred_leaf, data_has_header, is_reshape) return predictor.predict(data, num_iteration, raw_score, pred_leaf, data_has_header, is_reshape)
def _to_predictor(self): def _to_predictor(self):
"""Convert to predictor """Convert to predictor"""
"""
predictor = _InnerPredictor(booster_handle=self.handle) predictor = _InnerPredictor(booster_handle=self.handle)
predictor.pandas_categorical = self.pandas_categorical predictor.pandas_categorical = self.pandas_categorical
return predictor return predictor
def feature_name(self):
    """
    Get feature names.

    Returns
    -------
    result : list
        Feature names, one decoded string per feature.

    Raises
    ------
    ValueError
        If the number of names reported by the library differs from the
        number of features it reported.
    """
    # Ask the library how many features the model has.
    feature_count = ctypes.c_int(0)
    _safe_call(_LIB.LGBM_BoosterGetNumFeature(
        self.handle,
        ctypes.byref(feature_count)))
    num_feature = feature_count.value

    # Pre-allocate one fixed-size (255 byte) buffer per feature; the library
    # writes each name into the buffer whose address is passed at that index.
    name_buffers = []
    for _ in range_(num_feature):
        name_buffers.append(ctypes.create_string_buffer(255))
    buffer_addresses = [ctypes.addressof(buf) for buf in name_buffers]
    name_pointers = (ctypes.c_char_p * num_feature)(*buffer_addresses)

    # Fetch the names and the count of names actually written.
    returned_count = ctypes.c_int(0)
    _safe_call(_LIB.LGBM_BoosterGetFeatureNames(
        self.handle,
        ctypes.byref(returned_count),
        name_pointers))
    if returned_count.value != num_feature:
        raise ValueError("Length of feature names doesn't equal with num_feature")
    return [buf.value.decode() for buf in name_buffers]
def feature_importance(self, importance_type='split'): def feature_importance(self, importance_type='split'):
""" """
Feature importances Get feature importances
Parameters Parameters
---------- ----------
...@@ -1601,7 +1627,8 @@ class Booster(object): ...@@ -1601,7 +1627,8 @@ class Booster(object):
Returns Returns
------- -------
Array of feature importances result : array
Array of feature importances.
""" """
if importance_type not in ["split", "gain"]: if importance_type not in ["split", "gain"]:
raise KeyError("importance_type must be split or gain") raise KeyError("importance_type must be split or gain")
......
...@@ -176,6 +176,12 @@ public: ...@@ -176,6 +176,12 @@ public:
*/ */
inline int MaxFeatureIdx() const override { return max_feature_idx_; } inline int MaxFeatureIdx() const override { return max_feature_idx_; }
/*!
* \brief Names of the features used by this model
* \return A copy of the stored feature name list
*/
inline std::vector<std::string> FeatureNames() const override {
  return feature_names_;
}
/*! /*!
* \brief Get index of label column * \brief Get index of label column
* \return index of label column * \return index of label column
......
...@@ -225,6 +225,15 @@ public: ...@@ -225,6 +225,15 @@ public:
return idx; return idx;
} }
/*!
* \brief Copy every feature name of the underlying model into caller-provided buffers
* \param out_strs Array of pre-allocated character buffers, one per feature. Names are
*        copied with strcpy and no length is checked, so each buffer must be able to
*        hold the corresponding name plus its terminating NUL.
* \return Number of names written
*/
int GetFeatureNames(char** out_strs) const {
  const std::vector<std::string> names = boosting_->FeatureNames();
  const int num_names = static_cast<int>(names.size());
  for (int i = 0; i < num_names; ++i) {
    std::strcpy(out_strs[i], names[i].c_str());
  }
  return num_names;
}
const Boosting* GetBoosting() const { return boosting_.get(); } const Boosting* GetBoosting() const { return boosting_.get(); }
private: private:
...@@ -724,6 +733,20 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_le ...@@ -724,6 +733,20 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_le
API_END(); API_END();
} }
// C API entry point: writes the booster's feature names into out_strs and
// stores the number of names written in *out_len.
LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle, int* out_len, char** out_strs) {
  API_BEGIN();
  // The opaque handle is the address of a Booster wrapper object.
  const int num_written = reinterpret_cast<Booster*>(handle)->GetFeatureNames(out_strs);
  *out_len = num_written;
  API_END();
}
// C API entry point: stores the booster's feature count in *out_len.
LIGHTGBM_C_EXPORT int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len) {
  API_BEGIN();
  const Booster* booster = reinterpret_cast<Booster*>(handle);
  // Feature indices are zero-based, so the count is the largest index plus one.
  const int max_feature_idx = booster->GetBoosting()->MaxFeatureIdx();
  *out_len = max_feature_idx + 1;
  API_END();
}
LIGHTGBM_C_EXPORT int LGBM_BoosterGetEval(BoosterHandle handle, LIGHTGBM_C_EXPORT int LGBM_BoosterGetEval(BoosterHandle handle,
int data_idx, int data_idx,
int* out_len, int* out_len,
......
...@@ -124,6 +124,12 @@ class TestEngine(unittest.TestCase): ...@@ -124,6 +124,12 @@ class TestEngine(unittest.TestCase):
metrics='l1', verbose_eval=False, metrics='l1', verbose_eval=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)]) callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
def test_feature_name(self):
    """Names passed to train() must come back unchanged from Booster.feature_name()."""
    train_set, _ = template.test_template(return_data=True)
    expected_names = ['f' + str(idx) for idx in range(13)]
    booster = lgb.train({'verbose': -1}, train_set,
                        num_boost_round=10, feature_name=expected_names)
    self.assertListEqual(expected_names, booster.feature_name())
def test_save_load_copy_pickle(self): def test_save_load_copy_pickle(self):
gbm = template.test_template(num_round=20, return_model=True) gbm = template.test_template(num_round=20, return_model=True)
_, ret_origin = template.test_template(init_model=gbm) _, ret_origin = template.test_template(init_model=gbm)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment