"tests/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "d8bb57841a27ac08051c31bec1c5f0a6055af504"
Commit dae75516 authored by Misha Lisovyi's avatar Misha Lisovyi Committed by Nikita Titov
Browse files

[python] Configure choice of `feature_importance_` in sklearn API (#1470)

* ignore vim temporary files

* add importance_type arg to sklearn API

* update documentation info

* remove a trailing space

* remove trailing space (again :))

* add instructions on importance choices to sklearn API

* drop mention of constructor in the feature type setting

* adding a test for different feature types

* remove trailing spaces, make shorter assert in feature importance type handling test

* fixing style issue introduced with the new test
parent fac4afe0
...@@ -181,6 +181,7 @@ BundleArtifacts/ ...@@ -181,6 +181,7 @@ BundleArtifacts/
ClientBin/ ClientBin/
~$* ~$*
*~ *~
.*.swp
*.dbmdl *.dbmdl
*.dbproj.schemaview *.dbproj.schemaview
*.pfx *.pfx
......
...@@ -134,7 +134,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -134,7 +134,7 @@ class LGBMModel(_LGBMModelBase):
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20, min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
subsample=1., subsample_freq=0, colsample_bytree=1., subsample=1., subsample_freq=0, colsample_bytree=1.,
reg_alpha=0., reg_lambda=0., random_state=None, reg_alpha=0., reg_lambda=0., random_state=None,
n_jobs=-1, silent=True, **kwargs): n_jobs=-1, silent=True, importance_type='split', **kwargs):
"""Construct a gradient boosting model. """Construct a gradient boosting model.
Parameters Parameters
...@@ -193,6 +193,10 @@ class LGBMModel(_LGBMModelBase): ...@@ -193,6 +193,10 @@ class LGBMModel(_LGBMModelBase):
Number of parallel threads. Number of parallel threads.
silent : bool, optional (default=True) silent : bool, optional (default=True)
Whether to print messages while running boosting. Whether to print messages while running boosting.
importance_type : str, optional (default='split')
The type of feature importance to be filled into ``feature_importances_``.
If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature.
**kwargs : other parameters **kwargs : other parameters
Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters. Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
...@@ -264,6 +268,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -264,6 +268,7 @@ class LGBMModel(_LGBMModelBase):
self.random_state = random_state self.random_state = random_state
self.n_jobs = n_jobs self.n_jobs = n_jobs
self.silent = silent self.silent = silent
self.importance_type = importance_type
self._Booster = None self._Booster = None
self._evals_result = None self._evals_result = None
self._best_score = None self._best_score = None
...@@ -399,6 +404,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -399,6 +404,7 @@ class LGBMModel(_LGBMModelBase):
if 'verbose' not in params and self.silent: if 'verbose' not in params and self.silent:
params['verbose'] = 0 params['verbose'] = 0
params.pop('silent', None) params.pop('silent', None)
params.pop('importance_type', None)
params.pop('n_estimators', None) params.pop('n_estimators', None)
params.pop('class_weight', None) params.pop('class_weight', None)
if self._n_classes is not None and self._n_classes > 2: if self._n_classes is not None and self._n_classes > 2:
...@@ -606,11 +612,13 @@ class LGBMModel(_LGBMModelBase): ...@@ -606,11 +612,13 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
Feature importance in sklearn interface used to normalize to 1, Feature importance in sklearn interface used to normalize to 1,
it's deprecated after 2.0.4 and same as Booster.feature_importance() now. it's deprecated after 2.0.4 and is the same as Booster.feature_importance() now.
``importance_type`` attribute is passed to the function
to configure the type of importance values to be extracted.
""" """
if self._n_features is None: if self._n_features is None:
raise LGBMNotFittedError('No feature_importances found. Need to call fit beforehand.') raise LGBMNotFittedError('No feature_importances found. Need to call fit beforehand.')
return self.booster_.feature_importance() return self.booster_.feature_importance(importance_type=self.importance_type)
class LGBMRegressor(LGBMModel, _LGBMRegressorBase): class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
......
...@@ -166,6 +166,19 @@ class TestSklearn(unittest.TestCase): ...@@ -166,6 +166,19 @@ class TestSklearn(unittest.TestCase):
importances = clf.feature_importances_ importances = clf.feature_importances_
self.assertEqual(len(importances), 4) self.assertEqual(len(importances), 4)
def test_feature_importances_type(self):
    """Verify that 'split' and 'gain' importance types produce different values.

    Fits one classifier, then reads ``feature_importances_`` under each
    ``importance_type`` setting and compares the largest importance value.
    """
    dataset = load_iris()
    model = lgb.LGBMClassifier(n_estimators=100)
    model.fit(dataset.data, dataset.target)
    top_importance = {}
    for imp_type in ('split', 'gain'):
        model.set_params(importance_type=imp_type)
        top_importance[imp_type] = max(model.feature_importances_)
    # The largest importance must differ between the two definitions;
    # the smallest values may legitimately coincide (e.g. both zero).
    self.assertNotEqual(top_importance['split'], top_importance['gain'])
def test_sklearn_backward_compatibility(self): def test_sklearn_backward_compatibility(self):
iris = load_iris() iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment