Unverified Commit 0a4d1908 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[python][sklearn] respect objective aliases (#4758)

* respect objective aliases

* Update test_sklearn.py

* revert removal of blank lines

* add argument name which is being overwritten in warning message
parent 15a63696
......@@ -578,10 +578,20 @@ class LGBMModel(_LGBMModelBase):
feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None):
"""Docstring is set after definition, using a template."""
params = self.get_params()
params.pop('objective', None)
for alias in _ConfigAliases.get('objective'):
if alias in params:
self._objective = params.pop(alias)
_log_warning(f"Found '{alias}' in params. Will use it instead of 'objective' argument")
if self._objective is None:
if isinstance(self, LGBMRegressor):
self._objective = "regression"
elif isinstance(self, LGBMClassifier):
if self._n_classes > 2:
self._objective = "multiclass"
else:
self._objective = "binary"
elif isinstance(self, LGBMRanker):
self._objective = "lambdarank"
......@@ -589,10 +599,11 @@ class LGBMModel(_LGBMModelBase):
raise ValueError("Unknown LGBMModel type.")
if callable(self._objective):
self._fobj = _ObjectiveFunctionWrapper(self._objective)
params['objective'] = 'None' # objective = nullptr for unknown objective
else:
self._fobj = None
params['objective'] = self._objective
params = self.get_params()
# user can set verbose with kwargs, it has higher priority
if self.silent != "warn":
_log_warning("'silent' argument is deprecated and will be removed in a future release of LightGBM. "
......@@ -603,13 +614,13 @@ class LGBMModel(_LGBMModelBase):
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params['verbose'] = -1
params.pop('silent', None)
params.pop('importance_type', None)
params.pop('n_estimators', None)
params.pop('class_weight', None)
if isinstance(params['random_state'], np.random.RandomState):
params['random_state'] = params['random_state'].randint(np.iinfo(np.int32).max)
for alias in _ConfigAliases.get('objective'):
params.pop(alias, None)
if self._n_classes is not None and self._n_classes > 2:
for alias in _ConfigAliases.get('num_class'):
params.pop(alias, None)
......@@ -621,9 +632,6 @@ class LGBMModel(_LGBMModelBase):
_log_warning(f"Found '{alias}' in params. Will use it instead of 'eval_at' argument")
eval_at = params.pop(alias)
params['eval_at'] = eval_at
params['objective'] = self._objective
if self._fobj:
params['objective'] = 'None' # objective = nullptr for unknown objective
# Do not modify original args in fit function
# Refer to https://github.com/microsoft/LightGBM/pull/2619
......@@ -952,12 +960,6 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
self._classes = self._le.classes_
self._n_classes = len(self._classes)
if self._n_classes > 2:
# Switch to using a multiclass objective in the underlying LGBM instance
ova_aliases = {"multiclassova", "multiclass_ova", "ova", "ovr"}
if self._objective not in ova_aliases and not callable(self._objective):
self._objective = "multiclass"
if not callable(eval_metric):
if isinstance(eval_metric, (str, type(None))):
eval_metric = [eval_metric]
......
......@@ -157,13 +157,41 @@ def test_eval_at_aliases():
X_test, y_test = load_svmlight_file(str(rank_example_dir / 'rank.test'))
q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query'))
q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
for alias in ('eval_at', 'ndcg_eval_at', 'ndcg_at', 'map_eval_at', 'map_at'):
for alias in lgb.basic._ConfigAliases.get('eval_at'):
gbm = lgb.LGBMRanker(n_estimators=5, **{alias: [1, 2, 3, 9]})
with pytest.warns(UserWarning, match=f"Found '{alias}' in params. Will use it instead of 'eval_at' argument"):
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], eval_group=[q_test])
assert list(gbm.evals_result_['valid_0'].keys()) == ['ndcg@1', 'ndcg@2', 'ndcg@3', 'ndcg@9']
@pytest.mark.parametrize("custom_objective", [True, False])
def test_objective_aliases(custom_objective):
    """Every alias of the 'objective' parameter yields identical eval results.

    Non-canonical aliases must also emit a UserWarning saying the alias
    overrides the 'objective' argument.
    """
    data, target = load_boston(return_X_y=True)
    data_train, data_test, target_train, target_test = train_test_split(
        data, target, test_size=0.1, random_state=42
    )
    if custom_objective:
        obj, metric_name = custom_dummy_obj, 'l2'  # 'l2' is the default metric
    else:
        obj, metric_name = 'mape', 'mape'
    per_alias_evals = []
    for alias in lgb.basic._ConfigAliases.get('objective'):
        model = lgb.LGBMRegressor(n_estimators=5, **{alias: obj})
        if alias == 'objective':
            # canonical name: no warning expected
            model.fit(data_train, target_train, eval_set=[(data_test, target_test)])
        else:
            expected_msg = f"Found '{alias}' in params. Will use it instead of 'objective' argument"
            with pytest.warns(UserWarning, match=expected_msg):
                model.fit(data_train, target_train, eval_set=[(data_test, target_test)])
        assert list(model.evals_result_['valid_0'].keys()) == [metric_name]
        per_alias_evals.append(model.evals_result_['valid_0'][metric_name])
    # each boosting round must produce the same metric value regardless of alias
    stacked = np.array(per_alias_evals).T
    for round_values in stacked:
        np.testing.assert_allclose(round_values, round_values[0])
    if custom_objective:
        # dummy objective: the estimator learns nothing, so all rounds are equal too
        np.testing.assert_allclose(stacked, stacked[0][0])
def test_regression_with_custom_objective():
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
......@@ -910,10 +938,8 @@ def test_metrics():
assert 'multi_logloss' in gbm.evals_result_['training']
assert 'multi_error' in gbm.evals_result_['training']
# invalid objective is replaced with default multiclass one
# and invalid binary metric is replaced with multiclass alternative
gbm = lgb.LGBMClassifier(objective='invalid_obj',
**params).fit(eval_metric='binary_error', **params_fit)
# invalid binary metric is replaced with multiclass alternative
gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit)
assert gbm.objective_ == 'multiclass'
assert len(gbm.evals_result_['training']) == 2
assert 'multi_logloss' in gbm.evals_result_['training']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment