Unverified commit 4072e9f7, authored by Nikita Titov and committed by GitHub
Browse files

[python][sklearn] remove `verbose` argument from `fit()` method (#4832)

parent 67b4205c
......@@ -1205,7 +1205,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
+ _base_doc[_base_doc.find('eval_metric :'):])
_base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+ _base_doc[_base_doc.find('verbose :'):])
+ _base_doc[_base_doc.find('feature_name :'):])
# DaskLGBMClassifier support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
......@@ -1388,7 +1388,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
+ _base_doc[_base_doc.find('eval_metric :'):])
_base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+ _base_doc[_base_doc.find('verbose :'):])
+ _base_doc[_base_doc.find('feature_name :'):])
# DaskLGBMRegressor support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
......@@ -1554,7 +1554,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
_base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+ "eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))\n"
+ f"{' ':8}The evaluation positions of the specified metric.\n"
+ f"{' ':4}{_base_doc[_base_doc.find('verbose :'):]}")
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}")
# DaskLGBMRanker support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
......
......@@ -258,17 +258,6 @@ _lgbmmodel_doc_fit = (
If there's more than one, will check all of them. But the training data is ignored anyway.
To check only the first metric, set the ``first_metric_only`` parameter to ``True``
in additional parameters ``**kwargs`` of the model constructor.
verbose : bool or int, optional (default=True)
Requires at least one evaluation data.
If True, the eval metric on the eval set is printed at each boosting stage.
If int, the eval metric on the eval set is printed at every ``verbose`` boosting stage.
The last boosting stage or the boosting stage found by using ``early_stopping_rounds`` is also printed.
.. rubric:: Example
With ``verbose`` = 4 and at least one item in ``eval_set``,
an evaluation metric is printed every 4 (instead of 1) boosting stages.
feature_name : list of str, or 'auto', optional (default='auto')
Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used.
......@@ -597,7 +586,7 @@ class LGBMModel(_LGBMModelBase):
sample_weight=None, init_score=None, group=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_class_weight=None, eval_init_score=None, eval_group=None,
eval_metric=None, early_stopping_rounds=None, verbose='warn',
eval_metric=None, early_stopping_rounds=None,
feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None):
"""Docstring is set after definition, using a template."""
......@@ -752,16 +741,6 @@ class LGBMModel(_LGBMModelBase):
else:
callbacks = copy.copy(callbacks) # don't use deepcopy here to allow non-serializable objects
if verbose != 'warn':
_log_warning("'verbose' argument is deprecated and will be removed in a future release of LightGBM. "
"Pass 'log_evaluation()' callback via 'callbacks' argument instead.")
else:
if callbacks: # assume user has already specified log_evaluation callback
verbose = False
else:
verbose = True
callbacks.append(log_evaluation(int(verbose)))
evals_result = {}
callbacks.append(record_evaluation(evals_result))
......@@ -931,13 +910,13 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
sample_weight=None, init_score=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
verbose='warn', feature_name='auto', categorical_feature='auto',
feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel."""
super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_init_score=eval_init_score, eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
return self
......@@ -957,7 +936,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
sample_weight=None, init_score=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_class_weight=None, eval_init_score=None, eval_metric=None,
early_stopping_rounds=None, verbose='warn',
early_stopping_rounds=None,
feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel."""
......@@ -1004,7 +983,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_class_weight=eval_class_weight, eval_init_score=eval_init_score,
eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature,
feature_name=feature_name, categorical_feature=categorical_feature,
callbacks=callbacks, init_model=init_model)
return self
......@@ -1079,7 +1058,7 @@ class LGBMRanker(LGBMModel):
sample_weight=None, init_score=None, group=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_group=None, eval_metric=None,
eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None, verbose='warn',
eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None,
feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel."""
......@@ -1103,7 +1082,7 @@ class LGBMRanker(LGBMModel):
super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group,
eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
return self
......
......@@ -900,8 +900,7 @@ def test_eval_set_no_early_stopping(task, output, eval_sizes, eval_names_prefix,
'eval_names': eval_names,
'eval_sample_weight': eval_sample_weight,
'eval_init_score': eval_init_score,
'eval_metric': eval_metrics,
'verbose': True
'eval_metric': eval_metrics
}
if task == 'ranking':
fit_params.update(
......
......@@ -143,7 +143,7 @@ def test_plot_split_value_histogram(params, breast_cancer_split, train_data):
def test_plot_tree(breast_cancer_split):
X_train, _, y_train, _ = breast_cancer_split
gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
gbm.fit(X_train, y_train, verbose=False)
gbm.fit(X_train, y_train)
with pytest.raises(IndexError):
lgb.plot_tree(gbm, tree_index=83)
......@@ -161,7 +161,7 @@ def test_create_tree_digraph(breast_cancer_split):
constraints = [-1, 1] * int(X_train.shape[1] / 2)
gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1, monotone_constraints=constraints)
gbm.fit(X_train, y_train, verbose=False)
gbm.fit(X_train, y_train)
with pytest.raises(IndexError):
lgb.create_tree_digraph(gbm, tree_index=83)
......@@ -265,7 +265,7 @@ def test_plot_metrics(params, breast_cancer_split, train_data):
lgb.plot_metric(evals_result1)
gbm2 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)])
ax4 = lgb.plot_metric(gbm2, title=None, xlabel=None, ylabel=None)
assert isinstance(ax4, matplotlib.axes.Axes)
assert ax4.get_title() == ''
......
......@@ -92,7 +92,7 @@ def test_binary():
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = log_loss(y_test, gbm.predict_proba(X_test))
assert ret < 0.12
assert gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret)
......@@ -102,7 +102,7 @@ def test_regression():
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = mean_squared_error(y_test, gbm.predict(X_test))
assert ret < 7
assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
......@@ -112,7 +112,7 @@ def test_multiclass():
X, y = load_digits(n_class=10, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = multi_error(y_test, gbm.predict(X_test))
assert ret < 0.05
ret = multi_logloss(y_test, gbm.predict_proba(X_test))
......@@ -128,7 +128,7 @@ def test_lambdarank():
q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
gbm = lgb.LGBMRanker(n_estimators=50)
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
assert gbm.best_iteration_ <= 24
assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674
......@@ -143,7 +143,7 @@ def test_xendcg():
q_test = np.loadtxt(str(xendcg_example_dir / 'rank.test.query'))
gbm = lgb.LGBMRanker(n_estimators=50, objective='rank_xendcg', random_state=5, n_jobs=1)
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10,
eval_metric='ndcg',
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
assert gbm.best_iteration_ <= 24
......@@ -196,7 +196,7 @@ def test_regression_with_custom_objective():
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1, objective=objective_ls)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = mean_squared_error(y_test, gbm.predict(X_test))
assert ret < 7.0
assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
......@@ -206,7 +206,7 @@ def test_binary_classification_with_custom_objective():
X, y = load_digits(n_class=2, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1, objective=logregobj)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
# prediction result is actually not transformed (is raw) due to custom objective
y_pred_raw = gbm.predict_proba(X_test)
assert not np.all(y_pred_raw >= 0)
......@@ -285,8 +285,7 @@ def test_grid_search():
grid_params = dict(boosting_type=['rf', 'gbdt'],
n_estimators=[4, 6],
reg_alpha=[0.01, 0.005])
fit_params = dict(verbose=False,
eval_set=[(X_val, y_val)],
fit_params = dict(eval_set=[(X_val, y_val)],
eval_metric=constant_metric,
early_stopping_rounds=2)
grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params,
......@@ -317,8 +316,7 @@ def test_random_search():
param_dist = dict(boosting_type=['rf', 'gbdt'],
n_estimators=[np.random.randint(low=3, high=10) for i in range(n_iter)],
reg_alpha=[np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)])
fit_params = dict(verbose=False,
eval_set=[(X_val, y_val)],
fit_params = dict(eval_set=[(X_val, y_val)],
eval_metric=constant_metric,
early_stopping_rounds=2)
rand = RandomizedSearchCV(estimator=lgb.LGBMClassifier(**params),
......@@ -422,7 +420,7 @@ def test_regressor_chain():
def test_clone_and_property():
X, y = load_boston(return_X_y=True)
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X, y, verbose=False)
gbm.fit(X, y)
gbm_clone = clone(gbm)
assert isinstance(gbm.booster_, lgb.Booster)
......@@ -430,7 +428,7 @@ def test_clone_and_property():
X, y = load_digits(n_class=2, return_X_y=True)
clf = lgb.LGBMClassifier(n_estimators=10, verbose=-1)
clf.fit(X, y, verbose=False)
clf.fit(X, y)
assert sorted(clf.classes_) == [0, 1]
assert clf.n_classes_ == 2
assert isinstance(clf.booster_, lgb.Booster)
......@@ -443,7 +441,7 @@ def test_joblib():
gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
verbose=-1, importance_type='split')
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
eval_metric=mse, early_stopping_rounds=5, verbose=False,
eval_metric=mse, early_stopping_rounds=5,
callbacks=[lgb.reset_parameter(learning_rate=list(np.arange(1, 0, -0.1)))])
joblib.dump(gbm, 'lgb.pkl') # test model with custom functions
......@@ -695,7 +693,7 @@ def test_evaluate_train_set():
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])
assert len(gbm.evals_result_) == 2
assert 'training' in gbm.evals_result_
assert len(gbm.evals_result_['training']) == 1
......@@ -708,7 +706,7 @@ def test_evaluate_train_set():
def test_metrics():
X, y = load_boston(return_X_y=True)
params = {'n_estimators': 2, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# no custom objective, no custom metric
# default metric
......@@ -750,8 +748,7 @@ def test_metrics():
params_classification = {'n_estimators': 2, 'verbose': -1,
'objective': 'binary', 'metric': 'binary_logloss'}
params_fit_classification = {'X': X_classification, 'y': y_classification,
'eval_set': (X_classification, y_classification),
'verbose': False}
'eval_set': (X_classification, y_classification)}
gbm = lgb.LGBMClassifier(**params_classification).fit(eval_metric=['fair', 'error'],
**params_fit_classification)
assert len(gbm.evals_result_['training']) == 3
......@@ -930,7 +927,7 @@ def test_metrics():
assert 'error' in gbm.evals_result_['training']
X, y = load_digits(n_class=3, return_X_y=True)
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# default metric and invalid binary metric is replaced with multiclass alternative
gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit)
......@@ -955,7 +952,7 @@ def test_metrics():
assert 'multi_error' in gbm.evals_result_['training']
X, y = load_digits(n_class=2, return_X_y=True)
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# default metric and invalid multiclass metric is replaced with binary alternative
gbm = lgb.LGBMClassifier(**params).fit(eval_metric='multi_error', **params_fit)
......@@ -975,7 +972,7 @@ def test_multiple_eval_metrics():
X, y = load_breast_cancer(return_X_y=True)
params = {'n_estimators': 2, 'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# Verify that can receive a list of metrics, only callable
gbm = lgb.LGBMClassifier(**params).fit(eval_metric=[constant_metric, decreasing_metric], **params_fit)
......@@ -1016,7 +1013,7 @@ def test_inf_handle():
weight = np.full(nrows, 1e10)
params = {'n_estimators': 20, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
'verbose': False, 'early_stopping_rounds': 5}
'early_stopping_rounds': 5}
gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf)
......@@ -1029,7 +1026,7 @@ def test_nan_handle():
weight = np.zeros(nrows)
params = {'n_estimators': 20, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
'verbose': False, 'early_stopping_rounds': 5}
'early_stopping_rounds': 5}
gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.nan)
......@@ -1066,8 +1063,7 @@ def test_first_metric_only():
'seed': 123}
params_fit = {'X': X_train,
'y': y_train,
'early_stopping_rounds': 5,
'verbose': False}
'early_stopping_rounds': 5}
iter_valid1_l1 = 3
iter_valid1_l2 = 18
......@@ -1146,8 +1142,7 @@ def test_class_weight():
gbm.fit(X_train, y_train,
eval_set=[(X_train, y_train), (X_test, y_test), (X_test, y_test),
(X_test, y_test), (X_test, y_test)],
eval_class_weight=['balanced', None, 'balanced', {1: 10, 4: 20}, {5: 30, 2: 40}],
verbose=False)
eval_class_weight=['balanced', None, 'balanced', {1: 10, 4: 20}, {5: 30, 2: 40}])
for eval_set1, eval_set2 in itertools.combinations(gbm.evals_result_.keys(), 2):
for metric in gbm.evals_result_[eval_set1]:
np.testing.assert_raises(AssertionError,
......@@ -1158,8 +1153,7 @@ def test_class_weight():
gbm_str.fit(X_train, y_train_str,
eval_set=[(X_train, y_train_str), (X_test, y_test_str),
(X_test, y_test_str), (X_test, y_test_str), (X_test, y_test_str)],
eval_class_weight=['balanced', None, 'balanced', {'1': 10, '4': 20}, {'5': 30, '2': 40}],
verbose=False)
eval_class_weight=['balanced', None, 'balanced', {'1': 10, '4': 20}, {'5': 30, '2': 40}])
for eval_set1, eval_set2 in itertools.combinations(gbm_str.evals_result_.keys(), 2):
for metric in gbm_str.evals_result_[eval_set1]:
np.testing.assert_raises(AssertionError,
......@@ -1175,10 +1169,9 @@ def test_class_weight():
def test_continue_training_with_model():
X, y = load_digits(n_class=3, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
verbose=False)
init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test))
gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
verbose=False, init_model=init_gbm)
init_model=init_gbm)
assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == len(gbm.evals_result_['valid_0']['multi_logloss'])
assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == 5
assert gbm.evals_result_['valid_0']['multi_logloss'][-1] < init_gbm.evals_result_['valid_0']['multi_logloss'][-1]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment