Unverified Commit 4072e9f7 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[python][sklearn] remove `verbose` argument from `fit()` method (#4832)

parent 67b4205c
...@@ -1205,7 +1205,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel): ...@@ -1205,7 +1205,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
+ _base_doc[_base_doc.find('eval_metric :'):]) + _base_doc[_base_doc.find('eval_metric :'):])
_base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')] _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+ _base_doc[_base_doc.find('verbose :'):]) + _base_doc[_base_doc.find('feature_name :'):])
# DaskLGBMClassifier support for callbacks and init_model is not tested # DaskLGBMClassifier support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
...@@ -1388,7 +1388,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel): ...@@ -1388,7 +1388,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
+ _base_doc[_base_doc.find('eval_metric :'):]) + _base_doc[_base_doc.find('eval_metric :'):])
_base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')] _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+ _base_doc[_base_doc.find('verbose :'):]) + _base_doc[_base_doc.find('feature_name :'):])
# DaskLGBMRegressor support for callbacks and init_model is not tested # DaskLGBMRegressor support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
...@@ -1554,7 +1554,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel): ...@@ -1554,7 +1554,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
_base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')] _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+ "eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))\n" + "eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))\n"
+ f"{' ':8}The evaluation positions of the specified metric.\n" + f"{' ':8}The evaluation positions of the specified metric.\n"
+ f"{' ':4}{_base_doc[_base_doc.find('verbose :'):]}") + f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}")
# DaskLGBMRanker support for callbacks and init_model is not tested # DaskLGBMRanker support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
......
...@@ -258,17 +258,6 @@ _lgbmmodel_doc_fit = ( ...@@ -258,17 +258,6 @@ _lgbmmodel_doc_fit = (
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
To check only the first metric, set the ``first_metric_only`` parameter to ``True`` To check only the first metric, set the ``first_metric_only`` parameter to ``True``
in additional parameters ``**kwargs`` of the model constructor. in additional parameters ``**kwargs`` of the model constructor.
verbose : bool or int, optional (default=True)
Requires at least one evaluation data.
If True, the eval metric on the eval set is printed at each boosting stage.
If int, the eval metric on the eval set is printed at every ``verbose`` boosting stage.
The last boosting stage or the boosting stage found by using ``early_stopping_rounds`` is also printed.
.. rubric:: Example
With ``verbose`` = 4 and at least one item in ``eval_set``,
an evaluation metric is printed every 4 (instead of 1) boosting stages.
feature_name : list of str, or 'auto', optional (default='auto') feature_name : list of str, or 'auto', optional (default='auto')
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used. If 'auto' and data is pandas DataFrame, data columns names are used.
...@@ -597,7 +586,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -597,7 +586,7 @@ class LGBMModel(_LGBMModelBase):
sample_weight=None, init_score=None, group=None, sample_weight=None, init_score=None, group=None,
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_class_weight=None, eval_init_score=None, eval_group=None, eval_class_weight=None, eval_init_score=None, eval_group=None,
eval_metric=None, early_stopping_rounds=None, verbose='warn', eval_metric=None, early_stopping_rounds=None,
feature_name='auto', categorical_feature='auto', feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None): callbacks=None, init_model=None):
"""Docstring is set after definition, using a template.""" """Docstring is set after definition, using a template."""
...@@ -752,16 +741,6 @@ class LGBMModel(_LGBMModelBase): ...@@ -752,16 +741,6 @@ class LGBMModel(_LGBMModelBase):
else: else:
callbacks = copy.copy(callbacks) # don't use deepcopy here to allow non-serializable objects callbacks = copy.copy(callbacks) # don't use deepcopy here to allow non-serializable objects
if verbose != 'warn':
_log_warning("'verbose' argument is deprecated and will be removed in a future release of LightGBM. "
"Pass 'log_evaluation()' callback via 'callbacks' argument instead.")
else:
if callbacks: # assume user has already specified log_evaluation callback
verbose = False
else:
verbose = True
callbacks.append(log_evaluation(int(verbose)))
evals_result = {} evals_result = {}
callbacks.append(record_evaluation(evals_result)) callbacks.append(record_evaluation(evals_result))
...@@ -931,13 +910,13 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel): ...@@ -931,13 +910,13 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
sample_weight=None, init_score=None, sample_weight=None, init_score=None,
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_metric=None, early_stopping_rounds=None, eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
verbose='warn', feature_name='auto', categorical_feature='auto', feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None): callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel.""" """Docstring is inherited from the LGBMModel."""
super().fit(X, y, sample_weight=sample_weight, init_score=init_score, super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_init_score=eval_init_score, eval_metric=eval_metric, eval_init_score=eval_init_score, eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name, early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model) categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
return self return self
...@@ -957,7 +936,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel): ...@@ -957,7 +936,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
sample_weight=None, init_score=None, sample_weight=None, init_score=None,
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_class_weight=None, eval_init_score=None, eval_metric=None, eval_class_weight=None, eval_init_score=None, eval_metric=None,
early_stopping_rounds=None, verbose='warn', early_stopping_rounds=None,
feature_name='auto', categorical_feature='auto', feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None): callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel.""" """Docstring is inherited from the LGBMModel."""
...@@ -1004,7 +983,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel): ...@@ -1004,7 +983,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
eval_names=eval_names, eval_sample_weight=eval_sample_weight, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, eval_class_weight=eval_class_weight, eval_init_score=eval_init_score,
eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds, eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature, feature_name=feature_name, categorical_feature=categorical_feature,
callbacks=callbacks, init_model=init_model) callbacks=callbacks, init_model=init_model)
return self return self
...@@ -1079,7 +1058,7 @@ class LGBMRanker(LGBMModel): ...@@ -1079,7 +1058,7 @@ class LGBMRanker(LGBMModel):
sample_weight=None, init_score=None, group=None, sample_weight=None, init_score=None, group=None,
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_group=None, eval_metric=None, eval_init_score=None, eval_group=None, eval_metric=None,
eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None, verbose='warn', eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None,
feature_name='auto', categorical_feature='auto', feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None): callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel.""" """Docstring is inherited from the LGBMModel."""
...@@ -1103,7 +1082,7 @@ class LGBMRanker(LGBMModel): ...@@ -1103,7 +1082,7 @@ class LGBMRanker(LGBMModel):
super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group, super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group,
eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric, eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name, early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model) categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
return self return self
......
...@@ -900,8 +900,7 @@ def test_eval_set_no_early_stopping(task, output, eval_sizes, eval_names_prefix, ...@@ -900,8 +900,7 @@ def test_eval_set_no_early_stopping(task, output, eval_sizes, eval_names_prefix,
'eval_names': eval_names, 'eval_names': eval_names,
'eval_sample_weight': eval_sample_weight, 'eval_sample_weight': eval_sample_weight,
'eval_init_score': eval_init_score, 'eval_init_score': eval_init_score,
'eval_metric': eval_metrics, 'eval_metric': eval_metrics
'verbose': True
} }
if task == 'ranking': if task == 'ranking':
fit_params.update( fit_params.update(
......
...@@ -143,7 +143,7 @@ def test_plot_split_value_histogram(params, breast_cancer_split, train_data): ...@@ -143,7 +143,7 @@ def test_plot_split_value_histogram(params, breast_cancer_split, train_data):
def test_plot_tree(breast_cancer_split): def test_plot_tree(breast_cancer_split):
X_train, _, y_train, _ = breast_cancer_split X_train, _, y_train, _ = breast_cancer_split
gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1) gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
gbm.fit(X_train, y_train, verbose=False) gbm.fit(X_train, y_train)
with pytest.raises(IndexError): with pytest.raises(IndexError):
lgb.plot_tree(gbm, tree_index=83) lgb.plot_tree(gbm, tree_index=83)
...@@ -161,7 +161,7 @@ def test_create_tree_digraph(breast_cancer_split): ...@@ -161,7 +161,7 @@ def test_create_tree_digraph(breast_cancer_split):
constraints = [-1, 1] * int(X_train.shape[1] / 2) constraints = [-1, 1] * int(X_train.shape[1] / 2)
gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1, monotone_constraints=constraints) gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1, monotone_constraints=constraints)
gbm.fit(X_train, y_train, verbose=False) gbm.fit(X_train, y_train)
with pytest.raises(IndexError): with pytest.raises(IndexError):
lgb.create_tree_digraph(gbm, tree_index=83) lgb.create_tree_digraph(gbm, tree_index=83)
...@@ -265,7 +265,7 @@ def test_plot_metrics(params, breast_cancer_split, train_data): ...@@ -265,7 +265,7 @@ def test_plot_metrics(params, breast_cancer_split, train_data):
lgb.plot_metric(evals_result1) lgb.plot_metric(evals_result1)
gbm2 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1) gbm2 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False) gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)])
ax4 = lgb.plot_metric(gbm2, title=None, xlabel=None, ylabel=None) ax4 = lgb.plot_metric(gbm2, title=None, xlabel=None, ylabel=None)
assert isinstance(ax4, matplotlib.axes.Axes) assert isinstance(ax4, matplotlib.axes.Axes)
assert ax4.get_title() == '' assert ax4.get_title() == ''
......
...@@ -92,7 +92,7 @@ def test_binary(): ...@@ -92,7 +92,7 @@ def test_binary():
X, y = load_breast_cancer(return_X_y=True) X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1) gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = log_loss(y_test, gbm.predict_proba(X_test)) ret = log_loss(y_test, gbm.predict_proba(X_test))
assert ret < 0.12 assert ret < 0.12
assert gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret) assert gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret)
...@@ -102,7 +102,7 @@ def test_regression(): ...@@ -102,7 +102,7 @@ def test_regression():
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1) gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = mean_squared_error(y_test, gbm.predict(X_test)) ret = mean_squared_error(y_test, gbm.predict(X_test))
assert ret < 7 assert ret < 7
assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret) assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
...@@ -112,7 +112,7 @@ def test_multiclass(): ...@@ -112,7 +112,7 @@ def test_multiclass():
X, y = load_digits(n_class=10, return_X_y=True) X, y = load_digits(n_class=10, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1) gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = multi_error(y_test, gbm.predict(X_test)) ret = multi_error(y_test, gbm.predict(X_test))
assert ret < 0.05 assert ret < 0.05
ret = multi_logloss(y_test, gbm.predict_proba(X_test)) ret = multi_logloss(y_test, gbm.predict_proba(X_test))
...@@ -128,7 +128,7 @@ def test_lambdarank(): ...@@ -128,7 +128,7 @@ def test_lambdarank():
q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query')) q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
gbm = lgb.LGBMRanker(n_estimators=50) gbm = lgb.LGBMRanker(n_estimators=50)
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
assert gbm.best_iteration_ <= 24 assert gbm.best_iteration_ <= 24
assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674 assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674
...@@ -143,7 +143,7 @@ def test_xendcg(): ...@@ -143,7 +143,7 @@ def test_xendcg():
q_test = np.loadtxt(str(xendcg_example_dir / 'rank.test.query')) q_test = np.loadtxt(str(xendcg_example_dir / 'rank.test.query'))
gbm = lgb.LGBMRanker(n_estimators=50, objective='rank_xendcg', random_state=5, n_jobs=1) gbm = lgb.LGBMRanker(n_estimators=50, objective='rank_xendcg', random_state=5, n_jobs=1)
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10,
eval_metric='ndcg', eval_metric='ndcg',
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
assert gbm.best_iteration_ <= 24 assert gbm.best_iteration_ <= 24
...@@ -196,7 +196,7 @@ def test_regression_with_custom_objective(): ...@@ -196,7 +196,7 @@ def test_regression_with_custom_objective():
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1, objective=objective_ls) gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1, objective=objective_ls)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
ret = mean_squared_error(y_test, gbm.predict(X_test)) ret = mean_squared_error(y_test, gbm.predict(X_test))
assert ret < 7.0 assert ret < 7.0
assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret) assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
...@@ -206,7 +206,7 @@ def test_binary_classification_with_custom_objective(): ...@@ -206,7 +206,7 @@ def test_binary_classification_with_custom_objective():
X, y = load_digits(n_class=2, return_X_y=True) X, y = load_digits(n_class=2, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1, objective=logregobj) gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1, objective=logregobj)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
# prediction result is actually not transformed (is raw) due to custom objective # prediction result is actually not transformed (is raw) due to custom objective
y_pred_raw = gbm.predict_proba(X_test) y_pred_raw = gbm.predict_proba(X_test)
assert not np.all(y_pred_raw >= 0) assert not np.all(y_pred_raw >= 0)
...@@ -285,8 +285,7 @@ def test_grid_search(): ...@@ -285,8 +285,7 @@ def test_grid_search():
grid_params = dict(boosting_type=['rf', 'gbdt'], grid_params = dict(boosting_type=['rf', 'gbdt'],
n_estimators=[4, 6], n_estimators=[4, 6],
reg_alpha=[0.01, 0.005]) reg_alpha=[0.01, 0.005])
fit_params = dict(verbose=False, fit_params = dict(eval_set=[(X_val, y_val)],
eval_set=[(X_val, y_val)],
eval_metric=constant_metric, eval_metric=constant_metric,
early_stopping_rounds=2) early_stopping_rounds=2)
grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params, grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params,
...@@ -317,8 +316,7 @@ def test_random_search(): ...@@ -317,8 +316,7 @@ def test_random_search():
param_dist = dict(boosting_type=['rf', 'gbdt'], param_dist = dict(boosting_type=['rf', 'gbdt'],
n_estimators=[np.random.randint(low=3, high=10) for i in range(n_iter)], n_estimators=[np.random.randint(low=3, high=10) for i in range(n_iter)],
reg_alpha=[np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)]) reg_alpha=[np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)])
fit_params = dict(verbose=False, fit_params = dict(eval_set=[(X_val, y_val)],
eval_set=[(X_val, y_val)],
eval_metric=constant_metric, eval_metric=constant_metric,
early_stopping_rounds=2) early_stopping_rounds=2)
rand = RandomizedSearchCV(estimator=lgb.LGBMClassifier(**params), rand = RandomizedSearchCV(estimator=lgb.LGBMClassifier(**params),
...@@ -422,7 +420,7 @@ def test_regressor_chain(): ...@@ -422,7 +420,7 @@ def test_regressor_chain():
def test_clone_and_property(): def test_clone_and_property():
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1) gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X, y, verbose=False) gbm.fit(X, y)
gbm_clone = clone(gbm) gbm_clone = clone(gbm)
assert isinstance(gbm.booster_, lgb.Booster) assert isinstance(gbm.booster_, lgb.Booster)
...@@ -430,7 +428,7 @@ def test_clone_and_property(): ...@@ -430,7 +428,7 @@ def test_clone_and_property():
X, y = load_digits(n_class=2, return_X_y=True) X, y = load_digits(n_class=2, return_X_y=True)
clf = lgb.LGBMClassifier(n_estimators=10, verbose=-1) clf = lgb.LGBMClassifier(n_estimators=10, verbose=-1)
clf.fit(X, y, verbose=False) clf.fit(X, y)
assert sorted(clf.classes_) == [0, 1] assert sorted(clf.classes_) == [0, 1]
assert clf.n_classes_ == 2 assert clf.n_classes_ == 2
assert isinstance(clf.booster_, lgb.Booster) assert isinstance(clf.booster_, lgb.Booster)
...@@ -443,7 +441,7 @@ def test_joblib(): ...@@ -443,7 +441,7 @@ def test_joblib():
gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj, gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
verbose=-1, importance_type='split') verbose=-1, importance_type='split')
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
eval_metric=mse, early_stopping_rounds=5, verbose=False, eval_metric=mse, early_stopping_rounds=5,
callbacks=[lgb.reset_parameter(learning_rate=list(np.arange(1, 0, -0.1)))]) callbacks=[lgb.reset_parameter(learning_rate=list(np.arange(1, 0, -0.1)))])
joblib.dump(gbm, 'lgb.pkl') # test model with custom functions joblib.dump(gbm, 'lgb.pkl') # test model with custom functions
...@@ -695,7 +693,7 @@ def test_evaluate_train_set(): ...@@ -695,7 +693,7 @@ def test_evaluate_train_set():
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1) gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])
assert len(gbm.evals_result_) == 2 assert len(gbm.evals_result_) == 2
assert 'training' in gbm.evals_result_ assert 'training' in gbm.evals_result_
assert len(gbm.evals_result_['training']) == 1 assert len(gbm.evals_result_['training']) == 1
...@@ -708,7 +706,7 @@ def test_evaluate_train_set(): ...@@ -708,7 +706,7 @@ def test_evaluate_train_set():
def test_metrics(): def test_metrics():
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
params = {'n_estimators': 2, 'verbose': -1} params = {'n_estimators': 2, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# no custom objective, no custom metric # no custom objective, no custom metric
# default metric # default metric
...@@ -750,8 +748,7 @@ def test_metrics(): ...@@ -750,8 +748,7 @@ def test_metrics():
params_classification = {'n_estimators': 2, 'verbose': -1, params_classification = {'n_estimators': 2, 'verbose': -1,
'objective': 'binary', 'metric': 'binary_logloss'} 'objective': 'binary', 'metric': 'binary_logloss'}
params_fit_classification = {'X': X_classification, 'y': y_classification, params_fit_classification = {'X': X_classification, 'y': y_classification,
'eval_set': (X_classification, y_classification), 'eval_set': (X_classification, y_classification)}
'verbose': False}
gbm = lgb.LGBMClassifier(**params_classification).fit(eval_metric=['fair', 'error'], gbm = lgb.LGBMClassifier(**params_classification).fit(eval_metric=['fair', 'error'],
**params_fit_classification) **params_fit_classification)
assert len(gbm.evals_result_['training']) == 3 assert len(gbm.evals_result_['training']) == 3
...@@ -930,7 +927,7 @@ def test_metrics(): ...@@ -930,7 +927,7 @@ def test_metrics():
assert 'error' in gbm.evals_result_['training'] assert 'error' in gbm.evals_result_['training']
X, y = load_digits(n_class=3, return_X_y=True) X, y = load_digits(n_class=3, return_X_y=True)
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# default metric and invalid binary metric is replaced with multiclass alternative # default metric and invalid binary metric is replaced with multiclass alternative
gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit) gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit)
...@@ -955,7 +952,7 @@ def test_metrics(): ...@@ -955,7 +952,7 @@ def test_metrics():
assert 'multi_error' in gbm.evals_result_['training'] assert 'multi_error' in gbm.evals_result_['training']
X, y = load_digits(n_class=2, return_X_y=True) X, y = load_digits(n_class=2, return_X_y=True)
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# default metric and invalid multiclass metric is replaced with binary alternative # default metric and invalid multiclass metric is replaced with binary alternative
gbm = lgb.LGBMClassifier(**params).fit(eval_metric='multi_error', **params_fit) gbm = lgb.LGBMClassifier(**params).fit(eval_metric='multi_error', **params_fit)
...@@ -975,7 +972,7 @@ def test_multiple_eval_metrics(): ...@@ -975,7 +972,7 @@ def test_multiple_eval_metrics():
X, y = load_breast_cancer(return_X_y=True) X, y = load_breast_cancer(return_X_y=True)
params = {'n_estimators': 2, 'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'} params = {'n_estimators': 2, 'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
# Verify that can receive a list of metrics, only callable # Verify that can receive a list of metrics, only callable
gbm = lgb.LGBMClassifier(**params).fit(eval_metric=[constant_metric, decreasing_metric], **params_fit) gbm = lgb.LGBMClassifier(**params).fit(eval_metric=[constant_metric, decreasing_metric], **params_fit)
...@@ -1016,7 +1013,7 @@ def test_inf_handle(): ...@@ -1016,7 +1013,7 @@ def test_inf_handle():
weight = np.full(nrows, 1e10) weight = np.full(nrows, 1e10)
params = {'n_estimators': 20, 'verbose': -1} params = {'n_estimators': 20, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y), params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
'verbose': False, 'early_stopping_rounds': 5} 'early_stopping_rounds': 5}
gbm = lgb.LGBMRegressor(**params).fit(**params_fit) gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf) np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf)
...@@ -1029,7 +1026,7 @@ def test_nan_handle(): ...@@ -1029,7 +1026,7 @@ def test_nan_handle():
weight = np.zeros(nrows) weight = np.zeros(nrows)
params = {'n_estimators': 20, 'verbose': -1} params = {'n_estimators': 20, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y), params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
'verbose': False, 'early_stopping_rounds': 5} 'early_stopping_rounds': 5}
gbm = lgb.LGBMRegressor(**params).fit(**params_fit) gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.nan) np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.nan)
...@@ -1066,8 +1063,7 @@ def test_first_metric_only(): ...@@ -1066,8 +1063,7 @@ def test_first_metric_only():
'seed': 123} 'seed': 123}
params_fit = {'X': X_train, params_fit = {'X': X_train,
'y': y_train, 'y': y_train,
'early_stopping_rounds': 5, 'early_stopping_rounds': 5}
'verbose': False}
iter_valid1_l1 = 3 iter_valid1_l1 = 3
iter_valid1_l2 = 18 iter_valid1_l2 = 18
...@@ -1146,8 +1142,7 @@ def test_class_weight(): ...@@ -1146,8 +1142,7 @@ def test_class_weight():
gbm.fit(X_train, y_train, gbm.fit(X_train, y_train,
eval_set=[(X_train, y_train), (X_test, y_test), (X_test, y_test), eval_set=[(X_train, y_train), (X_test, y_test), (X_test, y_test),
(X_test, y_test), (X_test, y_test)], (X_test, y_test), (X_test, y_test)],
eval_class_weight=['balanced', None, 'balanced', {1: 10, 4: 20}, {5: 30, 2: 40}], eval_class_weight=['balanced', None, 'balanced', {1: 10, 4: 20}, {5: 30, 2: 40}])
verbose=False)
for eval_set1, eval_set2 in itertools.combinations(gbm.evals_result_.keys(), 2): for eval_set1, eval_set2 in itertools.combinations(gbm.evals_result_.keys(), 2):
for metric in gbm.evals_result_[eval_set1]: for metric in gbm.evals_result_[eval_set1]:
np.testing.assert_raises(AssertionError, np.testing.assert_raises(AssertionError,
...@@ -1158,8 +1153,7 @@ def test_class_weight(): ...@@ -1158,8 +1153,7 @@ def test_class_weight():
gbm_str.fit(X_train, y_train_str, gbm_str.fit(X_train, y_train_str,
eval_set=[(X_train, y_train_str), (X_test, y_test_str), eval_set=[(X_train, y_train_str), (X_test, y_test_str),
(X_test, y_test_str), (X_test, y_test_str), (X_test, y_test_str)], (X_test, y_test_str), (X_test, y_test_str), (X_test, y_test_str)],
eval_class_weight=['balanced', None, 'balanced', {'1': 10, '4': 20}, {'5': 30, '2': 40}], eval_class_weight=['balanced', None, 'balanced', {'1': 10, '4': 20}, {'5': 30, '2': 40}])
verbose=False)
for eval_set1, eval_set2 in itertools.combinations(gbm_str.evals_result_.keys(), 2): for eval_set1, eval_set2 in itertools.combinations(gbm_str.evals_result_.keys(), 2):
for metric in gbm_str.evals_result_[eval_set1]: for metric in gbm_str.evals_result_[eval_set1]:
np.testing.assert_raises(AssertionError, np.testing.assert_raises(AssertionError,
...@@ -1175,10 +1169,9 @@ def test_class_weight(): ...@@ -1175,10 +1169,9 @@ def test_class_weight():
def test_continue_training_with_model(): def test_continue_training_with_model():
X, y = load_digits(n_class=3, return_X_y=True) X, y = load_digits(n_class=3, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test), init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test))
verbose=False)
gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test), gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
verbose=False, init_model=init_gbm) init_model=init_gbm)
assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == len(gbm.evals_result_['valid_0']['multi_logloss']) assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == len(gbm.evals_result_['valid_0']['multi_logloss'])
assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == 5 assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == 5
assert gbm.evals_result_['valid_0']['multi_logloss'][-1] < init_gbm.evals_result_['valid_0']['multi_logloss'][-1] assert gbm.evals_result_['valid_0']['multi_logloss'][-1] < init_gbm.evals_result_['valid_0']['multi_logloss'][-1]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment