Unverified Commit 01568cf5 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[tests][python] move tests that use `train()` function defined in `engine.py`...

[tests][python] move tests that use `train()` function defined in `engine.py` from `test_basic.py` to `test_engine.py` (#5034)

* Update test_basic.py

* Update test_engine.py

* Update test_engine.py
parent 56313661
...@@ -7,14 +7,13 @@ from pathlib import Path
import numpy as np
import pytest
from scipy import sparse
from sklearn.datasets import dump_svmlight_file, load_svmlight_file
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series
from .utils import load_breast_cancer
def test_basic(tmp_path): def test_basic(tmp_path):
...@@ -612,54 +611,6 @@ def test_custom_objective_safety(): ...@@ -612,54 +611,6 @@ def test_custom_objective_safety():
bad_bst_multi.update(fobj=_bad_gradients) bad_bst_multi.update(fobj=_bad_gradients)
def test_multiclass_custom_objective():
    """A custom softmax objective should match the built-in multiclass objective."""
    def custom_obj(y_pred, ds):
        # Adapt the sklearn-style helper to LightGBM's (preds, dataset) fobj signature.
        return sklearn_multiclass_custom_objective(ds.get_label(), y_pred)

    blob_centers = [[-4, -4], [4, 4], [-4, 4]]
    X, y = make_blobs(n_samples=1_000, centers=blob_centers, random_state=42)
    dataset = lgb.Dataset(X, y)
    params = {'objective': 'multiclass', 'num_class': 3, 'num_leaves': 7}
    preds_builtin = lgb.train(params, dataset, num_boost_round=10).predict(X)
    # With a custom objective the booster emits raw scores, so apply softmax
    # before comparing against the built-in objective's probabilities.
    raw_custom = lgb.train(params, dataset, num_boost_round=10, fobj=custom_obj).predict(X)
    np.testing.assert_allclose(preds_builtin, softmax(raw_custom), rtol=0.01)
def test_multiclass_custom_eval():
    """A custom logloss feval must agree with the built-in multi_logloss metric."""
    def custom_eval(y_pred, ds):
        # Return (metric_name, value, is_higher_better) as LightGBM expects.
        return 'custom_logloss', log_loss(ds.get_label(), y_pred), False

    blob_centers = [[-4, -4], [4, 4], [-4, 4]]
    X, y = make_blobs(n_samples=1_000, centers=blob_centers, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)
    train_ds = lgb.Dataset(X_train, y_train)
    valid_ds = lgb.Dataset(X_valid, y_valid, reference=train_ds)
    params = {'objective': 'multiclass', 'num_class': 3, 'num_leaves': 7}
    eval_result = {}
    # keep_training_booster=True so bst.eval() can be called after training.
    bst = lgb.train(
        params,
        train_ds,
        num_boost_round=10,
        valid_sets=[train_ds, valid_ds],
        valid_names=['train', 'valid'],
        feval=custom_eval,
        callbacks=[lgb.record_evaluation(eval_result)],
        keep_training_booster=True,
    )
    for name, dataset in (('train', train_ds), ('valid', valid_ds)):
        recorded = eval_result[name]
        np.testing.assert_allclose(recorded['multi_logloss'], recorded['custom_logloss'])
        _, metric, value, _ = bst.eval(dataset, name, feval=custom_eval)[1]  # first element is multi_logloss
        assert metric == 'custom_logloss'
        np.testing.assert_allclose(value, recorded[metric][-1])
@pytest.mark.parametrize('dtype', [np.float32, np.float64])
def test_no_copy_when_single_float_dtype_dataframe(dtype):
    pd = pytest.importorskip('pandas')
......
...@@ -12,13 +12,14 @@ import numpy as np
import psutil
import pytest
from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr
from sklearn.datasets import load_svmlight_file, make_blobs, make_multilabel_classification
from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score
from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split
import lightgbm as lgb
from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression,
                    sklearn_multiclass_custom_objective, softmax)
decreasing_generator = itertools.count(0, -1)
...@@ -2310,6 +2311,54 @@ def test_default_objective_and_metric(): ...@@ -2310,6 +2311,54 @@ def test_default_objective_and_metric():
assert len(evals_result['valid_0']['l2']) == 5 assert len(evals_result['valid_0']['l2']) == 5
def test_multiclass_custom_objective():
    """Verify a hand-written multiclass fobj reproduces the built-in objective."""
    def custom_obj(y_pred, ds):
        # Bridge LightGBM's (preds, dataset) callback to the sklearn-style helper.
        labels = ds.get_label()
        return sklearn_multiclass_custom_objective(labels, y_pred)

    centers = [[-4, -4], [4, 4], [-4, 4]]
    X, y = make_blobs(n_samples=1_000, centers=centers, random_state=42)
    ds = lgb.Dataset(X, y)
    params = {'objective': 'multiclass', 'num_class': 3, 'num_leaves': 7}
    reference_preds = lgb.train(params, ds, num_boost_round=10).predict(X)
    custom_bst = lgb.train(params, ds, num_boost_round=10, fobj=custom_obj)
    # Custom objectives produce raw margins; softmax converts them to probabilities.
    custom_preds = softmax(custom_bst.predict(X))
    np.testing.assert_allclose(reference_preds, custom_preds, rtol=0.01)
def test_multiclass_custom_eval():
    """Custom feval logloss should track built-in multi_logloss during and after training."""
    def custom_eval(y_pred, ds):
        # LightGBM feval contract: (name, value, is_higher_better).
        return 'custom_logloss', log_loss(ds.get_label(), y_pred), False

    centers = [[-4, -4], [4, 4], [-4, 4]]
    X, y = make_blobs(n_samples=1_000, centers=centers, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)
    train_ds = lgb.Dataset(X_train, y_train)
    valid_ds = lgb.Dataset(X_valid, y_valid, reference=train_ds)
    params = {'objective': 'multiclass', 'num_class': 3, 'num_leaves': 7}
    eval_result = {}
    # Keep the booster trainable so that bst.eval() remains usable afterwards.
    bst = lgb.train(
        params,
        train_ds,
        num_boost_round=10,
        valid_sets=[train_ds, valid_ds],
        valid_names=['train', 'valid'],
        feval=custom_eval,
        callbacks=[lgb.record_evaluation(eval_result)],
        keep_training_booster=True,
    )
    datasets_by_name = {'train': train_ds, 'valid': valid_ds}
    for key in ('train', 'valid'):
        history = eval_result[key]
        np.testing.assert_allclose(history['multi_logloss'], history['custom_logloss'])
        _, metric, value, _ = bst.eval(datasets_by_name[key], key, feval=custom_eval)[1]  # first element is multi_logloss
        assert metric == 'custom_logloss'
        np.testing.assert_allclose(value, history[metric][-1])
@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason='not enough RAM')
def test_model_size():
    X, y = make_synthetic_regression()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment