Unverified commit 8a34b1af, authored by José Morales, committed by GitHub
Browse files

[tests][python-package] change boston dataset to synthetic dataset in tests...

[tests][python-package] change boston dataset to synthetic dataset in tests that don't check score (#4895)

* change boston dataset to synthetic dataset in tests that don't evaluate score

* format imports
parent 8e729af3
......@@ -18,7 +18,7 @@ from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_spli
import lightgbm as lgb
from .utils import load_boston, load_breast_cancer, load_digits, load_iris
from .utils import load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression
decreasing_generator = itertools.count(0, -1)
......@@ -731,7 +731,7 @@ def test_continue_train():
def test_continue_train_reused_dataset():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
params = {
'objective': 'regression',
'verbose': -1
......@@ -791,7 +791,7 @@ def test_continue_train_multiclass():
def test_cv():
X_train, y_train = load_boston(return_X_y=True)
X_train, y_train = make_synthetic_regression()
params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train)
# shuffle = False, override metric in params
......@@ -887,7 +887,7 @@ def test_cvbooster():
def test_feature_name():
X_train, y_train = load_boston(return_X_y=True)
X_train, y_train = make_synthetic_regression()
params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train)
feature_names = [f'f_{i}' for i in range(X_train.shape[-1])]
......@@ -917,7 +917,7 @@ def test_feature_name_with_non_ascii():
def test_save_load_copy_pickle():
def train_and_predict(init_model=None, return_model=False):
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {
'objective': 'regression',
......@@ -2102,7 +2102,7 @@ def test_default_objective_and_metric():
@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason='not enough RAM')
def test_model_size():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
data = lgb.Dataset(X, y)
bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
y_pred = bst.predict(X)
......@@ -2515,7 +2515,7 @@ def test_dataset_params_with_reference():
def test_extra_trees():
# check extra trees increases regularization
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
lgb_x = lgb.Dataset(X, label=y)
params = {'objective': 'regression',
'num_leaves': 32,
......@@ -2534,7 +2534,7 @@ def test_extra_trees():
def test_path_smoothing():
# check path smoothing increases regularization
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
lgb_x = lgb.Dataset(X, label=y)
params = {'objective': 'regression',
'num_leaves': 32,
......@@ -2804,7 +2804,7 @@ def test_predict_with_start_iteration():
np.testing.assert_allclose(pred4, pred6)
# test for regression
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
params = {
'objective': 'regression',
'verbose': -1,
......
......@@ -18,7 +18,8 @@ from sklearn.utils.validation import check_is_fitted
import lightgbm as lgb
from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking
from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking,
make_synthetic_regression)
sk_version = parse_version(sk_version)
if sk_version < parse_version("0.23"):
......@@ -184,7 +185,7 @@ def test_eval_at_aliases():
@pytest.mark.parametrize("custom_objective", [True, False])
def test_objective_aliases(custom_objective):
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
if custom_objective:
obj = custom_dummy_obj
......@@ -440,7 +441,7 @@ def test_regressor_chain():
def test_clone_and_property():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X, y)
......@@ -458,7 +459,7 @@ def test_clone_and_property():
def test_joblib():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
verbose=-1, importance_type='split')
......@@ -499,7 +500,7 @@ def test_non_serializable_objects_in_callbacks(tmp_path):
with pytest.raises(Exception, match="This class in not picklable"):
joblib.dump(unpicklable_callback, tmp_path / 'tmp.joblib')
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
gbm = lgb.LGBMRegressor(n_estimators=5)
gbm.fit(X, y, callbacks=[unpicklable_callback])
assert gbm.booster_.attr('attr_set_inside_callback') == '40'
......@@ -757,7 +758,7 @@ def test_predict_with_params_from_init():
def test_evaluate_train_set():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])
......@@ -1332,7 +1333,7 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
X, y = load_iris(return_X_y=True)
model_factory = lgb.LGBMClassifier
elif task == 'regression':
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
model_factory = lgb.LGBMRegressor
X = pd.DataFrame(X)
y_col_array = y.reshape(-1, 1)
......
......@@ -109,3 +109,8 @@ def make_ranking(n_samples=100, n_features=20, n_informative=5, gmax=2,
X[:, j] = bias + coef * y_vec
return X, y_vec, group_id_vec
@lru_cache(maxsize=None)
def make_synthetic_regression(n_samples=100, n_features=4, n_informative=2, random_state=42):
    """Generate a small, deterministic synthetic regression dataset for tests.

    Thin cached wrapper around ``sklearn.datasets.make_regression``. The
    defaults reproduce the dataset this helper has always returned, so
    existing ``make_synthetic_regression()`` callers are unaffected.

    Parameters
    ----------
    n_samples : int, optional (default=100)
        Number of rows to generate.
    n_features : int, optional (default=4)
        Total number of feature columns.
    n_informative : int, optional (default=2)
        Number of features actually used to build the target.
    random_state : int, optional (default=42)
        Seed forwarded to scikit-learn so the data is reproducible.

    Returns
    -------
    tuple
        Whatever ``sklearn.datasets.make_regression`` returns for these
        arguments; callers in the test-suite unpack it as ``X, y``.

    Notes
    -----
    Because of ``lru_cache``, every call with the same arguments returns the
    *same* objects. Callers must not modify the returned arrays in place
    (copy first), otherwise the change leaks into other tests. All arguments
    must be hashable for the cache to accept them.
    """
    # Keyword arguments keep the call robust against positional-signature
    # changes in scikit-learn.
    return sklearn.datasets.make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        random_state=random_state,
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment