Unverified Commit fed57520 authored by Alex Wozniakowski's avatar Alex Wozniakowski Committed by GitHub
Browse files

[tests][python][scikit-learn] New unit tests and maintenance (#3253)



* [python][scikit-learn] New unit tests and maintenance

* Includes multioutput tests
* Includes RandomizedSearchCV test
* Updates dataset parameters to eliminate FutureWarning

* Change to n_class in load_digits

* Fix spacing

* Changes after review

* Also updates validation split in grid and random search

* Include skipif for classes_ attr

* Updates checks for classes and order
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent adb769cd
...@@ -11,10 +11,13 @@ import numpy as np ...@@ -11,10 +11,13 @@ import numpy as np
from sklearn import __version__ as sk_version from sklearn import __version__ as sk_version
from sklearn.base import clone from sklearn.base import clone
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits, from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
load_iris, load_svmlight_file) load_iris, load_linnerud, load_svmlight_file,
make_multilabel_classification)
from sklearn.exceptions import SkipTestWarning from sklearn.exceptions import SkipTestWarning
from sklearn.metrics import log_loss, mean_squared_error from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.multioutput import (MultiOutputClassifier, ClassifierChain, MultiOutputRegressor,
RegressorChain)
from sklearn.utils.estimator_checks import (_yield_all_checks, SkipTest, from sklearn.utils.estimator_checks import (_yield_all_checks, SkipTest,
check_parameters_default_constructible) check_parameters_default_constructible)
...@@ -73,7 +76,7 @@ def multi_logloss(y_true, y_pred): ...@@ -73,7 +76,7 @@ def multi_logloss(y_true, y_pred):
class TestSklearn(unittest.TestCase): class TestSklearn(unittest.TestCase):
def test_binary(self): def test_binary(self):
X, y = load_breast_cancer(True) X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True) gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
...@@ -82,7 +85,7 @@ class TestSklearn(unittest.TestCase): ...@@ -82,7 +85,7 @@ class TestSklearn(unittest.TestCase):
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
def test_regression(self): def test_regression(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, silent=True) gbm = lgb.LGBMRegressor(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
...@@ -91,7 +94,7 @@ class TestSklearn(unittest.TestCase): ...@@ -91,7 +94,7 @@ class TestSklearn(unittest.TestCase):
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
def test_multiclass(self): def test_multiclass(self):
X, y = load_digits(10, True) X, y = load_digits(n_class=10, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True) gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
...@@ -134,7 +137,7 @@ class TestSklearn(unittest.TestCase): ...@@ -134,7 +137,7 @@ class TestSklearn(unittest.TestCase):
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6253) self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6253)
def test_regression_with_custom_objective(self): def test_regression_with_custom_objective(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls) gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
...@@ -143,7 +146,7 @@ class TestSklearn(unittest.TestCase): ...@@ -143,7 +146,7 @@ class TestSklearn(unittest.TestCase):
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
def test_binary_classification_with_custom_objective(self): def test_binary_classification_with_custom_objective(self):
X, y = load_digits(2, True) X, y = load_digits(n_class=2, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True, objective=logregobj) gbm = lgb.LGBMClassifier(n_estimators=50, silent=True, objective=logregobj)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
...@@ -155,7 +158,7 @@ class TestSklearn(unittest.TestCase): ...@@ -155,7 +158,7 @@ class TestSklearn(unittest.TestCase):
self.assertLess(ret, 0.05) self.assertLess(ret, 0.05)
def test_dart(self): def test_dart(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50) gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
gbm.fit(X_train, y_train) gbm.fit(X_train, y_train)
...@@ -214,30 +217,147 @@ class TestSklearn(unittest.TestCase): ...@@ -214,30 +217,147 @@ class TestSklearn(unittest.TestCase):
self.assertEqual(len(reg.final_estimator_.feature_importances_), 15) self.assertEqual(len(reg.final_estimator_.feature_importances_), 15)
def test_grid_search(self): def test_grid_search(self):
X, y = load_iris(True) X, y = load_iris(return_X_y=True)
y = np.array(list(map(str, y))) # utilize label encoder at it's max power y = y.astype(str) # utilize label encoder at it's max power
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
params = {'subsample': 0.8, random_state=42)
'subsample_freq': 1} X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1,
grid_params = {'boosting_type': ['rf', 'gbdt'], random_state=42)
'n_estimators': [4, 6], params = dict(subsample=0.8,
'reg_alpha': [0.01, 0.005]} subsample_freq=1)
fit_params = {'verbose': False, grid_params = dict(boosting_type=['rf', 'gbdt'],
'eval_set': [(X_test, y_test)], n_estimators=[4, 6],
'eval_metric': constant_metric, reg_alpha=[0.01, 0.005])
'early_stopping_rounds': 2} fit_params = dict(verbose=False,
grid = GridSearchCV(lgb.LGBMClassifier(**params), grid_params, cv=2) eval_set=[(X_val, y_val)],
grid.fit(X, y, **fit_params) eval_metric=constant_metric,
early_stopping_rounds=2)
grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params,
cv=2)
grid.fit(X_train, y_train, **fit_params)
score = grid.score(X_test, y_test) # utilizes GridSearchCV default refit=True
self.assertIn(grid.best_params_['boosting_type'], ['rf', 'gbdt']) self.assertIn(grid.best_params_['boosting_type'], ['rf', 'gbdt'])
self.assertIn(grid.best_params_['n_estimators'], [4, 6]) self.assertIn(grid.best_params_['n_estimators'], [4, 6])
self.assertIn(grid.best_params_['reg_alpha'], [0.01, 0.005]) self.assertIn(grid.best_params_['reg_alpha'], [0.01, 0.005])
self.assertLess(grid.best_score_, 0.9) self.assertLessEqual(grid.best_score_, 1.)
self.assertEqual(grid.best_estimator_.best_iteration_, 1) self.assertEqual(grid.best_estimator_.best_iteration_, 1)
self.assertLess(grid.best_estimator_.best_score_['valid_0']['multi_logloss'], 0.25) self.assertLess(grid.best_estimator_.best_score_['valid_0']['multi_logloss'], 0.25)
self.assertEqual(grid.best_estimator_.best_score_['valid_0']['error'], 0) self.assertEqual(grid.best_estimator_.best_score_['valid_0']['error'], 0)
self.assertGreaterEqual(score, 0.2)
self.assertLessEqual(score, 1.)
def test_random_search(self):
    """Run RandomizedSearchCV over an LGBMClassifier with early stopping.

    Checks that the best parameters fall inside the sampled distributions,
    that the refit best estimator scores sensibly on a held-out test set,
    and that the constant eval metric behaves as expected.

    NOTE(review): the candidate lists below are drawn from the *global*
    numpy RNG (unseeded), so the sampled grid differs between runs; only
    the search itself is seeded via random_state=42 — confirm this is
    intentional.
    """
    X, y = load_iris(return_X_y=True)
    y = y.astype(str)  # utilize label encoder at its max power
    # First split carves out the final test set; second split carves a
    # validation set (from the training data) used for early stopping.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                        random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1,
                                                      random_state=42)
    n_iter = 3  # Number of samples
    params = dict(subsample=0.8,
                  subsample_freq=1)
    # Discrete candidate lists: n_iter random draws per hyperparameter.
    param_dist = dict(boosting_type=['rf', 'gbdt'],
                      n_estimators=[np.random.randint(low=3, high=10) for i in range(n_iter)],
                      reg_alpha=[np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)])
    fit_params = dict(verbose=False,
                      eval_set=[(X_val, y_val)],
                      eval_metric=constant_metric,
                      early_stopping_rounds=2)
    rand = RandomizedSearchCV(estimator=lgb.LGBMClassifier(**params),
                              param_distributions=param_dist, cv=2,
                              n_iter=n_iter, random_state=42)
    rand.fit(X_train, y_train, **fit_params)
    score = rand.score(X_test, y_test)  # utilizes RandomizedSearchCV default refit=True
    # Best parameters must come from the sampled candidate ranges.
    self.assertIn(rand.best_params_['boosting_type'], ['rf', 'gbdt'])
    self.assertIn(rand.best_params_['n_estimators'], list(range(3, 10)))
    self.assertGreaterEqual(rand.best_params_['reg_alpha'], 0.01)  # Left-closed boundary point
    self.assertLessEqual(rand.best_params_['reg_alpha'], 0.06)  # Right-closed boundary point
    self.assertLessEqual(rand.best_score_, 1.)
    self.assertLess(rand.best_estimator_.best_score_['valid_0']['multi_logloss'], 0.25)
    # constant_metric always reports 0 error on the validation set.
    self.assertEqual(rand.best_estimator_.best_score_['valid_0']['error'], 0)
    # Held-out score must be plausible: non-degenerate and bounded by 1.
    self.assertGreaterEqual(score, 0.2)
    self.assertLessEqual(score, 1.)
# sklearn < 0.22 does not have the post fit attribute: classes_
@unittest.skipIf(sk_version < '0.22.0', 'scikit-learn version is less than 0.22')
def test_multioutput_classifier(self):
    """Wrap LGBMClassifier in MultiOutputClassifier and sanity-check the fit."""
    num_targets = 3
    X, y = make_multilabel_classification(n_samples=100, n_features=20,
                                          n_classes=num_targets, random_state=0)
    # Stringify labels so the internal label encoder gets exercised.
    y = y.astype(str)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                        random_state=42)
    base_estimator = lgb.LGBMClassifier(n_estimators=10)
    clf = MultiOutputClassifier(estimator=base_estimator)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    # Accuracy must be plausible: non-degenerate and bounded above by 1.
    self.assertGreaterEqual(score, 0.2)
    self.assertLessEqual(score, 1.)
    # Each per-target classifier should expose the same label set.
    expected_classes = np.tile(np.unique(y_train), num_targets)
    np.testing.assert_array_equal(expected_classes,
                                  np.concatenate(clf.classes_))
    for sub_clf in clf.estimators_:
        self.assertIsInstance(sub_clf, lgb.LGBMClassifier)
        self.assertIsInstance(sub_clf.booster_, lgb.Booster)
# sklearn < 0.23 does not have as_frame parameter
@unittest.skipIf(sk_version < '0.23.0', 'scikit-learn version is less than 0.23')
def test_multioutput_regressor(self):
    """Wrap LGBMRegressor in MultiOutputRegressor and sanity-check the fit."""
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                        random_state=42)
    base_estimator = lgb.LGBMRegressor(n_estimators=10)
    reg = MultiOutputRegressor(estimator=base_estimator)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    # mse is the module-level helper; the middle return value is the score.
    _, score, _ = mse(y_test, y_pred)
    # Error must be plausible for this dataset: non-degenerate and bounded.
    self.assertGreaterEqual(score, 0.2)
    self.assertLessEqual(score, 120.)
    for sub_reg in reg.estimators_:
        self.assertIsInstance(sub_reg, lgb.LGBMRegressor)
        self.assertIsInstance(sub_reg.booster_, lgb.Booster)
# sklearn < 0.22 does not have the post fit attribute: classes_
@unittest.skipIf(sk_version < '0.22.0', 'scikit-learn version is less than 0.22')
def test_classifier_chain(self):
    """Fit a ClassifierChain of LGBMClassifiers with an explicit chain order."""
    num_targets = 3
    X, y = make_multilabel_classification(n_samples=100, n_features=20,
                                          n_classes=num_targets, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                        random_state=42)
    order = [2, 0, 1]
    chain = ClassifierChain(base_estimator=lgb.LGBMClassifier(n_estimators=10),
                            order=order, random_state=42)
    chain.fit(X_train, y_train)
    score = chain.score(X_test, y_test)
    # Accuracy must be plausible: non-degenerate and bounded above by 1.
    self.assertGreaterEqual(score, 0.2)
    self.assertLessEqual(score, 1.)
    # Every link in the chain should expose the same label set.
    expected_classes = np.tile(np.unique(y_train), num_targets)
    np.testing.assert_array_equal(expected_classes,
                                  np.concatenate(chain.classes_))
    # The fitted chain must preserve the requested target order.
    self.assertListEqual(order, chain.order_)
    for link in chain.estimators_:
        self.assertIsInstance(link, lgb.LGBMClassifier)
        self.assertIsInstance(link.booster_, lgb.Booster)
# sklearn < 0.23 does not have as_frame parameter
@unittest.skipIf(sk_version < '0.23.0', 'scikit-learn version is less than 0.23')
def test_regressor_chain(self):
    """Fit a RegressorChain of LGBMRegressors with an explicit chain order."""
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    order = [2, 0, 1]
    chain = RegressorChain(base_estimator=lgb.LGBMRegressor(n_estimators=10), order=order,
                           random_state=42)
    chain.fit(X_train, y_train)
    y_pred = chain.predict(X_test)
    # mse is the module-level helper; the middle return value is the score.
    _, score, _ = mse(y_test, y_pred)
    # Error must be plausible for this dataset: non-degenerate and bounded.
    self.assertGreaterEqual(score, 0.2)
    self.assertLessEqual(score, 120.)
    # The fitted chain must preserve the requested target order.
    self.assertListEqual(order, chain.order_)
    for link in chain.estimators_:
        self.assertIsInstance(link, lgb.LGBMRegressor)
        self.assertIsInstance(link.booster_, lgb.Booster)
def test_clone_and_property(self): def test_clone_and_property(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
gbm = lgb.LGBMRegressor(n_estimators=10, silent=True) gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
gbm.fit(X, y, verbose=False) gbm.fit(X, y, verbose=False)
...@@ -245,7 +365,7 @@ class TestSklearn(unittest.TestCase): ...@@ -245,7 +365,7 @@ class TestSklearn(unittest.TestCase):
self.assertIsInstance(gbm.booster_, lgb.Booster) self.assertIsInstance(gbm.booster_, lgb.Booster)
self.assertIsInstance(gbm.feature_importances_, np.ndarray) self.assertIsInstance(gbm.feature_importances_, np.ndarray)
X, y = load_digits(2, True) X, y = load_digits(n_class=2, return_X_y=True)
clf = lgb.LGBMClassifier(n_estimators=10, silent=True) clf = lgb.LGBMClassifier(n_estimators=10, silent=True)
clf.fit(X, y, verbose=False) clf.fit(X, y, verbose=False)
self.assertListEqual(sorted(clf.classes_), [0, 1]) self.assertListEqual(sorted(clf.classes_), [0, 1])
...@@ -254,7 +374,7 @@ class TestSklearn(unittest.TestCase): ...@@ -254,7 +374,7 @@ class TestSklearn(unittest.TestCase):
self.assertIsInstance(clf.feature_importances_, np.ndarray) self.assertIsInstance(clf.feature_importances_, np.ndarray)
def test_joblib(self): def test_joblib(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj, gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
silent=True, importance_type='split') silent=True, importance_type='split')
...@@ -279,7 +399,7 @@ class TestSklearn(unittest.TestCase): ...@@ -279,7 +399,7 @@ class TestSklearn(unittest.TestCase):
np.testing.assert_allclose(pred_origin, pred_pickle) np.testing.assert_allclose(pred_origin, pred_pickle)
def test_random_state_object(self): def test_random_state_object(self):
X, y = load_iris(True) X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
state1 = np.random.RandomState(123) state1 = np.random.RandomState(123)
state2 = np.random.RandomState(123) state2 = np.random.RandomState(123)
...@@ -312,14 +432,14 @@ class TestSklearn(unittest.TestCase): ...@@ -312,14 +432,14 @@ class TestSklearn(unittest.TestCase):
df1, df3) df1, df3)
def test_feature_importances_single_leaf(self): def test_feature_importances_single_leaf(self):
data = load_iris() data = load_iris(return_X_y=False)
clf = lgb.LGBMClassifier(n_estimators=10) clf = lgb.LGBMClassifier(n_estimators=10)
clf.fit(data.data, data.target) clf.fit(data.data, data.target)
importances = clf.feature_importances_ importances = clf.feature_importances_
self.assertEqual(len(importances), 4) self.assertEqual(len(importances), 4)
def test_feature_importances_type(self): def test_feature_importances_type(self):
data = load_iris() data = load_iris(return_X_y=False)
clf = lgb.LGBMClassifier(n_estimators=10) clf = lgb.LGBMClassifier(n_estimators=10)
clf.fit(data.data, data.target) clf.fit(data.data, data.target)
clf.set_params(importance_type='split') clf.set_params(importance_type='split')
...@@ -443,7 +563,7 @@ class TestSklearn(unittest.TestCase): ...@@ -443,7 +563,7 @@ class TestSklearn(unittest.TestCase):
def test_predict(self): def test_predict(self):
# With default params # With default params
iris = load_iris() iris = load_iris(return_X_y=False)
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
test_size=0.2, random_state=42) test_size=0.2, random_state=42)
...@@ -488,7 +608,7 @@ class TestSklearn(unittest.TestCase): ...@@ -488,7 +608,7 @@ class TestSklearn(unittest.TestCase):
res_engine, res_sklearn_params) res_engine, res_sklearn_params)
def test_evaluate_train_set(self): def test_evaluate_train_set(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, silent=True) gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False)
...@@ -501,7 +621,7 @@ class TestSklearn(unittest.TestCase): ...@@ -501,7 +621,7 @@ class TestSklearn(unittest.TestCase):
self.assertIn('l2', gbm.evals_result_['valid_1']) self.assertIn('l2', gbm.evals_result_['valid_1'])
def test_metrics(self): def test_metrics(self):
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
params = {'n_estimators': 2, 'verbose': -1} params = {'n_estimators': 2, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
...@@ -541,7 +661,7 @@ class TestSklearn(unittest.TestCase): ...@@ -541,7 +661,7 @@ class TestSklearn(unittest.TestCase):
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
# non-default metric with multiple metrics in eval_metric for LGBMClassifier # non-default metric with multiple metrics in eval_metric for LGBMClassifier
X_classification, y_classification = load_breast_cancer(True) X_classification, y_classification = load_breast_cancer(return_X_y=True)
params_classification = {'n_estimators': 2, 'verbose': -1, params_classification = {'n_estimators': 2, 'verbose': -1,
'objective': 'binary', 'metric': 'binary_logloss'} 'objective': 'binary', 'metric': 'binary_logloss'}
params_fit_classification = {'X': X_classification, 'y': y_classification, params_fit_classification = {'X': X_classification, 'y': y_classification,
...@@ -724,7 +844,7 @@ class TestSklearn(unittest.TestCase): ...@@ -724,7 +844,7 @@ class TestSklearn(unittest.TestCase):
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
X, y = load_digits(3, True) X, y = load_digits(n_class=3, return_X_y=True)
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
# default metric and invalid binary metric is replaced with multiclass alternative # default metric and invalid binary metric is replaced with multiclass alternative
...@@ -751,7 +871,7 @@ class TestSklearn(unittest.TestCase): ...@@ -751,7 +871,7 @@ class TestSklearn(unittest.TestCase):
self.assertIn('multi_logloss', gbm.evals_result_['training']) self.assertIn('multi_logloss', gbm.evals_result_['training'])
self.assertIn('multi_error', gbm.evals_result_['training']) self.assertIn('multi_error', gbm.evals_result_['training'])
X, y = load_digits(2, True) X, y = load_digits(n_class=2, return_X_y=True)
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
# default metric and invalid multiclass metric is replaced with binary alternative # default metric and invalid multiclass metric is replaced with binary alternative
...@@ -810,7 +930,7 @@ class TestSklearn(unittest.TestCase): ...@@ -810,7 +930,7 @@ class TestSklearn(unittest.TestCase):
self.assertEqual(assumed_iteration if eval_set_name != 'training' else gbm.n_estimators, self.assertEqual(assumed_iteration if eval_set_name != 'training' else gbm.n_estimators,
gbm.best_iteration_) gbm.best_iteration_)
X, y = load_boston(True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72) X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72)
params = {'n_estimators': 30, params = {'n_estimators': 30,
...@@ -891,7 +1011,7 @@ class TestSklearn(unittest.TestCase): ...@@ -891,7 +1011,7 @@ class TestSklearn(unittest.TestCase):
fit_and_check(['valid_0', 'valid_1'], ['l1', 'l2'], iter_min_l2, True) fit_and_check(['valid_0', 'valid_1'], ['l1', 'l2'], iter_min_l2, True)
def test_class_weight(self): def test_class_weight(self):
X, y = load_digits(10, True) X, y = load_digits(n_class=10, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train_str = y_train.astype('str') y_train_str = y_train.astype('str')
y_test_str = y_test.astype('str') y_test_str = y_test.astype('str')
...@@ -925,7 +1045,7 @@ class TestSklearn(unittest.TestCase): ...@@ -925,7 +1045,7 @@ class TestSklearn(unittest.TestCase):
gbm_str.evals_result_[eval_set][metric]) gbm_str.evals_result_[eval_set][metric])
def test_continue_training_with_model(self): def test_continue_training_with_model(self):
X, y = load_digits(3, True) X, y = load_digits(n_class=3, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test), init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
verbose=False) verbose=False)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment