[ci][python] fix sklearn FutureWarning about positional args (#3295)

e6bf4090 · Nikita Titov · GitHub · 6bc55093 · e6bf4090 · e6bf4090
Unverified commit e6bf4090, authored Aug 11, 2020 by Nikita Titov; committed by GitHub on Aug 11, 2020.
3 changed files
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -14,7 +14,7 @@ from sklearn.model_selection import train_test_split
 class TestBasic(unittest.TestCase):

    def test(self):
-        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(True),
+        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True),
                                                            test_size=0.1, random_state=2)
        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = train_data.create_valid(X_test, label=y_test)
@@ -86,7 +86,7 @@ class TestBasic(unittest.TestCase):
        os.remove(tname)

    def test_chunked_dataset(self):
-        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=2)
+        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2)

        chunk_size = X_train.shape[0] // 10 + 1
        X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)]
@@ -273,7 +273,7 @@ class TestBasic(unittest.TestCase):
            self.assertAlmostEqual(data.label[1], data.weight[1])
            self.assertListEqual(data.feature_name, data.get_feature_name())

-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        sequence = np.ones(y.shape[0])
        sequence[0] = np.nan
        sequence[1] = np.inf

--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -53,7 +53,7 @@ def categorize(continuous_x):

 class TestEngine(unittest.TestCase):
    def test_binary(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'binary',
@@ -75,7 +75,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)

    def test_rf(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'boosting_type': 'rf',
@@ -100,7 +100,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)

    def test_regression(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'metric': 'l2',
@@ -377,7 +377,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['auc'][-1], ret, places=5)

    def test_multiclass(self):
-        X, y = load_digits(10, True)
+        X, y = load_digits(n_class=10, return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'multiclass',
@@ -398,7 +398,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)

    def test_multiclass_rf(self):
-        X, y = load_digits(10, True)
+        X, y = load_digits(n_class=10, return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'boosting_type': 'rf',
@@ -426,7 +426,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)

    def test_multiclass_prediction_early_stopping(self):
-        X, y = load_digits(10, True)
+        X, y = load_digits(n_class=10, return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'multiclass',
@@ -452,7 +452,7 @@ class TestEngine(unittest.TestCase):
        self.assertLess(ret, 0.2)

    def test_multi_class_error(self):
-        X, y = load_digits(10, True)
+        X, y = load_digits(n_class=10, return_X_y=True)
        params = {'objective': 'multiclass', 'num_classes': 10, 'metric': 'multi_error',
                  'num_leaves': 4, 'verbose': -1}
        lgb_data = lgb.Dataset(X, label=y)
@@ -497,7 +497,7 @@ class TestEngine(unittest.TestCase):

    def test_auc_mu(self):
        # should give same result as binary auc for 2 classes
-        X, y = load_digits(10, True)
+        X, y = load_digits(n_class=10, return_X_y=True)
        y_new = np.zeros((len(y)))
        y_new[y != 0] = 1
        lgb_X = lgb.Dataset(X, label=y_new)
@@ -558,7 +558,7 @@ class TestEngine(unittest.TestCase):
        self.assertNotEqual(results_weight['training']['auc_mu'][-1], results_no_weight['training']['auc_mu'][-1])

    def test_early_stopping(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        params = {
            'objective': 'binary',
            'metric': 'binary_logloss',
@@ -590,7 +590,7 @@ class TestEngine(unittest.TestCase):
        self.assertIn('binary_logloss', gbm.best_score[valid_set_name])

    def test_continue_train(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'regression',
@@ -618,7 +618,7 @@ class TestEngine(unittest.TestCase):
        os.remove(model_name)

    def test_continue_train_reused_dataset(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        params = {
            'objective': 'regression',
            'verbose': -1
@@ -631,7 +631,7 @@ class TestEngine(unittest.TestCase):
        self.assertEqual(gbm.current_iteration(), 20)

    def test_continue_train_dart(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'boosting_type': 'dart',
@@ -654,7 +654,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5)

    def test_continue_train_multiclass(self):
-        X, y = load_iris(True)
+        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'multiclass',
@@ -677,7 +677,7 @@ class TestEngine(unittest.TestCase):
        self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)

    def test_cv(self):
-        X_train, y_train = load_boston(True)
+        X_train, y_train = load_boston(return_X_y=True)
        params = {'verbose': -1}
        lgb_train = lgb.Dataset(X_train, y_train)
        # shuffle = False, override metric in params
@@ -736,7 +736,7 @@ class TestEngine(unittest.TestCase):
        np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])

    def test_cvbooster(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'binary',
@@ -780,7 +780,7 @@ class TestEngine(unittest.TestCase):
        self.assertLess(ret, 0.15)

    def test_feature_name(self):
-        X_train, y_train = load_boston(True)
+        X_train, y_train = load_boston(return_X_y=True)
        params = {'verbose': -1}
        lgb_train = lgb.Dataset(X_train, y_train)
        feature_names = ['f_' + str(i) for i in range(X_train.shape[-1])]
@@ -808,7 +808,7 @@ class TestEngine(unittest.TestCase):

    def test_save_load_copy_pickle(self):
        def train_and_predict(init_model=None, return_model=False):
-            X, y = load_boston(True)
+            X, y = load_boston(return_X_y=True)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
            params = {
                'objective': 'regression',
@@ -972,7 +972,7 @@ class TestEngine(unittest.TestCase):
        self.assertEqual(len(evals_result['valid_1']['rmse']), 20)

    def test_contribs(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'binary',
@@ -1310,7 +1310,7 @@ class TestEngine(unittest.TestCase):
        np.random.seed()  # reset seed

    def test_refit(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'binary',
@@ -1326,7 +1326,7 @@ class TestEngine(unittest.TestCase):
        self.assertGreater(err_pred, new_err_pred)

    def test_mape_rf(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        params = {
            'boosting_type': 'rf',
            'objective': 'mape',
@@ -1343,7 +1343,7 @@ class TestEngine(unittest.TestCase):
        self.assertGreater(pred_mean, 20)

    def test_mape_dart(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        params = {
            'boosting_type': 'dart',
            'objective': 'mape',
@@ -1422,7 +1422,7 @@ class TestEngine(unittest.TestCase):
            params['num_class'] = 4
            return dtrain, dtest, params

-        X, y = load_iris(True)
+        X, y = load_iris(return_X_y=True)
        dataset = lgb.Dataset(X, y, free_raw_data=False)
        params = {'objective': 'multiclass', 'num_class': 3, 'verbose': -1}
        results = lgb.cv(params, dataset, num_boost_round=10, fpreproc=preprocess_data)
@@ -1430,7 +1430,7 @@ class TestEngine(unittest.TestCase):
        self.assertEqual(len(results['multi_logloss-mean']), 10)

    def test_metrics(self):
-        X, y = load_digits(2, True)
+        X, y = load_digits(n_class=2, return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        lgb_train = lgb.Dataset(X_train, y_train, silent=True)
        lgb_valid = lgb.Dataset(X_test, y_test, reference=lgb_train, silent=True)
@@ -1736,7 +1736,7 @@ class TestEngine(unittest.TestCase):
        self.assertEqual(len(evals_result), 1)
        self.assertIn('error', evals_result['valid_0'])

-        X, y = load_digits(3, True)
+        X, y = load_digits(n_class=3, return_X_y=True)
        lgb_train = lgb.Dataset(X, y, silent=True)

        obj_multi_aliases = ['multiclass', 'softmax', 'multiclassova', 'multiclass_ova', 'ova', 'ovr']
@@ -1805,7 +1805,7 @@ class TestEngine(unittest.TestCase):

    @unittest.skipIf(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, 'not enough RAM')
    def test_model_size(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        data = lgb.Dataset(X, y)
        bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
        y_pred = bst.predict(X)
@@ -1831,7 +1831,7 @@ class TestEngine(unittest.TestCase):
            self.skipTest('not enough RAM')

    def test_get_split_value_histogram(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
        gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
        # test XGBoost-style return value
@@ -1941,7 +1941,7 @@ class TestEngine(unittest.TestCase):
                         eval_train_metric=eval_train_metric)
            self.assertEqual(assumed_iteration, len(ret[list(ret.keys())[0]]))

-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=73)
        lgb_train = lgb.Dataset(X_train, y_train)
@@ -2019,7 +2019,7 @@ class TestEngine(unittest.TestCase):
                                                                           decreasing_metric(preds, train_data)])

    def test_node_level_subcol(self):
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'binary',
@@ -2200,7 +2200,7 @@ class TestEngine(unittest.TestCase):

    def test_extra_trees(self):
        # check extra trees increases regularization
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        lgb_x = lgb.Dataset(X, label=y)
        params = {'objective': 'regression',
                  'num_leaves': 32,
@@ -2218,7 +2218,7 @@ class TestEngine(unittest.TestCase):

    def test_path_smoothing(self):
        # check path smoothing increases regularization
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        lgb_x = lgb.Dataset(X, label=y)
        params = {'objective': 'regression',
                  'num_leaves': 32,
@@ -2240,7 +2240,7 @@ class TestEngine(unittest.TestCase):
            cols = ['Column_' + str(i) for i in range(X.shape[1])]
            return [impcts_dict.get(col, 0.) for col in cols]

-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        data = lgb.Dataset(X, label=y)
        num_trees = 10
        bst = lgb.train({"objective": "binary", "verbose": -1}, data, num_trees)
@@ -2285,7 +2285,7 @@ class TestEngine(unittest.TestCase):
            self.assertIsNone(tree_df.loc[0, col])

    def test_interaction_constraints(self):
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        num_features = X.shape[1]
        train_data = lgb.Dataset(X, label=y)
        # check that constraint containing all features is equivalent to no constraint
@@ -2364,7 +2364,7 @@ class TestEngine(unittest.TestCase):
            np.testing.assert_allclose(pred4, pred6)

        # test for regression
-        X, y = load_boston(True)
+        X, y = load_boston(return_X_y=True)
        params = {
            'objective': 'regression',
            'verbose': -1,
@@ -2377,7 +2377,7 @@ class TestEngine(unittest.TestCase):
        inner_test(X, y, params, early_stopping_rounds=None)

        # test for multi-class
-        X, y = load_iris(True)
+        X, y = load_iris(return_X_y=True)
        params = {
            'objective': 'multiclass',
            'metric': 'multi_logloss',
@@ -2391,7 +2391,7 @@ class TestEngine(unittest.TestCase):
        inner_test(X, y, params, early_stopping_rounds=None)

        # test for binary
-        X, y = load_breast_cancer(True)
+        X, y = load_breast_cancer(return_X_y=True)
        params = {
            'objective': 'binary',
            'metric': 'binary_logloss',

--- a/tests/python_package_test/test_plotting.py
+++ b/tests/python_package_test/test_plotting.py
@@ -16,7 +16,7 @@ if GRAPHVIZ_INSTALLED:
 class TestBasic(unittest.TestCase):

    def setUp(self):
-        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(*load_breast_cancer(True),
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(*load_breast_cancer(return_X_y=True),
                                                                                test_size=0.1, random_state=1)
        self.train_data = lgb.Dataset(self.X_train, self.y_train)
        self.params = {