# coding: utf-8
# pylint: skip-file
"""Tests for the scikit-learn compatible API of lightgbm."""
import unittest

import lightgbm as lgb
import numpy as np
from sklearn.base import clone
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
                              load_svmlight_file)
from sklearn.externals import joblib
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split

def test_template(X_y=None, model=lgb.LGBMRegressor,
                  feval=mean_squared_error, num_round=100,
                  custom_obj=None, predict_proba=False,
                  return_data=False, return_model=False):
    """Train *model* on a 90/10 split of *X_y* and return a result.

    Parameters
    ----------
    X_y : tuple(array, array) or None
        ``(data, target)`` pair; ``None`` means the Boston housing data.
    model : estimator class
        scikit-learn compatible estimator to construct and fit.
    feval : callable(y_true, y_pred) -> float
        Metric applied to the held-out predictions for the return value.
    num_round : int
        Number of boosting iterations (passed as ``n_estimators``).
    custom_obj : callable or None
        Custom objective forwarded via the ``objective`` parameter.
    predict_proba : bool
        Score ``predict_proba`` output instead of ``predict``.
    return_data : bool
        Return the ``(X_train, X_test, y_train, y_test)`` split instead.
    return_model : bool
        Return the fitted estimator instead of a metric value.

    Returns
    -------
    The data split, the fitted model, or ``feval``'s score, depending on
    the ``return_*`` flags (checked in that order).
    """
    # Evaluate the default lazily: an eager default argument would load
    # the dataset once at import time and share that object across calls.
    if X_y is None:
        X_y = load_boston(True)
    X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
    if return_data:
        return X_train, X_test, y_train, y_test
    arguments = {'n_estimators': num_round, 'silent': True}
    if custom_obj:
        arguments['objective'] = custom_obj
    gbm = model(**arguments)
    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
    if return_model:
        return gbm
    elif predict_proba:
        return feval(y_test, gbm.predict_proba(X_test))
    else:
        return feval(y_test, gbm.predict(X_test))

wxchan's avatar
wxchan committed
34
35
36
37

class TestSklearn(unittest.TestCase):

    def test_binary(self):
wxchan's avatar
wxchan committed
38
        X_y = load_breast_cancer(True)
wxchan's avatar
wxchan committed
39
        ret = test_template(X_y, lgb.LGBMClassifier, log_loss, predict_proba=True)
wxchan's avatar
wxchan committed
40
41
42
43
        self.assertLess(ret, 0.15)

    def test_regreesion(self):
        self.assertLess(test_template() ** 0.5, 4)
wxchan's avatar
wxchan committed
44

wxchan's avatar
wxchan committed
45
46
    def test_multiclass(self):
        X_y = load_digits(10, True)
wxchan's avatar
wxchan committed
47

wxchan's avatar
wxchan committed
48
49
        def multi_error(y_true, y_pred):
            return np.mean(y_true != y_pred)
wxchan's avatar
wxchan committed
50
        ret = test_template(X_y, lgb.LGBMClassifier, multi_error)
wxchan's avatar
wxchan committed
51
        self.assertLess(ret, 0.2)
wxchan's avatar
wxchan committed
52

wxchan's avatar
wxchan committed
53
54
55
56
    def test_lambdarank(self):
        X_train, y_train = load_svmlight_file('../../examples/lambdarank/rank.train')
        X_test, y_test = load_svmlight_file('../../examples/lambdarank/rank.test')
        q_train = np.loadtxt('../../examples/lambdarank/rank.train.query')
57
58
59
60
61
62
63
64
        q_test = np.loadtxt('../../examples/lambdarank/rank.test.query')
        lgb_model = lgb.LGBMRanker().fit(X_train, y_train,
                                         group=q_train,
                                         eval_set=[(X_test, y_test)],
                                         eval_group=[q_test],
                                         eval_at=[1],
                                         verbose=False,
                                         callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
wxchan's avatar
wxchan committed
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

    def test_regression_with_custom_objective(self):
        def objective_ls(y_true, y_pred):
            grad = (y_pred - y_true)
            hess = np.ones(len(y_true))
            return grad, hess
        ret = test_template(custom_obj=objective_ls)
        self.assertLess(ret, 100)

    def test_binary_classification_with_custom_objective(self):
        def logregobj(y_true, y_pred):
            y_pred = 1.0 / (1.0 + np.exp(-y_pred))
            grad = y_pred - y_true
            hess = y_pred * (1.0 - y_pred)
            return grad, hess
        X_y = load_digits(2, True)
wxchan's avatar
wxchan committed
81

wxchan's avatar
wxchan committed
82
83
84
85
86
        def binary_error(y_test, y_pred):
            return np.mean([int(p > 0.5) != y for y, p in zip(y_test, y_pred)])
        ret = test_template(X_y, lgb.LGBMClassifier, feval=binary_error, custom_obj=logregobj)
        self.assertLess(ret, 0.1)

87
88
89
90
91
92
    def test_dart(self):
        X_train, X_test, y_train, y_test = test_template(return_data=True)
        gbm = lgb.LGBMRegressor(boosting_type='dart')
        gbm.fit(X_train, y_train)
        self.assertLessEqual(gbm.score(X_train, y_train), 1.)

wxchan's avatar
wxchan committed
93
94
    def test_grid_search(self):
        X_train, X_test, y_train, y_test = test_template(return_data=True)
95
        params = {'boosting_type': ['dart', 'gbdt'],
wxchan's avatar
wxchan committed
96
97
                  'n_estimators': [15, 20],
                  'drop_rate': [0.1, 0.2]}
98
        gbm = GridSearchCV(lgb.LGBMRegressor(), params, cv=3)
wxchan's avatar
wxchan committed
99
        gbm.fit(X_train, y_train)
100
        self.assertIn(gbm.best_params_['n_estimators'], [15, 20])
wxchan's avatar
wxchan committed
101

102
    def test_clone_and_property(self):
wxchan's avatar
wxchan committed
103
104
        gbm = test_template(return_model=True)
        gbm_clone = clone(gbm)
105
106
107
108
109
110
111
        self.assertIsInstance(gbm.booster_, lgb.Booster)
        self.assertIsInstance(gbm.feature_importance_, np.ndarray)
        clf = test_template(load_digits(2, True), model=lgb.LGBMClassifier, return_model=True)
        self.assertListEqual(sorted(clf.classes_), [0, 1])
        self.assertEqual(clf.n_classes_, 2)
        self.assertIsInstance(clf.booster_, lgb.Booster)
        self.assertIsInstance(clf.feature_importance_, np.ndarray)
wxchan's avatar
wxchan committed
112

wxchan's avatar
wxchan committed
113
114
115
116
    def test_joblib(self):
        gbm = test_template(num_round=10, return_model=True)
        joblib.dump(gbm, 'lgb.pkl')
        gbm_pickle = joblib.load('lgb.pkl')
117
        self.assertIsInstance(gbm_pickle.booster_, lgb.Booster)
wxchan's avatar
wxchan committed
118
        self.assertDictEqual(gbm.get_params(), gbm_pickle.get_params())
119
        self.assertListEqual(list(gbm.feature_importance_), list(gbm_pickle.feature_importance_))
wxchan's avatar
wxchan committed
120
121
122
        X_train, X_test, y_train, y_test = test_template(return_data=True)
        gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
        gbm_pickle.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
123
        self.assertDictEqual(gbm.evals_result_, gbm_pickle.evals_result_)
wxchan's avatar
wxchan committed
124
125
126
127
128
129
        pred_origin = gbm.predict(X_test)
        pred_pickle = gbm_pickle.predict(X_test)
        self.assertEqual(len(pred_origin), len(pred_pickle))
        for preds in zip(pred_origin, pred_pickle):
            self.assertAlmostEqual(*preds, places=5)

wxchan's avatar
wxchan committed
130

wxchan's avatar
wxchan committed
131
132
133
# Only run the suite when executed as a script, not when imported
# (e.g. by a test collector that discovers the module).
if __name__ == "__main__":
    print("----------------------------------------------------------------------")
    print("running test_sklearn.py")
    unittest.main()