test_sklearn.py 5.98 KB
Newer Older
wxchan's avatar
wxchan committed
1
2
# coding: utf-8
# pylint: skip-file
wxchan's avatar
wxchan committed
3
4
import unittest

Guolin Ke's avatar
Guolin Ke committed
5
import lightgbm as lgb
wxchan's avatar
wxchan committed
6
import numpy as np
wxchan's avatar
wxchan committed
7
from sklearn.base import clone
wxchan's avatar
wxchan committed
8
9
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
                              load_svmlight_file)
wxchan's avatar
wxchan committed
10
from sklearn.externals import joblib
wxchan's avatar
wxchan committed
11
12
13
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split

wxchan's avatar
wxchan committed
14

wxchan's avatar
wxchan committed
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class template(object):
    """Shared train/evaluate helper used by the test cases in this module."""

    @staticmethod
    def test_template(X_y=None, model=lgb.LGBMRegressor,
                      feval=mean_squared_error, num_round=100,
                      custom_obj=None, predict_proba=False,
                      return_data=False, return_model=False):
        """Split a dataset, fit a LightGBM sklearn estimator and score it.

        Parameters
        ----------
        X_y : tuple or None
            ``(X, y)`` pair unpacked into ``train_test_split``. When ``None``
            the Boston housing data is loaded on demand, so importing this
            module no longer loads a dataset as a side effect.
        model : callable
            Estimator class; instantiated with ``n_estimators`` and ``silent``.
        feval : callable
            Called as ``feval(y_test, predictions)``; its result is returned.
        num_round : int
            Value passed as ``n_estimators``.
        custom_obj : callable or None
            When truthy, passed to the estimator as ``objective``.
        predict_proba : bool
            Score ``predict_proba`` output instead of ``predict`` output.
        return_data : bool
            Return the train/test split without fitting anything.
        return_model : bool
            Return the fitted estimator instead of a score.

        Returns
        -------
        The split ``(X_train, X_test, y_train, y_test)``, the fitted
        estimator, or the value of ``feval``, depending on the flags above.
        ``return_data`` takes precedence over ``return_model``.
        """
        if X_y is None:
            # Keyword form: newer scikit-learn rejects a positional return_X_y.
            X_y = load_boston(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
        if return_data:
            return X_train, X_test, y_train, y_test

        arguments = {'n_estimators': num_round, 'silent': True}
        if custom_obj:
            arguments['objective'] = custom_obj
        gbm = model(**arguments)
        gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
        if return_model:
            return gbm

        predictions = gbm.predict_proba(X_test) if predict_proba else gbm.predict(X_test)
        return feval(y_test, predictions)
wxchan's avatar
wxchan committed
35

wxchan's avatar
wxchan committed
36
37
38
39

class TestSklearn(unittest.TestCase):

    def test_binary(self):
        """LGBMClassifier on the breast-cancer data must reach log-loss below 0.15."""
        # Keyword form: newer scikit-learn rejects a positional return_X_y.
        X_y = load_breast_cancer(return_X_y=True)
        ret = template.test_template(X_y, lgb.LGBMClassifier, log_loss, predict_proba=True)
        self.assertLess(ret, 0.15)

    def test_regreesion(self):
        """The template's default regression run must reach an RMSE below 4."""
        # With default arguments the template returns feval(y_test, predictions),
        # where feval defaults to mean_squared_error.
        mse = template.test_template()
        rmse = mse ** 0.5
        self.assertLess(rmse, 4)
wxchan's avatar
wxchan committed
46

wxchan's avatar
wxchan committed
47
48
    def test_multiclass(self):
        """LGBMClassifier on the 10-class digits data must misclassify under 20%."""
        # Keyword form: newer scikit-learn rejects positional n_class / return_X_y.
        X_y = load_digits(n_class=10, return_X_y=True)

        def multi_error(y_true, y_pred):
            # Fraction of samples whose predicted label differs from the truth.
            return np.mean(y_true != y_pred)

        ret = template.test_template(X_y, lgb.LGBMClassifier, multi_error)
        self.assertLess(ret, 0.2)
wxchan's avatar
wxchan committed
54

wxchan's avatar
wxchan committed
55
56
57
58
    def test_lambdarank(self):
        """Fit LGBMRanker on the bundled lambdarank example data.

        No metric threshold is asserted; the test passes when ``fit``
        completes without raising.
        """
        import os  # local import keeps this fix self-contained

        # Resolve the example data relative to this file instead of the
        # current working directory, so the test can be run from anywhere.
        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                '..', '..', 'examples', 'lambdarank')
        X_train, y_train = load_svmlight_file(os.path.join(data_dir, 'rank.train'))
        X_test, y_test = load_svmlight_file(os.path.join(data_dir, 'rank.test'))
        q_train = np.loadtxt(os.path.join(data_dir, 'rank.train.query'))
        q_test = np.loadtxt(os.path.join(data_dir, 'rank.test.query'))

        # The callback shrinks the learning rate on every iteration: 0.1 * 0.95 ** x.
        lgb.LGBMRanker().fit(X_train, y_train,
                             group=q_train,
                             eval_set=[(X_test, y_test)],
                             eval_group=[q_test],
                             eval_at=[1],
                             verbose=False,
                             callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
wxchan's avatar
wxchan committed
67
68
69
70
71
72

    def test_regression_with_custom_objective(self):
        """Regression with a hand-written least-squares objective must score below 100."""
        def objective_ls(y_true, y_pred):
            # Gradient is the residual; the Hessian is a vector of ones.
            residual = y_pred - y_true
            unit_hessian = np.ones(len(y_true))
            return residual, unit_hessian

        score = template.test_template(custom_obj=objective_ls)
        self.assertLess(score, 100)

    def test_binary_classification_with_custom_objective(self):
        """Binary classification with a hand-written logistic objective must err under 10%."""
        def logregobj(y_true, y_pred):
            # Squash the raw score with a sigmoid, then return the gradient
            # and Hessian of the logistic loss.
            y_pred = 1.0 / (1.0 + np.exp(-y_pred))
            grad = y_pred - y_true
            hess = y_pred * (1.0 - y_pred)
            return grad, hess

        # Keyword form: newer scikit-learn rejects positional n_class / return_X_y.
        X_y = load_digits(n_class=2, return_X_y=True)

        def binary_error(y_test, y_pred):
            # Threshold each prediction at 0.5 and report the mismatch rate.
            return np.mean([int(p > 0.5) != y for y, p in zip(y_test, y_pred)])

        ret = template.test_template(X_y, lgb.LGBMClassifier, feval=binary_error, custom_obj=logregobj)
        self.assertLess(ret, 0.1)

89
    def test_dart(self):
        """Fit an LGBMRegressor with ``boosting_type='dart'`` and score it on the training data."""
        X_train, _, y_train, _ = template.test_template(return_data=True)
        dart_model = lgb.LGBMRegressor(boosting_type='dart')
        dart_model.fit(X_train, y_train)
        train_score = dart_model.score(X_train, y_train)
        # NOTE(review): score is presumably R^2, which cannot exceed 1, so this
        # mainly verifies that fit/score run -- consider adding a lower bound.
        self.assertLessEqual(train_score, 1.)

wxchan's avatar
wxchan committed
95
    def test_grid_search(self):
        """Run GridSearchCV over LGBMRegressor and check the selected ``n_estimators``."""
        X_train, _, y_train, _ = template.test_template(return_data=True)
        n_estimators_grid = [15, 20]
        param_grid = {
            'boosting_type': ['dart', 'gbdt'],
            'n_estimators': n_estimators_grid,
            'drop_rate': [0.1, 0.2],
        }
        search = GridSearchCV(lgb.LGBMRegressor(), param_grid, cv=3)
        search.fit(X_train, y_train)
        # The winning value has to be one of the candidates that were searched.
        self.assertIn(search.best_params_['n_estimators'], n_estimators_grid)
wxchan's avatar
wxchan committed
103

104
    def test_clone_and_property(self):
        """Check sklearn ``clone`` support and the fitted attributes of the wrappers."""
        gbm = template.test_template(return_model=True)
        # The clone was previously created but never inspected; at least
        # verify that cloning yields an estimator of the same class.
        gbm_clone = clone(gbm)
        self.assertIsInstance(gbm_clone, type(gbm))
        self.assertIsInstance(gbm.booster_, lgb.Booster)
        self.assertIsInstance(gbm.feature_importances_, np.ndarray)

        # Keyword form: newer scikit-learn rejects positional n_class / return_X_y.
        clf = template.test_template(load_digits(n_class=2, return_X_y=True),
                                     model=lgb.LGBMClassifier, return_model=True)
        self.assertListEqual(sorted(clf.classes_), [0, 1])
        self.assertEqual(clf.n_classes_, 2)
        self.assertIsInstance(clf.booster_, lgb.Booster)
        self.assertIsInstance(clf.feature_importances_, np.ndarray)
wxchan's avatar
wxchan committed
114

wxchan's avatar
wxchan committed
115
    def test_joblib(self):
        """A model round-tripped through joblib must behave like the original.

        Compares parameters, feature importances, the evaluation history
        recorded by a fresh ``fit`` and the predictions of both models.
        """
        import os
        import shutil
        import tempfile

        gbm = template.test_template(num_round=10, return_model=True)

        # Pickle into a throwaway directory so no 'lgb.pkl' is left behind in
        # the working directory, whether the test passes or fails.
        tmp_dir = tempfile.mkdtemp()
        try:
            pkl_path = os.path.join(tmp_dir, 'lgb.pkl')
            joblib.dump(gbm, pkl_path)
            gbm_pickle = joblib.load(pkl_path)
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

        self.assertIsInstance(gbm_pickle.booster_, lgb.Booster)
        self.assertDictEqual(gbm.get_params(), gbm_pickle.get_params())
        self.assertListEqual(list(gbm.feature_importances_), list(gbm_pickle.feature_importances_))

        X_train, X_test, y_train, y_test = template.test_template(return_data=True)
        gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
        gbm_pickle.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

        # evals_result_ is expected to be nested: eval-set name -> metric name
        # -> list of scores. Zipping the inner dicts directly would only pair
        # up the metric names, so descend one more level and compare scores.
        for eval_name, metrics in gbm.evals_result_.items():
            pickled_metrics = gbm_pickle.evals_result_[eval_name]
            for metric_name, scores in metrics.items():
                pickled_scores = pickled_metrics[metric_name]
                self.assertEqual(len(scores), len(pickled_scores))
                for pair in zip(scores, pickled_scores):
                    self.assertAlmostEqual(*pair, places=5)

        pred_origin = gbm.predict(X_test)
        pred_pickle = gbm_pickle.predict(X_test)
        self.assertEqual(len(pred_origin), len(pred_pickle))
        for preds in zip(pred_origin, pred_pickle):
            self.assertAlmostEqual(*preds, places=5)

wxchan's avatar
wxchan committed
134

wxchan's avatar
wxchan committed
135
136
137
# Run the suite only when executed as a script; importing this module (for
# example during test collection by another runner) must not start
# unittest.main(), which would parse the host process' argv and exit.
if __name__ == '__main__':
    print("----------------------------------------------------------------------")
    print("running test_sklearn.py")
    unittest.main()