test_engine.py 5.58 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
# coding: utf-8
wxchan's avatar
wxchan committed
2
# pylint: skip-file
wxchan's avatar
wxchan committed
3
4
5
6
7
import copy
import math
import os
import unittest

Guolin Ke's avatar
Guolin Ke committed
8
import lightgbm as lgb
wxchan's avatar
wxchan committed
9
10
11
12
import numpy as np
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
                              load_iris)
from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error
wxchan's avatar
wxchan committed
13
from sklearn.model_selection import train_test_split
wxchan's avatar
wxchan committed
14

wxchan's avatar
wxchan committed
15
16
17
18
# Prefer the C-accelerated cPickle where it exists (Python 2) and fall back to
# the standard pickle module otherwise. Only ImportError is caught so that
# unrelated failures (e.g. KeyboardInterrupt) are not silently swallowed.
try:
    import cPickle as pickle
except ImportError:
    import pickle
wxchan's avatar
wxchan committed
19

wxchan's avatar
wxchan committed
20

wxchan's avatar
wxchan committed
21
22
23
def multi_logloss(y_true, y_pred):
    """Return the mean of ``-log(y_pred[i][y_true[i]])`` over all samples.

    ``y_true`` is iterated to obtain one label per sample; each label is used
    as an index into the matching row of ``y_pred``.
    """
    losses = []
    for idx, label in enumerate(y_true):
        picked = y_pred[idx][label]
        losses.append(-math.log(picked))
    return np.mean(losses)

wxchan's avatar
wxchan committed
24
25

def test_template(params=None, X_y=None, feval=mean_squared_error,
                  num_round=100, init_model=None, custom_eval=None,
                  early_stopping_rounds=10,
                  return_data=False, return_model=False):
    """Shared driver: split a dataset, train with ``lgb.train`` and score it.

    Parameters
    ----------
    params : dict or None
        Training parameters. ``None`` means
        ``{'objective': 'regression', 'metric': 'l2'}``. The dict is copied
        before ``'verbose'`` and ``'seed'`` are forced to ``-1`` and ``42``,
        so neither the caller's dict nor a shared default is mutated.
    X_y : sequence or None
        Arrays unpacked into ``train_test_split`` (features, labels).
        ``None`` loads the Boston dataset lazily, at call time rather than at
        import time.
    feval : callable
        Called as ``feval(y_test, predictions)`` to score the held-out split.
    num_round : int
        Passed to ``lgb.train`` as ``num_boost_round``.
    init_model : object or None
        Forwarded unchanged to ``lgb.train`` as ``init_model``.
    custom_eval : callable or None
        Forwarded to ``lgb.train`` as its ``feval`` argument.
    early_stopping_rounds : int
        Forwarded unchanged to ``lgb.train``.
    return_data : bool
        If true, return ``(lgb_train, lgb_eval)`` without training.
    return_model : bool
        If true, return the trained booster instead of the scores.

    Returns
    -------
    ``(lgb_train, lgb_eval)`` when ``return_data`` is set; the trained booster
    when ``return_model`` is set; otherwise ``(evals_result, score)`` where
    ``score`` is ``feval`` applied to the held-out labels and the booster's
    predictions.
    """
    # Work on a private copy: a mutable default (or the caller's dict) must
    # not accumulate state across calls.
    if params is None:
        params = {'objective': 'regression', 'metric': 'l2'}
    else:
        params = dict(params)
    params['verbose'], params['seed'] = -1, 42

    if X_y is None:
        # Keyword form: newer scikit-learn releases reject a positional flag.
        X_y = load_boston(return_X_y=True)

    # Fixed random_state keeps the 90/10 split identical across calls.
    X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
    lgb_train = lgb.Dataset(X_train, y_train, params=params)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
    if return_data:
        return lgb_train, lgb_eval

    evals_result = {}
    gbm = lgb.train(params, lgb_train,
                    num_boost_round=num_round,
                    valid_sets=lgb_eval,
                    valid_names='eval',
                    verbose_eval=False,
                    feval=custom_eval,
                    evals_result=evals_result,
                    early_stopping_rounds=early_stopping_rounds,
                    init_model=init_model)
    if return_model:
        return gbm
    # gbm.best_iteration is passed as predict()'s second positional argument.
    return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))

wxchan's avatar
wxchan committed
51

wxchan's avatar
wxchan committed
52
class TestEngine(unittest.TestCase):
    """Checks of ``lgb.train`` and ``lgb.cv`` driven through ``test_template``.

    Tests that write model files into the working directory remove them in a
    ``finally`` clause, so a failing assertion does not leave artifacts behind.
    """

    @staticmethod
    def _remove_files(*paths):
        # Best-effort cleanup: skip files that were never created.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

    def test_binary(self):
        # Keyword form: newer scikit-learn releases reject a positional flag.
        X_y = load_breast_cancer(return_X_y=True)
        params = {
            'objective': 'binary',
            'metric': 'binary_logloss'
        }
        evals_result, ret = test_template(params, X_y, log_loss)
        self.assertLess(ret, 0.15)
        self.assertAlmostEqual(min(evals_result['eval']['logloss']), ret, places=5)

    # NOTE: the method name is misspelled ("regreesion"); it is kept so that
    # existing test IDs stay valid.
    def test_regreesion(self):
        evals_result, ret = test_template()
        # Square root of the sklearn MSE; presumably the recorded 'l2' values
        # are on the RMSE scale in this LightGBM version - TODO confirm.
        ret **= 0.5
        self.assertLess(ret, 4)
        self.assertAlmostEqual(min(evals_result['eval']['l2']), ret, places=5)

    def test_multiclass(self):
        X_y = load_digits(n_class=10, return_X_y=True)
        params = {
            'objective': 'multiclass',
            'metric': 'multi_logloss',
            'num_class': 10
        }
        evals_result, ret = test_template(params, X_y, multi_logloss)
        self.assertLess(ret, 0.2)
        self.assertAlmostEqual(min(evals_result['eval']['multi_logloss']), ret, places=5)

    def test_continue_train_and_other(self):
        params = {
            'objective': 'regression',
            'metric': 'l1'
        }
        model_name = 'model.txt'
        gbm = test_template(params, num_round=20, return_model=True, early_stopping_rounds=-1)
        try:
            gbm.save_model(model_name)
            # Continue training from the saved file, with a custom metric that
            # is compared against the built-in 'l1' values below.
            evals_result, ret = test_template(params, feval=mean_absolute_error,
                                              num_round=80, init_model=model_name,
                                              custom_eval=(lambda p, d: ('mae', mean_absolute_error(p, d.get_label()), False)))
            self.assertLess(ret, 3)
            self.assertAlmostEqual(min(evals_result['eval']['l1']), ret, places=5)
            for l1, mae in zip(evals_result['eval']['l1'], evals_result['eval']['mae']):
                self.assertAlmostEqual(l1, mae, places=5)
            self.assertIn('tree_info', gbm.dump_model())
            self.assertIsInstance(gbm.feature_importance(), np.ndarray)
        finally:
            # Remove the model file even when an assertion above fails.
            self._remove_files(model_name)

    def test_continue_train_multiclass(self):
        X_y = load_iris(return_X_y=True)
        params = {
            'objective': 'multiclass',
            'metric': 'multi_logloss',
            'num_class': 3
        }
        gbm = test_template(params, X_y, num_round=20, return_model=True, early_stopping_rounds=-1)
        # Continue training directly from the in-memory booster.
        evals_result, ret = test_template(params, X_y, feval=multi_logloss,
                                          num_round=80, init_model=gbm)
        self.assertLess(ret, 1.5)
        self.assertAlmostEqual(min(evals_result['eval']['multi_logloss']), ret, places=5)

    def test_cv(self):
        lgb_train, _ = test_template(return_data=True)
        # No assertions: this only checks that lgb.cv runs with a
        # per-iteration learning-rate callback.
        lgb.cv({'verbose': 0}, lgb_train, num_boost_round=20, nfold=5,
               metrics='l1', verbose_eval=False,
               callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])

    def test_save_load_copy_pickle(self):
        model_file, pickle_file = 'lgb.model', 'lgb.pkl'
        gbm = test_template(num_round=20, return_model=True)
        _, ret_origin = test_template(init_model=gbm)
        other_ret = []
        try:
            # Each way of reproducing the booster must continue training to
            # the same score as the original object.
            gbm.save_model(model_file)
            other_ret.append(test_template(init_model=model_file)[1])
            gbm_load = lgb.Booster(model_file=model_file)
            other_ret.append(test_template(init_model=gbm_load)[1])
            other_ret.append(test_template(init_model=copy.copy(gbm))[1])
            other_ret.append(test_template(init_model=copy.deepcopy(gbm))[1])
            with open(pickle_file, 'wb') as f:
                pickle.dump(gbm, f)
            with open(pickle_file, 'rb') as f:
                gbm_pickle = pickle.load(f)
            other_ret.append(test_template(init_model=gbm_pickle)[1])
            gbm_pickles = pickle.loads(pickle.dumps(gbm))
            other_ret.append(test_template(init_model=gbm_pickles)[1])
            for ret in other_ret:
                self.assertAlmostEqual(ret_origin, ret, places=5)
        finally:
            # Do not leave the model and pickle files in the working directory.
            self._remove_files(model_file, pickle_file)
wxchan's avatar
wxchan committed
138

wxchan's avatar
wxchan committed
139

wxchan's avatar
wxchan committed
140
141
142
# Run the suite only when this file is executed as a script. unittest.main()
# exits the interpreter when it finishes, so it must not run when the module
# is merely imported (for example by a test collector).
if __name__ == '__main__':
    print("----------------------------------------------------------------------")
    print("running test_engine.py")
    unittest.main()