test_sklearn.py 5.11 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
import numpy as np
import random
import lightgbm as lgb


# Seeded NumPy random state (seed 2016); none of the tests visible in this
# file reference it.
rng = np.random.RandomState(2016)

def test_binary_classification():
    """Fit ``LGBMClassifier`` on binary data and check its hold-out error.

    A model is first fitted on a synthetic ``make_classification`` data set
    purely as a smoke test (its predictions are never scored).  A second
    model is fitted on the two-class digits data set and must misclassify
    fewer than 10% of the held-out samples.
    """
    from sklearn import datasets, model_selection

    # Smoke test only: fitting on a large synthetic data set must not raise.
    X, y = datasets.make_classification(n_samples=10000, n_features=100)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)
    lgb.LGBMClassifier().fit(x_train, y_train)

    # ``n_class`` is passed by keyword: newer scikit-learn releases reject it
    # as a positional argument, while older ones accept either form.
    digits = datasets.load_digits(n_class=2)
    y = digits['target']
    X = digits['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)
    lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)

    preds = lgb_model.predict(x_test)
    # Predictions are thresholded at 0.5 before being compared to the 0/1
    # targets, exactly as the original index-based loop did.
    n_wrong = sum(1 for pred, truth in zip(preds, y_test)
                  if int(pred > 0.5) != truth)
    err = n_wrong / float(len(preds))
    assert err < 0.1

def test_multiclass_classification():
    """Fit ``LGBMClassifier`` on a 4-class problem and check its error rate.

    The data comes from ``make_classification`` with four classes and three
    informative features; 10% of it is held out for scoring.  The fraction
    of misclassified held-out samples must stay below 0.7.
    """
    from sklearn import datasets, model_selection

    X, y = datasets.make_classification(n_samples=10000, n_features=100,
                                        n_classes=4, n_informative=3)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
    preds = lgb_model.predict(x_test)

    # Compare predicted and true labels directly.  The previous check first
    # collapsed every prediction to 0/1 via ``int(pred > 0.5)``, so samples
    # of classes 2 and 3 could never be counted as correct.
    # NOTE(review): assumes ``predict`` returns class labels - confirm
    # against the LGBMClassifier version under test.
    n_wrong = sum(1 for pred, truth in zip(preds, y_test) if pred != truth)
    err = n_wrong / float(len(preds))
    assert err < 0.7

def test_regression():
    """Fit ``LGBMRegressor`` on the Boston data and check hold-out MSE.

    10% of the samples are held out; the mean squared error of the
    predictions on that hold-out set must be below 100.
    """
    # The unused ``sklearn.cross_validation.KFold`` import was dropped: that
    # module no longer exists in current scikit-learn, so importing it could
    # fail before the test body ran.
    # NOTE(review): ``load_boston`` itself is deprecated/removed in recent
    # scikit-learn releases - confirm the supported version range.
    from sklearn import model_selection
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMRegressor().fit(x_train, y_train)
    preds = lgb_model.predict(x_test)

    # Arguments follow the documented (y_true, y_pred) order; the value is
    # the same either way because squared error is symmetric.
    assert mean_squared_error(y_test, preds) < 100
Guolin Ke's avatar
Guolin Ke committed
58

wxchan's avatar
wxchan committed
59
60
61
62
63
64
65
def test_lambdarank():
    """Fit ``LGBMRanker`` on the bundled lambdarank example data.

    This is a smoke test: it only checks that loading the example files and
    fitting the ranker (with query groups and ``eval_at=[1]``) does not
    raise.  Nothing is asserted about the fitted model.
    """
    import os

    from sklearn.datasets import load_svmlight_file

    # Resolve the example directory relative to this file rather than the
    # current working directory, so the test can be launched from anywhere.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '..', '..', 'examples', 'lambdarank')

    X_train, y_train = load_svmlight_file(os.path.join(data_dir, 'rank.train'))
    # The test split is loaded to check that it parses; it is not used below.
    load_svmlight_file(os.path.join(data_dir, 'rank.test'))
    q_train = np.loadtxt(os.path.join(data_dir, 'rank.train.query'))

    lgb.LGBMRanker().fit(X_train, y_train, group=q_train, eval_at=[1])

Guolin Ke's avatar
Guolin Ke committed
66
67
68
69
70
71
72
73
74
75
76
77
def test_regression_with_custom_objective():
    """Fit ``LGBMRegressor`` with a hand-written least-squares objective.

    The objective is passed as a callable returning gradient and hessian.
    10% of the Boston data is held out, and the mean squared error on that
    hold-out set must be below 100.
    """
    # The unused ``sklearn.cross_validation.KFold`` import was dropped: that
    # module no longer exists in current scikit-learn, so importing it could
    # fail before the test body ran.
    # NOTE(review): ``load_boston`` itself is deprecated/removed in recent
    # scikit-learn releases - confirm the supported version range.
    from sklearn import model_selection
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error

    def objective_ls(y_true, y_pred):
        """Return (gradient, hessian) of the loss 0.5 * (y_pred - y_true)**2."""
        grad = (y_pred - y_true)
        hess = np.ones(len(y_true))
        return grad, hess

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMRegressor(objective=objective_ls).fit(x_train, y_train)
    preds = lgb_model.predict(x_test)

    # Arguments follow the documented (y_true, y_pred) order; the value is
    # the same either way because squared error is symmetric.
    assert mean_squared_error(y_test, preds) < 100
Guolin Ke's avatar
Guolin Ke committed
82
83
84
85
86
87
88
89
90
91
92


def test_binary_classification_with_custom_objective():
    """Fit ``LGBMClassifier`` with a hand-written logistic objective.

    A model is first fitted on a synthetic ``make_classification`` data set
    purely as a smoke test (its predictions are never scored).  A second
    model is fitted on the two-class digits data set with 20% held out, and
    must misclassify fewer than 10% of the held-out samples.
    """
    from sklearn import datasets, model_selection

    def logregobj(y_true, y_pred):
        """Return (gradient, hessian) of the logistic loss on raw scores."""
        y_pred = 1.0 / (1.0 + np.exp(-y_pred))
        grad = y_pred - y_true
        hess = y_pred * (1.0 - y_pred)
        return grad, hess

    # Smoke test only: fitting on a large synthetic data set must not raise.
    X, y = datasets.make_classification(n_samples=10000, n_features=100)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)
    lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)

    # ``n_class`` is passed by keyword: newer scikit-learn releases reject it
    # as a positional argument, while older ones accept either form.
    digits = datasets.load_digits(n_class=2)
    y = digits['target']
    X = digits['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=1)
    lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)

    preds = lgb_model.predict(x_test)
    # Predictions are thresholded at 0.5 before being compared to the 0/1
    # targets, exactly as the original index-based loop did.
    # NOTE(review): whether ``predict`` yields labels or scores when a custom
    # objective is used is not visible here - confirm the 0.5 cut-off.
    n_wrong = sum(1 for pred, truth in zip(preds, y_test)
                  if int(pred > 0.5) != truth)
    err = n_wrong / float(len(preds))
    assert err < 0.1

Guolin Ke's avatar
Guolin Ke committed
106
107
108
109
110
def test_early_stopping():
    """Fit ``LGBMRegressor`` with early stopping and print the best iteration.

    This is a smoke test: the model is trained for up to 500 estimators with
    the hold-out split as evaluation set (metric ``'l2'``, 10 early-stopping
    rounds), the fitted estimator is passed through ``sklearn.base.clone``,
    and ``best_iteration`` is printed.  Nothing is asserted.
    """
    # The unused ``sklearn.cross_validation.KFold`` import was dropped: that
    # module no longer exists in current scikit-learn, so importing it could
    # fail before the test body ran.
    # NOTE(review): ``load_boston`` itself is deprecated/removed in recent
    # scikit-learn releases - confirm the supported version range.
    from sklearn import model_selection
    from sklearn.base import clone
    from sklearn.datasets import load_boston

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMRegressor(n_estimators=500).fit(
        x_train, y_train,
        eval_set=[(x_test, y_test)],
        eval_metric='l2',
        early_stopping_rounds=10,
        verbose=10)

    # Cloning a fitted estimator must not raise; the clone is not used.
    clone(lgb_model)

    print(lgb_model.best_iteration)

Guolin Ke's avatar
Guolin Ke committed
125
126
127
# Run every test in order as soon as this module is executed or imported.
for _run_test in (
        test_binary_classification,
        test_multiclass_classification,
        test_regression,
        test_lambdarank,
        test_regression_with_custom_objective,
        test_binary_classification_with_custom_objective,
        test_early_stopping,
):
    _run_test()