"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "72e8106d3bbb60dd4f670729fbe6e4421676ffcb"
test_sklearn.py 3.96 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
import numpy as np
import random
import lightgbm as lgb


# Module-level NumPy random state created with the fixed seed 2016.
# NOTE(review): no test visible in this section references ``rng`` -- confirm
# whether it is still needed.
rng = np.random.RandomState(2016)

def test_binary_classification():
    """Check that ``LGBMClassifier`` reaches < 10% error on two-class digits.

    Two models are fitted:

    1. A smoke-test fit on a synthetic 10000 x 100 problem. Its predictions
       are not scored; the fit only has to complete without raising.
    2. A fit on the two-class digits data, whose held-out error rate must be
       below 0.1.
    """
    from sklearn import datasets, model_selection

    # Smoke test on synthetic data. Seeded so that reruns see the same data.
    X, y = datasets.make_classification(n_samples=10000, n_features=100,
                                        random_state=1)
    x_train, _, y_train, _ = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)
    lgb.LGBMClassifier().fit(x_train, y_train)

    # ``n_class`` is passed by keyword: recent scikit-learn releases reject
    # it as a positional argument.
    digits = datasets.load_digits(n_class=2)
    X = digits['data']
    y = digits['target']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)
    lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)

    # Threshold at 0.5 so the check works whether ``predict`` yields 0/1
    # labels or positive-class probabilities.
    preds = np.asarray(lgb_model.predict(x_test))
    err = np.mean((preds > 0.5).astype(int) != np.asarray(y_test))
    assert err < 0.1

def test_multiclass_classification():
    """Check that ``LGBMClassifier`` beats a 70% error rate on 4 classes.

    The data is a synthetic 10000 x 100 four-class problem with three
    informative features; 10% of it is held out for scoring.
    """
    from sklearn import datasets, model_selection

    # Seeded so that reruns see the same data.
    X, y = datasets.make_classification(n_samples=10000, n_features=100,
                                        n_classes=4, n_informative=3,
                                        random_state=1)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
    preds = np.asarray(lgb_model.predict(x_test))

    # Compare predicted class labels with the true labels directly.
    # Thresholding the prediction at 0.5 (as a binary test would) collapses
    # classes 1-3 into a single value and miscounts the errors.
    # NOTE(review): assumes ``predict`` returns one class label per row --
    # confirm for the LightGBM version under test.
    err = np.mean(preds != np.asarray(y_test))
    assert err < 0.7

def test_regression():
    """Check that ``LGBMRegressor`` reaches MSE < 100 on the Boston data.

    10% of the samples are held out (``random_state=1``) and the mean
    squared error of the predictions on that hold-out set is asserted.
    """
    from sklearn import model_selection
    # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2. The MSE
    # threshold below is specific to this dataset, so it is kept as is; a
    # replacement dataset needs a re-tuned threshold.
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error

    boston = load_boston()
    X = boston['data']
    y = boston['target']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMRegressor().fit(x_train, y_train)
    preds = lgb_model.predict(x_test)

    # Documented argument order is (y_true, y_pred); MSE is symmetric, so
    # the value is unchanged.
    assert mean_squared_error(y_test, preds) < 100
Guolin Ke's avatar
Guolin Ke committed
58
59
60
61
62
63
64
65
66
67
68
69
70

def test_regression_with_custom_objective():
    """Check ``LGBMRegressor`` with a user-supplied least-squares objective.

    The model is trained on the Boston data with ``objective_ls`` as its
    objective and must reach MSE < 100 on a 10% hold-out set.
    """
    from sklearn import model_selection
    # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2. The MSE
    # threshold below is specific to this dataset, so it is kept as is; a
    # replacement dataset needs a re-tuned threshold.
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error

    def objective_ls(y_true, y_pred):
        """Return ``(grad, hess)``: residuals and an all-ones Hessian."""
        grad = (y_pred - y_true)
        hess = np.ones(len(y_true))
        return grad, hess

    boston = load_boston()
    X = boston['data']
    y = boston['target']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)

    lgb_model = lgb.LGBMRegressor(objective=objective_ls).fit(x_train, y_train)
    preds = lgb_model.predict(x_test)

    # Documented argument order is (y_true, y_pred); MSE is symmetric, so
    # the value is unchanged.
    assert mean_squared_error(y_test, preds) < 100
Guolin Ke's avatar
Guolin Ke committed
75
76
77
78
79
80
81
82
83
84
85


def test_binary_classification_with_custom_objective():
    """Check ``LGBMClassifier`` with a user-supplied logistic objective.

    Two models are fitted with ``logregobj`` as the objective:

    1. A smoke-test fit on a synthetic 10000 x 100 problem. Its predictions
       are not scored; the fit only has to complete without raising.
    2. A fit on the two-class digits data (20% held out), whose hold-out
       error rate must be below 0.1.
    """
    from sklearn import datasets, model_selection

    def logregobj(y_true, y_pred):
        """Return ``(grad, hess)`` computed from the sigmoid of ``y_pred``."""
        y_pred = 1.0 / (1.0 + np.exp(-y_pred))
        grad = y_pred - y_true
        hess = y_pred * (1.0 - y_pred)
        return grad, hess

    # Smoke test on synthetic data. Seeded so that reruns see the same data.
    X, y = datasets.make_classification(n_samples=10000, n_features=100,
                                        random_state=1)
    x_train, _, y_train, _ = model_selection.train_test_split(
        X, y, test_size=0.1, random_state=1)
    lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)

    # ``n_class`` is passed by keyword: recent scikit-learn releases reject
    # it as a positional argument.
    digits = datasets.load_digits(n_class=2)
    X = digits['data']
    y = digits['target']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=1)
    lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)

    # NOTE(review): with a custom objective some LightGBM versions may return
    # raw scores from ``predict`` -- confirm that 0.5 is the intended cut-off.
    preds = np.asarray(lgb_model.predict(x_test))
    err = np.mean((preds > 0.5).astype(int) != np.asarray(y_test))
    assert err < 0.1

# Execute every test, in definition order, as soon as this module is loaded.
for _run_test in (
    test_binary_classification,
    test_multiclass_classification,
    test_regression,
    test_regression_with_custom_objective,
    test_binary_classification_with_custom_objective,
):
    _run_test()