"src/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "d0d70716ce8351ec82d1ad42d25ff3779ee5a94d"
test_sklearn.py 4.35 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy as np
import random
import lightgbm as lgb


rng = np.random.RandomState(2016)

def test_binary_classification():
    """Fit LGBMClassifier on binary data; misclassification rate must be < 10%.

    First trains on a synthetic problem (smoke test for ``eval_set``), then on
    the two-class digits subset where accuracy is actually asserted.
    """
    from sklearn import datasets, model_selection

    # Synthetic binary problem: only verifies fit() with eval_set/eval_metric runs.
    X, y = datasets.make_classification(n_samples=10000, n_features=100)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
    lgb.LGBMClassifier().fit(x_train, y_train,
                             eval_set=[(x_train, y_train), (x_test, y_test)],
                             eval_metric='binary_logloss')

    # Two-class digits data: accuracy is asserted below.
    from sklearn.datasets import load_digits
    digits = load_digits(n_class=2)  # keyword arg: positional form removed in modern scikit-learn
    y = digits['target']
    X = digits['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)
    lgb_model = lgb.LGBMClassifier().fit(x_train, y_train,
                                         eval_set=[(x_train, y_train), (x_test, y_test)],
                                         eval_metric='binary_logloss')
    preds = lgb_model.predict(x_test)
    # Fraction of misclassified samples (vectorized form of the old index loop).
    err = np.mean((np.asarray(preds) > 0.5).astype(int) != y_test)
    assert err < 0.1

def test_multiclass_classification():
    """Fit LGBMClassifier on a 4-class problem; error rate must be < 70%."""
    from sklearn import datasets, model_selection

    def check_pred(preds, labels):
        # Compare predicted class labels directly. The previous
        # ``int(preds[i] > 0.5)`` binarization is only meaningful for binary
        # targets and miscounts every multiclass prediction of class >= 1.
        err = np.mean(np.asarray(preds) != np.asarray(labels))
        assert err < 0.7

    X, y = datasets.make_classification(n_samples=10000, n_features=100,
                                        n_classes=4, n_informative=3)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
    lgb_model = lgb.LGBMClassifier().fit(x_train, y_train,
                                         eval_set=[(x_train, y_train), (x_test, y_test)],
                                         eval_metric='multi_logloss')
    preds = lgb_model.predict(x_test)
    check_pred(preds, y_test)

def test_regression():
    """Fit LGBMRegressor on Boston housing; held-out MSE must be < 40."""
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn import model_selection
    # NOTE: dropped the unused `from sklearn.cross_validation import KFold` --
    # that module was removed in scikit-learn 0.20, so the dead import alone
    # made this test fail to even start on modern installs.

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
    lgb_model = lgb.LGBMRegressor().fit(x_train, y_train,
                                        eval_set=[(x_train, y_train), (x_test, y_test)],
                                        eval_metric='l2')
    preds = lgb_model.predict(x_test)
    # mean_squared_error is symmetric, but pass (y_true, y_pred) per sklearn convention.
    assert mean_squared_error(y_test, preds) < 40

def test_regression_with_custom_objective():
    """Fit LGBMRegressor with a hand-written L2 objective; MSE must be < 40."""
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn import model_selection
    # NOTE: dropped the unused `from sklearn.cross_validation import KFold` --
    # that module was removed in scikit-learn 0.20 and broke this test on
    # modern installs before any assertion could run.

    def objective_ls(y_true, y_pred):
        # Least-squares objective: gradient (y_pred - y_true), constant hessian.
        grad = y_pred - y_true
        hess = np.ones(len(y_true))
        return grad, hess

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
    lgb_model = lgb.LGBMRegressor(objective=objective_ls).fit(
        x_train, y_train,
        eval_set=[(x_train, y_train), (x_test, y_test)],
        eval_metric='l2')
    preds = lgb_model.predict(x_test)
    assert mean_squared_error(y_test, preds) < 40


def test_binary_classification_with_custom_objective():
    """Fit LGBMClassifier with a hand-written logistic objective; error < 10%."""
    from sklearn import datasets, model_selection

    def logregobj(y_true, y_pred):
        # Gradient and hessian of the logistic loss w.r.t. the raw score.
        y_pred = 1.0 / (1.0 + np.exp(-y_pred))
        grad = y_pred - y_true
        hess = y_pred * (1.0 - y_pred)
        return grad, hess

    # Synthetic binary problem: smoke test for fit() with the custom objective.
    X, y = datasets.make_classification(n_samples=10000, n_features=100)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
    lgb.LGBMClassifier(objective=logregobj).fit(
        x_train, y_train,
        eval_set=[(x_train, y_train), (x_test, y_test)],
        eval_metric='binary_logloss')

    # Two-class digits data: accuracy is asserted below.
    from sklearn.datasets import load_digits
    digits = load_digits(n_class=2)  # keyword arg: positional form removed in modern scikit-learn
    y = digits['target']
    X = digits['data']
    x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)
    lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(
        x_train, y_train,
        eval_set=[(x_train, y_train), (x_test, y_test)],
        eval_metric='binary_logloss')
    preds = lgb_model.predict(x_test)
    # NOTE(review): with a custom objective, predictions are raw scores rather
    # than labels; the 0.5 threshold is kept from the original -- confirm intent.
    err = np.mean((np.asarray(preds) > 0.5).astype(int) != y_test)
    assert err < 0.1

# Run every test case when executed as a script. The __main__ guard keeps
# importing this module (e.g. under a test collector) from retraining five
# models as a side effect.
if __name__ == "__main__":
    test_binary_classification()
    test_multiclass_classification()
    test_regression()
    test_regression_with_custom_objective()
    test_binary_classification_with_custom_objective()