test.py 8.15 KB
Newer Older
wxchan's avatar
wxchan committed
1
2
# coding: utf-8
# pylint: skip-file
Guolin Ke's avatar
Guolin Ke committed
3
import ctypes
wxchan's avatar
wxchan committed
4
import os
Guolin Ke's avatar
Guolin Ke committed
5
6

import numpy as np
7
import pytest
Guolin Ke's avatar
Guolin Ke committed
8
9
from scipy import sparse

wxchan's avatar
wxchan committed
10

Guolin Ke's avatar
Guolin Ke committed
11
def LoadDll():
    """Load the LightGBM shared library located relative to this test file.

    On Windows (``os.name == 'nt'``) the DLL under ``../../windows/x64/DLL``
    is loaded; on every other platform ``../../lib_lightgbm.so`` is loaded.

    Returns:
        The library object produced by ``ctypes.cdll.LoadLibrary``.

    Raises:
        OSError: if the shared library cannot be loaded.
    """
    # Resolve symlinks first so the relative hop starts from the real location
    # of this file.
    here = os.path.dirname(os.path.realpath(__file__))
    if os.name == 'nt':
        relative = '../../windows/x64/DLL/lib_lightgbm.dll'
    else:
        relative = '../../lib_lightgbm.so'
    return ctypes.cdll.LoadLibrary(os.path.join(here, relative))

wxchan's avatar
wxchan committed
19

Guolin Ke's avatar
Guolin Ke committed
20
21
# Shared-library handle, loaded once at import time; the helpers and tests in
# this module call the C API through it.
LIB = LoadDll()

Guolin Ke's avatar
Guolin Ke committed
22
23
# Declare the return type so ctypes hands the error message back as bytes
# rather than applying its default integer return conversion.
LIB.LGBM_GetLastError.restype = ctypes.c_char_p

24
25
26
27
28
29
# Element-type codes passed to the C API next to raw buffers so the library
# knows how to read them (presumably mirroring the C_API_DTYPE_* values in
# c_api.h -- confirm against the header).
dtype_float32, dtype_float64 = 0, 1
dtype_int32, dtype_int64 = 2, 3


Guolin Ke's avatar
Guolin Ke committed
30
31
32
def c_array(ctype, values):
    """Copy ``values`` into a newly allocated ctypes array of ``ctype``.

    Args:
        ctype: ctypes element type, e.g. ``ctypes.c_int``.
        values: sized iterable whose items are convertible to ``ctype``.

    Returns:
        A ctypes array instance of length ``len(values)``.
    """
    array_type = ctype * len(values)
    return array_type(*values)

wxchan's avatar
wxchan committed
33

Guolin Ke's avatar
Guolin Ke committed
34
def c_str(string):
    """Return ``string`` as a ``ctypes.c_char_p`` holding its ASCII bytes.

    Args:
        string: text to hand to a C function expecting ``const char*``.

    Returns:
        A ``ctypes.c_char_p`` wrapping the ASCII-encoded bytes.

    Raises:
        UnicodeEncodeError: if ``string`` contains non-ASCII characters.
    """
    ascii_bytes = string.encode('ascii')
    return ctypes.c_char_p(ascii_bytes)
Guolin Ke's avatar
Guolin Ke committed
36

wxchan's avatar
wxchan committed
37

38
@pytest.mark.skip
39
40
def test_load_from_file(filename, reference):
    """Create a dataset from a data file and print its dimensions.

    Args:
        filename: path passed to ``LGBM_DatasetCreateFromFile``.
        reference: handle of an existing dataset to pass as the reference
            argument, or ``None``.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the create call.
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # Return codes are not checked here, so print the library's last error
    # text to make a failed load visible in the test output.
    print(LIB.LGBM_GetLastError())
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
56

57
@pytest.mark.skip
58
59
60
61
def test_save_to_binary(handle, filename):
    """Ask the library to write the dataset behind ``handle`` to ``filename``.

    Args:
        handle: dataset handle passed to ``LGBM_DatasetSaveBinary``.
        filename: destination path (ASCII only, see ``c_str``).
    """
    binary_path = c_str(filename)
    LIB.LGBM_DatasetSaveBinary(handle, binary_path)


62
@pytest.mark.skip
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
67
def test_load_from_csr(filename, reference):
    """Create a dataset from a tab-separated file via the CSR entry point.

    Each line is split on tabs; the first field is the label and the
    remaining fields are the feature values.

    Args:
        filename: path of the tab-separated text file to read.
        reference: handle of an existing dataset to pass as the reference
            argument, or ``None``.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the create call.
    """
    rows = []
    labels = []
    # ``with`` closes the file even when a malformed field makes float() raise.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            rows.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    # float64 is what the call below declares via dtype_float64.
    mat = np.array(rows, dtype=np.float64)
    label = np.array(labels, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csr.indptr),
        len(csr.data),
        csr.shape[1],
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
99

100
@pytest.mark.skip
101
102
103
104
105
def test_load_from_csc(filename, reference):
    """Create a dataset from a tab-separated file via the CSC entry point.

    Each line is split on tabs; the first field is the label and the
    remaining fields are the feature values.

    Args:
        filename: path of the tab-separated text file to read.
        reference: handle of an existing dataset to pass as the reference
            argument, or ``None``.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the create call.
    """
    rows = []
    labels = []
    # ``with`` closes the file even when a malformed field makes float() raise.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            rows.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    # float64 is what the call below declares via dtype_float64.
    mat = np.array(rows, dtype=np.float64)
    label = np.array(labels, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csc.indptr),
        len(csc.data),
        # The CSC call is given the row count, whereas the CSR variant passes
        # the column count.
        csc.shape[0],
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
137

138
@pytest.mark.skip
139
140
141
142
143
def test_load_from_mat(filename, reference):
    """Create a dataset from a tab-separated file via the dense-matrix entry point.

    Each line is split on tabs; the first field is the label and the
    remaining fields are the feature values.

    Args:
        filename: path of the tab-separated text file to read.
        reference: handle of an existing dataset to pass as the reference
            argument, or ``None``.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the create call.
    """
    rows = []
    labels = []
    # ``with`` closes the file even when a malformed field makes float() raise.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            rows.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    # float64 is what the call below declares via dtype_float64.
    mat = np.array(rows, dtype=np.float64)
    # Flatten to 1-D in row order; reshaping the freshly built contiguous
    # array needs no extra np.array(..., copy=False) wrapper.
    data = mat.reshape(mat.size)
    label = np.array(labels, dtype=np.float32)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromMat(
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,  # presumably the row-major flag -- confirm against c_api.h
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle
wxchan's avatar
wxchan committed
171
172


173
@pytest.mark.skip
174
175
176
def test_free_dataset(handle):
    """Release the dataset behind ``handle`` through ``LGBM_DatasetFree``.

    Args:
        handle: dataset handle returned by one of the loader helpers.
    """
    release = LIB.LGBM_DatasetFree
    release(handle)

wxchan's avatar
wxchan committed
177

178
def test_dataset():
    """Exercise dataset creation, binary save/reload, and release.

    Loads the binary-classification training file, builds the test set
    against it through the dense, CSR, and CSC loaders in turn (freeing each),
    saves the training set to ``train.binary.bin``, and reloads it from there.
    """
    # Example data lives relative to this file; build both paths once.
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = test_load_from_file(train_path, None)
    for loader in (test_load_from_mat, test_load_from_csr, test_load_from_csc):
        test = loader(test_path, train)
        test_free_dataset(test)
    test_save_to_binary(train, 'train.binary.bin')
    test_free_dataset(train)
    # Round trip: the file written above must be loadable as a dataset.
    train = test_load_from_file('train.binary.bin', None)
    test_free_dataset(train)
wxchan's avatar
wxchan committed
190
191


192
def test_booster():
    """Train a booster, save it, reload it, and run predictions.

    Runs 100 update iterations on the binary-classification example with the
    test set attached as validation data, printing the evaluation value every
    10th iteration. The model is saved to ``model.txt``, reloaded into a
    second booster, and used for an in-memory prediction and a file
    prediction written to ``preb.txt``.
    """
    # Example data lives relative to this file; build both paths once.
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = test_load_from_mat(train_path, None)
    test = test_load_from_mat(test_path, train)
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)
    is_finished = ctypes.c_int(0)
    for i in range(1, 101):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        # One-element buffer that the eval call writes its result into.
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_ulong(0)
        LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        if i % 10 == 0:
            print('%d Iteration test AUC %f' % (i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)
    test_free_dataset(test)

    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_long()
    LIB.LGBM_BoosterCreateFromModelfile(c_str('model.txt'), ctypes.byref(num_total_model), ctypes.byref(booster2))
    # ``with`` closes the file even when a malformed field makes float() raise.
    # The first tab-separated field (the label) is dropped; only the features
    # are fed to prediction.
    with open(test_path, 'r') as inp:
        rows = [[float(x) for x in line.split('\t')[1:]] for line in inp]
    # float64 is what the predict call below declares via dtype_float64.
    mat = np.array(rows, dtype=np.float64)
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_long()
    # Flatten to 1-D in row order; no np.array(..., copy=False) wrapper needed.
    data = mat.reshape(mat.size)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        50,
        ctypes.c_void_p(),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(booster2, c_str(test_path), 0, 0, 50, ctypes.c_void_p(), c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)