# coding: utf-8
import ctypes
import tempfile
from pathlib import Path
from platform import system

import numpy as np
from scipy import sparse

# Prefer the library object already loaded by the installed Python package;
# otherwise fall back to a compiled library located relative to this file.
try:
    from lightgbm.basic import _LIB as LIB
except ModuleNotFoundError:
    print("Could not import lightgbm Python package, looking for lib_lightgbm at the repo root")
    if system() in ('Windows', 'Microsoft'):
        lib_file = Path(__file__).absolute().parents[2] / "Release" / "lib_lightgbm.dll"
    else:
        lib_file = Path(__file__).absolute().parents[2] / "lib_lightgbm.so"
    # Pass a plain string: ctypes only documents path-like library names
    # from Python 3.12 onwards.
    LIB = ctypes.cdll.LoadLibrary(str(lib_file))

# ctypes assumes an ``int`` return value by default; the error message is a C string.
LIB.LGBM_GetLastError.restype = ctypes.c_char_p

# Integer codes passed to the C API next to a raw pointer to tell it how to
# interpret the pointed-to elements.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
27
def c_str(string: str) -> ctypes.c_char_p:
    """Encode ``string`` as UTF-8 and wrap the resulting bytes in a ``ctypes.c_char_p``."""
    return ctypes.c_char_p(string.encode('utf-8'))
Guolin Ke's avatar
Guolin Ke committed
29

wxchan's avatar
wxchan committed
30

31
def load_from_file(filename, reference):
    """Create a Dataset from a file through the C API and return its handle.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path of the data file; converted with ``str()`` before being handed to the library.
    reference : ctypes.c_void_p or None
        Handle forwarded as the ``reference`` argument of the C call, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromFile``.
    """
    handle = ctypes.c_void_p()
    ret = LIB.LGBM_DatasetCreateFromFile(
        c_str(str(filename)),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    print(LIB.LGBM_GetLastError())
    # Stop here instead of continuing with an unusable handle.
    assert ret == 0, LIB.LGBM_GetLastError()

    num_data = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    assert ret == 0, LIB.LGBM_GetLastError()
    num_feature = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    assert ret == 0, LIB.LGBM_GetLastError()
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle

wxchan's avatar
wxchan committed
49

50
def save_to_binary(handle, filename):
    """Write the Dataset behind ``handle`` to ``filename`` with ``LGBM_DatasetSaveBinary``.

    ``filename`` may be a ``str`` or a ``pathlib.Path``; it is converted with
    ``str()`` first, the same way the ``load_from_*`` helpers treat their paths.
    """
    ret = LIB.LGBM_DatasetSaveBinary(handle, c_str(str(filename)))
    assert ret == 0, LIB.LGBM_GetLastError()


54
def load_from_csr(filename, reference):
    """Read a text file, convert its features to CSR and build a Dataset from them.

    The first column of the file is used as the label, the remaining columns
    as the feature matrix.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path of a file readable by ``numpy.loadtxt``.
    reference : ctypes.c_void_p or None
        Handle forwarded as the ``reference`` argument of the C call, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromCSR``, with the label field set.
    """
    data = np.loadtxt(str(filename), dtype=np.float64)
    csr = sparse.csr_matrix(data[:, 1:])
    label = data[:, 0].astype(np.float32)
    handle = ctypes.c_void_p()

    # NOTE(review): indptr/indices are handed over as int32 pointers; this assumes
    # scipy built the matrix with int32 index arrays - confirm for very large inputs.
    ret = LIB.LGBM_DatasetCreateFromCSR(
        csr.indptr.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        ctypes.c_int(dtype_int32),
        csr.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(csr.indptr)),
        ctypes.c_int64(len(csr.data)),
        ctypes.c_int64(csr.shape[1]),  # number of columns
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    assert ret == 0, LIB.LGBM_GetLastError()

    num_data = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    assert ret == 0, LIB.LGBM_GetLastError()
    num_feature = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    assert ret == 0, LIB.LGBM_GetLastError()

    ret = LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    assert ret == 0, LIB.LGBM_GetLastError()
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle

wxchan's avatar
wxchan committed
88

89
def load_from_csc(filename, reference):
    """Read a text file, convert its features to CSC and build a Dataset from them.

    The first column of the file is used as the label, the remaining columns
    as the feature matrix.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path of a file readable by ``numpy.loadtxt``.
    reference : ctypes.c_void_p or None
        Handle forwarded as the ``reference`` argument of the C call, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromCSC``, with the label field set.
    """
    data = np.loadtxt(str(filename), dtype=np.float64)
    csc = sparse.csc_matrix(data[:, 1:])
    label = data[:, 0].astype(np.float32)
    handle = ctypes.c_void_p()

    # NOTE(review): indptr/indices are handed over as int32 pointers; this assumes
    # scipy built the matrix with int32 index arrays - confirm for very large inputs.
    ret = LIB.LGBM_DatasetCreateFromCSC(
        csc.indptr.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        ctypes.c_int(dtype_int32),
        csc.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(csc.indptr)),
        ctypes.c_int64(len(csc.data)),
        ctypes.c_int64(csc.shape[0]),  # number of rows
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    assert ret == 0, LIB.LGBM_GetLastError()

    num_data = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    assert ret == 0, LIB.LGBM_GetLastError()
    num_feature = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    assert ret == 0, LIB.LGBM_GetLastError()

    ret = LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    assert ret == 0, LIB.LGBM_GetLastError()
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle

wxchan's avatar
wxchan committed
123

124
def load_from_mat(filename, reference):
    """Read a text file and build a Dataset from its dense feature matrix.

    The first column of the file is used as the label, the remaining columns
    as the feature matrix.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path of a file readable by ``numpy.loadtxt``.
    reference : ctypes.c_void_p or None
        Handle forwarded as the ``reference`` argument of the C call, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromMat``, with the label field set.
    """
    mat = np.loadtxt(str(filename), dtype=np.float64)
    label = mat[:, 0].astype(np.float32)
    mat = mat[:, 1:]
    # Flatten in C (row-major) order. ``asarray`` copies only when required,
    # which is what ``np.array(..., copy=False)`` meant before NumPy 2.
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    handle = ctypes.c_void_p()

    ret = LIB.LGBM_DatasetCreateFromMat(
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),  # is_row_major flag per the C API signature
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    assert ret == 0, LIB.LGBM_GetLastError()

    num_data = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    assert ret == 0, LIB.LGBM_GetLastError()
    num_feature = ctypes.c_int(0)
    ret = LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    assert ret == 0, LIB.LGBM_GetLastError()

    ret = LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    assert ret == 0, LIB.LGBM_GetLastError()
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle
wxchan's avatar
wxchan committed
155
156


157
def free_dataset(handle):
    """Release the Dataset behind ``handle`` with ``LGBM_DatasetFree``."""
    ret = LIB.LGBM_DatasetFree(handle)
    assert ret == 0, LIB.LGBM_GetLastError()

wxchan's avatar
wxchan committed
160

161
def test_dataset():
    """Build Datasets from file, dense, CSR and CSC inputs, then round-trip one through a binary file."""
    binary_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'binary_classification'
    train = load_from_file(binary_example_dir / 'binary.train', None)

    # Every in-memory loader uses the file-based training Dataset as reference.
    for loader in (load_from_mat, load_from_csr, load_from_csc):
        test = loader(binary_example_dir / 'binary.test', train)
        free_dataset(test)

    # Write the binary file into a throw-away directory so that repeated runs
    # neither leave artifacts in the working directory nor see a stale file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        binary_file = str(Path(tmp_dir) / 'train.binary.bin')
        save_to_binary(train, binary_file)
        free_dataset(train)
        train = load_from_file(binary_file, None)
        free_dataset(train)
wxchan's avatar
wxchan committed
174
175


176
def test_booster():
    """Train a binary classifier through the C API, save it, reload it and predict with it."""

    def check(ret):
        """Fail with the library's last error message when a C API call does not return 0."""
        assert ret == 0, LIB.LGBM_GetLastError()

    binary_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'binary_classification'
    train = load_from_mat(binary_example_dir / 'binary.train', None)
    test = load_from_mat(binary_example_dir / 'binary.test', train)
    booster = ctypes.c_void_p()
    check(LIB.LGBM_BoosterCreate(
        train,
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        ctypes.byref(booster)))
    check(LIB.LGBM_BoosterAddValidData(booster, test))
    is_finished = ctypes.c_int(0)
    for i in range(1, 51):
        check(LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished)))
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_int(0)
        # data_idx 1 selects the first validation set (0 is the training data),
        # which is what the "test AUC" message below reports.
        check(LIB.LGBM_BoosterGetEval(
            booster,
            ctypes.c_int(1),
            ctypes.byref(out_len),
            result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
        if i % 10 == 0:
            print(f'{i} iteration test AUC {result[0]:.6f}')

    # Keep the model and prediction files out of the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_file = str(Path(tmp_dir) / 'model.txt')
        pred_file = str(Path(tmp_dir) / 'preb.txt')

        check(LIB.LGBM_BoosterSaveModel(
            booster,
            ctypes.c_int(0),   # start_iteration
            ctypes.c_int(-1),  # num_iteration
            ctypes.c_int(0),   # feature_importance_type
            c_str(model_file)))
        check(LIB.LGBM_BoosterFree(booster))
        free_dataset(train)
        free_dataset(test)

        booster2 = ctypes.c_void_p()
        num_total_model = ctypes.c_int(0)
        check(LIB.LGBM_BoosterCreateFromModelfile(
            c_str(model_file),
            ctypes.byref(num_total_model),
            ctypes.byref(booster2)))

        raw = np.loadtxt(str(binary_example_dir / 'binary.test'), dtype=np.float64)
        mat = raw[:, 1:]  # drop the label column
        preb = np.empty(mat.shape[0], dtype=np.float64)
        num_preb = ctypes.c_int64(0)
        # Flatten in C (row-major) order; ``asarray`` copies only when required.
        flat = np.asarray(mat.reshape(mat.size), dtype=np.float64)
        check(LIB.LGBM_BoosterPredictForMat(
            booster2,
            flat.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.c_int(dtype_float64),
            ctypes.c_int32(mat.shape[0]),
            ctypes.c_int32(mat.shape[1]),
            ctypes.c_int(1),   # is_row_major
            ctypes.c_int(1),   # predict_type
            ctypes.c_int(0),   # start_iteration
            ctypes.c_int(25),  # num_iteration
            c_str(''),
            ctypes.byref(num_preb),
            preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))

        # Predict straight from the file twice, the second time starting at iteration 10.
        for start_iteration in (0, 10):
            check(LIB.LGBM_BoosterPredictForFile(
                booster2,
                c_str(str(binary_example_dir / 'binary.test')),
                ctypes.c_int(0),  # data_has_header
                ctypes.c_int(0),  # predict_type
                ctypes.c_int(start_iteration),
                ctypes.c_int(25),  # num_iteration
                c_str(''),
                c_str(pred_file)))
        check(LIB.LGBM_BoosterFree(booster2))
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282


def test_max_thread_control():
    """Exercise LGBM_GetMaxThreads / LGBM_SetMaxThreads through the C API."""
    current = ctypes.c_int(0)

    def read_max_threads():
        # Query the library and hand back the value it wrote into ``current``.
        status = LIB.LGBM_GetMaxThreads(
            ctypes.byref(current)
        )
        assert status == 0
        return current.value

    def write_max_threads(value):
        # Push a new limit through the C API and require a success code.
        status = LIB.LGBM_SetMaxThreads(
            ctypes.c_int(value)
        )
        assert status == 0

    # at initialization, should be -1
    assert read_max_threads() == -1

    # updating that value through the C API should work
    write_max_threads(6)
    assert read_max_threads() == 6

    # resetting to any negative number should set it to -1
    write_max_threads(-123)
    assert read_max_threads() == -1