# coding: utf-8
import ctypes
import os
from platform import system

import numpy as np
from scipy import sparse


def find_lib_path():
    """Locate the compiled LightGBM shared library relative to this file.

    Returns
    -------
    list of str
        Candidate library files that exist on disk, in search order.
        Empty when the ``LIGHTGBM_BUILD_DOC`` environment variable is set
        to a non-empty value.

    Raises
    ------
    Exception
        If none of the candidate files exists; the message lists every
        resolved path that was tried.
    """
    if os.environ.get('LIGHTGBM_BUILD_DOC', False):
        # we don't need lib_lightgbm while building docs
        return []

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    dll_path = [curr_path,
                os.path.join(curr_path, '../../'),
                os.path.join(curr_path, '../../python-package/lightgbm/compile'),
                os.path.join(curr_path, '../../python-package/compile'),
                os.path.join(curr_path, '../../lib/')]
    if system() in ('Windows', 'Microsoft'):
        # Windows builds may leave the DLL in one of several output folders
        dll_path.append(os.path.join(curr_path, '../../python-package/compile/Release/'))
        dll_path.append(os.path.join(curr_path, '../../python-package/compile/windows/x64/DLL/'))
        dll_path.append(os.path.join(curr_path, '../../Release/'))
        dll_path.append(os.path.join(curr_path, '../../windows/x64/DLL/'))
        lib_name = 'lib_lightgbm.dll'
    else:
        lib_name = 'lib_lightgbm.so'
    dll_path = [os.path.join(p, lib_name) for p in dll_path]
    # isfile() is already False for paths that do not exist
    lib_path = [p for p in dll_path if os.path.isfile(p)]
    if not lib_path:
        # resolve the '../..' segments so the error shows real locations
        dll_path = [os.path.realpath(p) for p in dll_path]
        dll_path_joined = '\n'.join(dll_path)
        raise Exception(f'Cannot find lightgbm library file in following paths:\n{dll_path_joined}')
    return lib_path


def LoadDll():
    """Load the LightGBM shared library through ctypes.

    Returns
    -------
    ctypes.CDLL or None
        The library loaded from the first path reported by
        ``find_lib_path()``, or None when that list is empty.
    """
    lib_path = find_lib_path()
    if not lib_path:
        return None
    return ctypes.cdll.LoadLibrary(lib_path[0])

# Library handle shared by the helpers in this file; LoadDll() returns None
# when find_lib_path() reports no library.
LIB = LoadDll()

if LIB is not None:
    # ctypes assumes an int return value unless told otherwise; declare the
    # C-string result so LGBM_GetLastError() comes back as bytes.
    LIB.LGBM_GetLastError.restype = ctypes.c_char_p

# Data-type codes passed to the C API alongside raw array pointers
# (presumably mirroring the C_API_DTYPE_* constants in LightGBM's c_api.h).
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


def c_str(string):
    """Wrap a Python string as a UTF-8 encoded ``ctypes.c_char_p``.

    Parameters
    ----------
    string : str
        Text to hand to a C function expecting ``const char*``.

    Returns
    -------
    ctypes.c_char_p
        Pointer object holding the UTF-8 bytes of ``string``.
    """
    return ctypes.c_char_p(string.encode('utf-8'))


def load_from_file(filename, reference):
    """Create a dataset from a file with ``LGBM_DatasetCreateFromFile``.

    Prints the library's last-error string after creation and the number of
    rows and features of the resulting dataset.

    Parameters
    ----------
    filename : str
        Path handed to the C API.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset, forwarded unchanged as the
        ``reference`` argument of the C call; None for no reference.

    Returns
    -------
    ctypes.c_void_p
        Dataset handle filled in by the C call.
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # return codes are not checked here; the last-error text is printed instead
    print(LIB.LGBM_GetLastError())
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle

def save_to_binary(handle, filename):
    """Write a dataset to ``filename`` with ``LGBM_DatasetSaveBinary``.

    Parameters
    ----------
    handle : ctypes.c_void_p
        Dataset handle, e.g. one returned by the ``load_from_*`` helpers.
    filename : str
        Destination path handed to the C API.
    """
    LIB.LGBM_DatasetSaveBinary(handle, c_str(filename))


def load_from_csr(filename, reference):
    """Create a dataset from a tab-separated file via a CSR matrix.

    Each line is parsed as ``label<TAB>feature<TAB>feature...``. The features
    are converted to a ``scipy.sparse.csr_matrix`` and passed to
    ``LGBM_DatasetCreateFromCSR``; the labels are then attached with
    ``LGBM_DatasetSetField``. Prints the resulting row and feature counts.

    Parameters
    ----------
    filename : str
        Path of the tab-separated text file to read.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset, forwarded unchanged as the
        ``reference`` argument of the C call; None for no reference.

    Returns
    -------
    ctypes.c_void_p
        Dataset handle filled in by the C call.
    """
    data = []
    label = []
    with open(filename, 'r') as inp:
        # iterate lazily; readlines() would hold the whole file in memory
        for line in inp:
            values = line.split('\t')
            data.append([float(x) for x in values[1:]])
            label.append(float(values[0]))
    mat = np.array(data, dtype=np.float64)
    label = np.array(label, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()

    # NOTE(review): the int32 pointer casts assume scipy built int32 index
    # arrays for this matrix -- confirm before using much larger inputs.
    LIB.LGBM_DatasetCreateFromCSR(
        csr.indptr.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        ctypes.c_int(dtype_int32),
        csr.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(csr.indptr)),
        ctypes.c_int64(len(csr.data)),
        ctypes.c_int64(csr.shape[1]),  # column count
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle

def load_from_csc(filename, reference):
    """Create a dataset from a tab-separated file via a CSC matrix.

    Each line is parsed as ``label<TAB>feature<TAB>feature...``. The features
    are converted to a ``scipy.sparse.csc_matrix`` and passed to
    ``LGBM_DatasetCreateFromCSC``; the labels are then attached with
    ``LGBM_DatasetSetField``. Prints the resulting row and feature counts.

    Parameters
    ----------
    filename : str
        Path of the tab-separated text file to read.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset, forwarded unchanged as the
        ``reference`` argument of the C call; None for no reference.

    Returns
    -------
    ctypes.c_void_p
        Dataset handle filled in by the C call.
    """
    data = []
    label = []
    with open(filename, 'r') as inp:
        # iterate lazily; readlines() would hold the whole file in memory
        for line in inp:
            values = line.split('\t')
            data.append([float(x) for x in values[1:]])
            label.append(float(values[0]))
    mat = np.array(data, dtype=np.float64)
    label = np.array(label, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()

    # NOTE(review): the int32 pointer casts assume scipy built int32 index
    # arrays for this matrix -- confirm before using much larger inputs.
    LIB.LGBM_DatasetCreateFromCSC(
        csc.indptr.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        ctypes.c_int(dtype_int32),
        csc.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(csc.indptr)),
        ctypes.c_int64(len(csc.data)),
        ctypes.c_int64(csc.shape[0]),  # row count
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle

def load_from_mat(filename, reference):
    """Create a dataset from a tab-separated file via a dense matrix.

    Each line is parsed as ``label<TAB>feature<TAB>feature...``. The features
    are flattened into a 1-D float64 buffer and passed to
    ``LGBM_DatasetCreateFromMat``; the labels are then attached with
    ``LGBM_DatasetSetField``. Prints the resulting row and feature counts.

    Parameters
    ----------
    filename : str
        Path of the tab-separated text file to read.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset, forwarded unchanged as the
        ``reference`` argument of the C call; None for no reference.

    Returns
    -------
    ctypes.c_void_p
        Dataset handle filled in by the C call.
    """
    data = []
    label = []
    with open(filename, 'r') as inp:
        # iterate lazily; readlines() would hold the whole file in memory
        for line in inp:
            values = line.split('\t')
            data.append([float(x) for x in values[1:]])
            label.append(float(values[0]))
    mat = np.array(data, dtype=np.float64)
    # asarray copies only when needed; np.array(..., copy=False) changed
    # meaning in NumPy 2.0 and raises if a copy turns out to be required
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    label = np.array(label, dtype=np.float32)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromMat(
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),  # presumably the row-major flag -- see the C API reference
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle


def free_dataset(handle):
    """Release a dataset handle with ``LGBM_DatasetFree``.

    Parameters
    ----------
    handle : ctypes.c_void_p
        Dataset handle, e.g. one returned by the ``load_from_*`` helpers.
    """
    LIB.LGBM_DatasetFree(handle)

def test_dataset():
    """Exercise dataset creation, binary save/reload and freeing.

    Loads the binary-classification training file, builds the test set three
    times (dense, CSR, CSC) with the training set as reference, then saves
    the training set to ``train.binary.bin`` in the current working directory
    and loads it back.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = load_from_file(train_path, None)
    # same order as before: dense matrix, then CSR, then CSC
    for loader in (load_from_mat, load_from_csr, load_from_csc):
        test = loader(test_path, train)
        free_dataset(test)
    save_to_binary(train, 'train.binary.bin')
    free_dataset(train)
    train = load_from_file('train.binary.bin', None)
    free_dataset(train)


def test_booster():
    """Train, save, reload and predict with a booster through the C API.

    Trains for 50 iterations on the binary-classification example, printing
    the evaluation result every 10th iteration, and saves the model to
    ``model.txt``. It then reloads the model and predicts on the test file
    once from an in-memory matrix and twice from the file itself, writing
    ``preb.txt``. Both output files land in the current working directory.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = load_from_mat(train_path, None)
    test = load_from_mat(test_path, train)
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(
        train,
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)
    is_finished = ctypes.c_int(0)
    for i in range(1, 51):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_int(0)
        # NOTE(review): index 0 is requested here while the message below says
        # "test AUC" -- confirm against the C API which dataset index 0 denotes.
        LIB.LGBM_BoosterGetEval(
            booster,
            ctypes.c_int(0),
            ctypes.byref(out_len),
            result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        if i % 10 == 0:
            print(f'{i} iteration test AUC {result[0]:.6f}')
    LIB.LGBM_BoosterSaveModel(
        booster,
        ctypes.c_int(0),
        ctypes.c_int(-1),
        ctypes.c_int(0),
        c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    free_dataset(train)
    free_dataset(test)

    # reload the saved model into a fresh booster handle
    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_int(0)
    LIB.LGBM_BoosterCreateFromModelfile(
        c_str('model.txt'),
        ctypes.byref(num_total_model),
        ctypes.byref(booster2))
    data = []
    with open(test_path, 'r') as inp:
        # first column is the label; only the features are needed here
        for line in inp:
            data.append([float(x) for x in line.split('\t')[1:]])
    mat = np.array(data, dtype=np.float64)
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_int64(0)
    # asarray copies only when needed; np.array(..., copy=False) changed
    # meaning in NumPy 2.0 and raises if a copy turns out to be required
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),
        ctypes.c_int(1),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(''),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(test_path),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(''),
        c_str('preb.txt'))
    # second file prediction differs only in the third integer argument (10)
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(test_path),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(10),
        ctypes.c_int(25),
        c_str(''),
        c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)