test_.py 9.97 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
Guolin Ke's avatar
Guolin Ke committed
2
import ctypes
wxchan's avatar
wxchan committed
3
import os
4
5
from platform import system

Guolin Ke's avatar
Guolin Ke committed
6
7
8
import numpy as np
from scipy import sparse

wxchan's avatar
wxchan committed
9

Guolin Ke's avatar
Guolin Ke committed
10
11
12
13
14
15
def find_lib_path():
    """Locate the compiled LightGBM shared library relative to this file.

    Returns
    -------
    list of str
        Paths of the library files that exist, in search order. The list is
        empty only when the ``LIGHTGBM_BUILD_DOC`` environment variable is set
        to a non-empty value.

    Raises
    ------
    Exception
        If none of the candidate locations contains the library file.
    """
    if os.environ.get('LIGHTGBM_BUILD_DOC', False):
        # we don't need lib_lightgbm while building docs
        return []

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    search_dirs = [curr_path,
                   os.path.join(curr_path, '../../'),
                   os.path.join(curr_path, '../../python-package/lightgbm/compile'),
                   os.path.join(curr_path, '../../python-package/compile'),
                   os.path.join(curr_path, '../../lib/')]
    if system() in ('Windows', 'Microsoft'):
        # Extra output directories searched only on Windows.
        search_dirs.extend([
            os.path.join(curr_path, '../../python-package/compile/Release/'),
            os.path.join(curr_path, '../../python-package/compile/windows/x64/DLL/'),
            os.path.join(curr_path, '../../Release/'),
            os.path.join(curr_path, '../../windows/x64/DLL/'),
        ])
        lib_name = 'lib_lightgbm.dll'
    else:
        lib_name = 'lib_lightgbm.so'

    candidates = [os.path.join(d, lib_name) for d in search_dirs]
    # os.path.isfile() is already False for paths that do not exist.
    lib_path = [p for p in candidates if os.path.isfile(p)]
    if not lib_path:
        # Report fully resolved paths so the message is readable despite the '../..' segments.
        resolved = [os.path.realpath(p) for p in candidates]
        raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(resolved))
    return lib_path


def LoadDll():
    """Load the LightGBM shared library through ctypes.

    Returns
    -------
    ctypes.CDLL or None
        The library loaded from the first path reported by ``find_lib_path()``,
        or None when that function reports no path at all.
    """
    lib_path = find_lib_path()
    if not lib_path:
        return None
    return ctypes.cdll.LoadLibrary(lib_path[0])

wxchan's avatar
wxchan committed
43

Guolin Ke's avatar
Guolin Ke committed
44
45
# Handle to the native library; None when LoadDll() finds nothing to load
# (the LIGHTGBM_BUILD_DOC path of find_lib_path()).
LIB = LoadDll()

if LIB is not None:
    # ctypes assumes an int return value by default; the error message is a C string.
    LIB.LGBM_GetLastError.restype = ctypes.c_char_p

# Numeric data-type codes passed to the C API alongside raw buffers.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
54
55
56
def c_array(ctype, values):
    """Copy ``values`` into a new ctypes array whose elements have type ``ctype``."""
    array_type = ctype * len(values)
    return array_type(*values)

wxchan's avatar
wxchan committed
57

Guolin Ke's avatar
Guolin Ke committed
58
def c_str(string):
    """Return ``string`` encoded as UTF-8 and wrapped in a ``ctypes.c_char_p``."""
    return ctypes.c_char_p(string.encode('utf-8'))
Guolin Ke's avatar
Guolin Ke committed
60

wxchan's avatar
wxchan committed
61

62
def load_from_file(filename, reference):
    """Create a LightGBM dataset from a file through the C API.

    Parameters
    ----------
    filename : str
        Path of the data file handed to ``LGBM_DatasetCreateFromFile``.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset to use as reference, or None.

    Returns
    -------
    ctypes.c_void_p
        Handle of the created dataset. Return codes of the C calls are not
        checked; the last error string is printed instead.
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    print(LIB.LGBM_GetLastError())
    # The C API declares these outputs as `int*`; c_long has a platform-dependent
    # width (8 bytes on LP64), so c_int is the matching type.
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data: %d #feature: %d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
80

81
def save_to_binary(handle, filename):
    """Write the dataset behind ``handle`` to ``filename`` via ``LGBM_DatasetSaveBinary``."""
    LIB.LGBM_DatasetSaveBinary(handle, c_str(filename))


85
def load_from_csr(filename, reference):
    """Create a LightGBM dataset from a tab-separated file via a CSR matrix.

    Each line of the file is split on tabs; the first field is the label and
    the remaining fields are feature values.

    Parameters
    ----------
    filename : str
        Path of the tab-separated text file.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset to use as reference, or None.

    Returns
    -------
    ctypes.c_void_p
        Handle of the created dataset, with its ``label`` field set.
    """
    data = []
    label = []
    with open(filename, 'r') as inp:
        # Iterate the file lazily instead of materialising every line first.
        for line in inp:
            values = line.split('\t')
            data.append([float(x) for x in values[1:]])
            label.append(float(values[0]))
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        ctypes.c_int64(len(csr.indptr)),
        ctypes.c_int64(len(csr.data)),
        ctypes.c_int64(csr.shape[1]),  # number of columns
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # The C API declares these outputs as `int*`; c_long has a platform-dependent
    # width, so c_int is the matching type.
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # Labels are float32, so pass the matching type code (same value as the former literal 0).
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), dtype_float32)
    print('#data: %d #feature: %d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
121

122
def load_from_csc(filename, reference):
    """Create a LightGBM dataset from a tab-separated file via a CSC matrix.

    Each line of the file is split on tabs; the first field is the label and
    the remaining fields are feature values.

    Parameters
    ----------
    filename : str
        Path of the tab-separated text file.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset to use as reference, or None.

    Returns
    -------
    ctypes.c_void_p
        Handle of the created dataset, with its ``label`` field set.
    """
    data = []
    label = []
    with open(filename, 'r') as inp:
        # Iterate the file lazily instead of materialising every line first.
        for line in inp:
            values = line.split('\t')
            data.append([float(x) for x in values[1:]])
            label.append(float(values[0]))
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        ctypes.c_int64(len(csc.indptr)),
        ctypes.c_int64(len(csc.data)),
        ctypes.c_int64(csc.shape[0]),  # number of rows
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # The C API declares these outputs as `int*`; c_long has a platform-dependent
    # width, so c_int is the matching type.
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # Labels are float32, so pass the matching type code (same value as the former literal 0).
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), dtype_float32)
    print('#data: %d #feature: %d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
158

159
def load_from_mat(filename, reference):
    """Create a LightGBM dataset from a tab-separated file via a dense matrix.

    Each line of the file is split on tabs; the first field is the label and
    the remaining fields are feature values.

    Parameters
    ----------
    filename : str
        Path of the tab-separated text file.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset to use as reference, or None.

    Returns
    -------
    ctypes.c_void_p
        Handle of the created dataset, with its ``label`` field set.
    """
    rows = []
    label = []
    with open(filename, 'r') as inp:
        # Iterate the file lazily instead of materialising every line first.
        for line in inp:
            values = line.split('\t')
            rows.append([float(x) for x in values[1:]])
            label.append(float(values[0]))
    mat = np.array(rows)
    # Flat view of the matrix buffer. np.asarray never forces a copy and, unlike
    # np.array(..., copy=False) under NumPy 2, never raises when one is needed.
    data = np.asarray(mat.reshape(mat.size))
    label = np.array(label, dtype=np.float32)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromMat(
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,  # presumably the row-major flag; confirm against the C API docs
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # The C API declares these outputs as `int*`; c_long has a platform-dependent
    # width, so c_int is the matching type.
    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # Labels are float32, so pass the matching type code (same value as the former literal 0).
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), dtype_float32)
    print('#data: %d #feature: %d' % (num_data.value, num_feature.value))
    return handle
wxchan's avatar
wxchan committed
191
192


193
def free_dataset(handle):
    """Release the dataset behind ``handle`` via ``LGBM_DatasetFree``."""
    LIB.LGBM_DatasetFree(handle)

wxchan's avatar
wxchan committed
196

197
def test_dataset():
    """Exercise dataset creation from file, dense, CSR and CSC input, plus binary save and reload.

    The binary dataset is written to ``train.binary.bin`` in the current
    working directory and is not removed afterwards.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = load_from_file(train_path, None)
    # Build the test set three ways, each time using `train` as the reference dataset.
    for loader in (load_from_mat, load_from_csr, load_from_csc):
        test = loader(test_path, train)
        free_dataset(test)

    # Round-trip the training set through the binary format.
    save_to_binary(train, 'train.binary.bin')
    free_dataset(train)
    train = load_from_file('train.binary.bin', None)
    free_dataset(train)
wxchan's avatar
wxchan committed
213
214


215
def test_booster():
    """Train a booster for 50 iterations, save it, reload it and run predictions.

    Writes ``model.txt`` and ``preb.txt`` to the current working directory;
    neither file is removed afterwards. Return codes of the C calls are not
    checked.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = load_from_mat(train_path, None)
    test = load_from_mat(test_path, train)
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(
        train,
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)
    is_finished = ctypes.c_int(0)
    for i in range(1, 51):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float64)
        # out_len is an `int*` in the C API, so c_int (not c_ulong) is the matching type.
        out_len = ctypes.c_int(0)
        # NOTE(review): data index 0 may refer to the training data rather than the
        # validation set added above, despite the "test AUC" label — confirm.
        LIB.LGBM_BoosterGetEval(
            booster,
            0,
            ctypes.byref(out_len),
            result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        if i % 10 == 0:
            print('%d iteration test AUC %f' % (i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, 0, -1, 0, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    free_dataset(train)
    free_dataset(test)

    # Reload the saved model into a fresh booster.
    booster2 = ctypes.c_void_p()
    # The iteration-count output is an `int*` in the C API.
    num_total_model = ctypes.c_int(0)
    LIB.LGBM_BoosterCreateFromModelfile(
        c_str('model.txt'),
        ctypes.byref(num_total_model),
        ctypes.byref(booster2))

    rows = []
    with open(test_path, 'r') as inp:
        for line in inp:
            # Drop the first (label) field and keep the feature values.
            rows.append([float(x) for x in line.split('\t')[1:]])
    mat = np.array(rows)
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    # The prediction length is an `int64_t*` output; c_long is only 4 bytes on
    # Windows, so use an explicit 64-bit type.
    num_preb = ctypes.c_int64(0)
    # Flat view of the matrix buffer. np.asarray never forces a copy and, unlike
    # np.array(..., copy=False) under NumPy 2, never raises when one is needed.
    data = np.asarray(mat.reshape(mat.size))
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        0,
        25,
        c_str(''),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(test_path),
        0,
        0,
        0,
        25,
        c_str(''),
        c_str('preb.txt'))
    # Same file prediction again; only the fifth argument changes, from 0 to 10.
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(test_path),
        0,
        0,
        10,
        25,
        c_str(''),
        c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)