test_.py 9 KB
Newer Older
wxchan's avatar
wxchan committed
1
2
# coding: utf-8
# pylint: skip-file
Guolin Ke's avatar
Guolin Ke committed
3
import ctypes
wxchan's avatar
wxchan committed
4
import os
Guolin Ke's avatar
Guolin Ke committed
5
import sys
Guolin Ke's avatar
Guolin Ke committed
6

7
8
from platform import system

Guolin Ke's avatar
Guolin Ke committed
9
import numpy as np
10
import pytest
Guolin Ke's avatar
Guolin Ke committed
11
12
from scipy import sparse

wxchan's avatar
wxchan committed
13

Guolin Ke's avatar
Guolin Ke committed
14
15
16
17
18
19
def find_lib_path():
    """Locate the compiled LightGBM shared library on disk.

    Returns
    -------
    list of str
        Candidate library files that exist, in search order. The list is
        empty when the ``LIGHTGBM_BUILD_DOC`` environment variable is set
        to a non-empty value.

    Raises
    ------
    Exception
        If none of the candidate paths is an existing file. The message
        lists the resolved candidate paths.
    """
    if os.environ.get('LIGHTGBM_BUILD_DOC', False):
        # we don't need lib_lightgbm while building docs
        return []

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    search_dirs = [curr_path, os.path.join(curr_path, '../../'), os.path.join(curr_path, '../../lib/')]
    if system() in ('Windows', 'Microsoft'):
        # Extra build output directories that are only searched on Windows.
        search_dirs.append(os.path.join(curr_path, '../../Release/'))
        search_dirs.append(os.path.join(curr_path, '../../windows/x64/DLL/'))
        lib_name = 'lib_lightgbm.dll'
    else:
        lib_name = 'lib_lightgbm.so'

    candidates = [os.path.join(directory, lib_name) for directory in search_dirs]
    # isfile() is already False for paths that do not exist.
    lib_path = [p for p in candidates if os.path.isfile(p)]
    if not lib_path:
        resolved = [os.path.realpath(p) for p in candidates]
        raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(resolved))
    return lib_path


def LoadDll():
    """Load the LightGBM shared library through ctypes.

    Returns
    -------
    ctypes.CDLL or None
        The library loaded from the first path reported by
        ``find_lib_path``, or ``None`` when that function reports no paths.
    """
    lib_path = find_lib_path()
    if not lib_path:
        return None
    return ctypes.cdll.LoadLibrary(lib_path[0])

wxchan's avatar
wxchan committed
41

Guolin Ke's avatar
Guolin Ke committed
42
43
# Module-wide handle to the LightGBM C library. ``LoadDll`` returns None when
# ``find_lib_path`` reports no library (the documentation-build path).
LIB = LoadDll()

if LIB is not None:
    # ctypes assumes an int return value unless told otherwise; declare the
    # result as a C string so the error text is returned as bytes.
    LIB.LGBM_GetLastError.restype = ctypes.c_char_p

46
47
48
49
50
51
# Integer type codes passed to the C API alongside raw array pointers
# (e.g. ``dtype_int32`` next to ``indptr`` and ``dtype_float64`` next to the
# value buffer in the dataset-creation calls); presumably they tell the
# library the element type of the buffer -- confirm against the C API header.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
52
53
54
def c_array(ctype, values):
    """Build a ctypes array of element type ``ctype`` holding ``values``.

    The array length equals ``len(values)`` and the elements are copied in
    order.
    """
    array_type = ctype * len(values)
    return array_type(*values)

wxchan's avatar
wxchan committed
55

Guolin Ke's avatar
Guolin Ke committed
56
def c_str(string):
    """Convert a Python string into a ``ctypes.c_char_p`` for the C API.

    The text is encoded as ASCII, so a string containing non-ASCII
    characters raises ``UnicodeEncodeError``.
    """
    return ctypes.c_char_p(string.encode('ascii'))
Guolin Ke's avatar
Guolin Ke committed
58

wxchan's avatar
wxchan committed
59

60
@pytest.mark.skip
def test_load_from_file(filename, reference):
    """Create a dataset from a data file through the C API.

    Marked as skipped for pytest; ``test_dataset`` calls it directly as a
    helper.

    Parameters
    ----------
    filename : str
        Path of the data file handed to ``LGBM_DatasetCreateFromFile``.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset passed as the reference argument, or
        ``None`` for no reference.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by the dataset-creation call.
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # Return codes are not checked; print the library's last error message instead.
    print(LIB.LGBM_GetLastError())
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
78

79
@pytest.mark.skip
def test_save_to_binary(handle, filename):
    """Save the dataset behind ``handle`` to ``filename`` via ``LGBM_DatasetSaveBinary``.

    Marked as skipped for pytest; ``test_dataset`` calls it directly as a
    helper.
    """
    LIB.LGBM_DatasetSaveBinary(handle, c_str(filename))


84
@pytest.mark.skip
def test_load_from_csr(filename, reference):
    """Create a dataset from a CSR matrix through the C API.

    Reads a tab-separated text file whose first column is the label and
    whose remaining columns form the data matrix, converts the matrix to
    ``scipy.sparse.csr_matrix`` and passes its buffers to
    ``LGBM_DatasetCreateFromCSR``. Marked as skipped for pytest;
    ``test_dataset`` calls it directly as a helper.

    Parameters
    ----------
    filename : str
        Path of the tab-separated data file.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset passed as the reference argument, or
        ``None`` for no reference.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by the dataset-creation call.
    """
    data = []
    label = []
    # The context manager closes the file even if a value fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            data.append([float(x) for x in fields[1:]])
            label.append(float(fields[0]))
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csr.indptr),
        len(csr.data),
        csr.shape[1],
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
121

122
@pytest.mark.skip
def test_load_from_csc(filename, reference):
    """Create a dataset from a CSC matrix through the C API.

    Reads a tab-separated text file whose first column is the label and
    whose remaining columns form the data matrix, converts the matrix to
    ``scipy.sparse.csc_matrix`` and passes its buffers to
    ``LGBM_DatasetCreateFromCSC``. Marked as skipped for pytest;
    ``test_dataset`` calls it directly as a helper.

    Parameters
    ----------
    filename : str
        Path of the tab-separated data file.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset passed as the reference argument, or
        ``None`` for no reference.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by the dataset-creation call.
    """
    data = []
    label = []
    # The context manager closes the file even if a value fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            data.append([float(x) for x in fields[1:]])
            label.append(float(fields[0]))
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csc.indptr),
        len(csc.data),
        # Column-compressed input: the row count is passed here, where the
        # CSR variant passes the column count.
        csc.shape[0],
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
159

160
@pytest.mark.skip
def test_load_from_mat(filename, reference):
    """Create a dataset from a dense matrix through the C API.

    Reads a tab-separated text file whose first column is the label and
    whose remaining columns form the data matrix, flattens the matrix and
    passes its buffer to ``LGBM_DatasetCreateFromMat``. Marked as skipped
    for pytest; ``test_dataset`` and ``test_booster`` call it directly as a
    helper.

    Parameters
    ----------
    filename : str
        Path of the tab-separated data file.
    reference : ctypes.c_void_p or None
        Handle of an existing dataset passed as the reference argument, or
        ``None`` for no reference.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by the dataset-creation call.
    """
    data = []
    label = []
    # The context manager closes the file even if a value fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            data.append([float(x) for x in fields[1:]])
            label.append(float(fields[0]))
    mat = np.array(data)
    # reshape already yields an ndarray; no extra np.array(..., copy=False) wrapper is needed.
    flat = mat.reshape(mat.size)
    label = np.array(label, dtype=np.float32)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromMat(
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,  # NOTE(review): presumably the row-major flag -- confirm against the C API header
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle
wxchan's avatar
wxchan committed
193
194


195
@pytest.mark.skip
def test_free_dataset(handle):
    """Release the dataset behind ``handle`` via ``LGBM_DatasetFree``.

    Marked as skipped for pytest; the dataset and booster tests call it
    directly as a helper.
    """
    LIB.LGBM_DatasetFree(handle)

wxchan's avatar
wxchan committed
199

200
def test_dataset():
    """Exercise dataset creation from file, dense, CSR and CSC input, plus a binary round trip.

    The training set is loaded from the text file and used as the reference
    for each test-set load. It is then saved to ``train.binary.bin`` in the
    current working directory and loaded back from that file.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = test_load_from_file(train_path, None)
    # Load the same test file through each in-memory route, freeing each handle before the next.
    for loader in (test_load_from_mat, test_load_from_csr, test_load_from_csc):
        test = loader(test_path, train)
        test_free_dataset(test)
    test_save_to_binary(train, 'train.binary.bin')
    test_free_dataset(train)
    train = test_load_from_file('train.binary.bin', None)
    test_free_dataset(train)
wxchan's avatar
wxchan committed
212
213


214
def test_booster():
    """Train a booster, save it, reload it and run predictions through the C API.

    Runs 100 update iterations on the binary-classification example,
    printing the first evaluation result every 10 iterations. The model is
    saved to ``model.txt`` and loaded back into a second booster, which
    predicts both from an in-memory matrix and from the test file (output
    written to ``preb.txt``). Both files are relative to the current
    working directory.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = test_load_from_mat(train_path, None)
    test = test_load_from_mat(test_path, train)
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)
    is_finished = ctypes.c_int(0)
    for i in range(1, 101):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        # Fresh output buffers on every iteration for the evaluation call.
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_ulong(0)
        LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        if i % 10 == 0:
            print('%d Iteration test AUC %f' % (i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, 0, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)
    test_free_dataset(test)

    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_long()
    LIB.LGBM_BoosterCreateFromModelfile(c_str('model.txt'), ctypes.byref(num_total_model), ctypes.byref(booster2))

    # Re-read the test file, skipping the first (label) column of each row.
    with open(test_path, 'r') as inp:
        rows = [[float(x) for x in line.split('\t')[1:]] for line in inp]
    mat = np.array(rows)
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_long()
    # reshape already yields an ndarray; no extra np.array(..., copy=False) wrapper is needed.
    flat = mat.reshape(mat.size)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        50,
        c_str(''),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(test_path),
        0,
        0,
        50,
        c_str(''),
        c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)