test.py 9 KB
Newer Older
wxchan's avatar
wxchan committed
1
2
# coding: utf-8
# pylint: skip-file
Guolin Ke's avatar
Guolin Ke committed
3
import ctypes
wxchan's avatar
wxchan committed
4
import os
Guolin Ke's avatar
Guolin Ke committed
5
import sys
Guolin Ke's avatar
Guolin Ke committed
6
7

import numpy as np
8
import pytest
Guolin Ke's avatar
Guolin Ke committed
9
10
from scipy import sparse

wxchan's avatar
wxchan committed
11

Guolin Ke's avatar
Guolin Ke committed
12
13
14
15
16
17
18
19
20
21
def find_lib_path():
    """Return the existing LightGBM shared-library files, in search order.

    When the ``LIGHTGBM_BUILD_DOC`` environment variable holds a non-empty
    value an empty list is returned, because the native library is not
    needed while building docs.

    Returns
    -------
    list of str
        Every candidate path that points at an existing file.

    Raises
    ------
    Exception
        If none of the candidate paths is an existing file; the message
        lists the resolved candidates.
    """
    if os.environ.get('LIGHTGBM_BUILD_DOC', False):
        # we don't need lib_lightgbm while building docs
        return []

    here = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    search_dirs = [
        here,
        os.path.join(here, '../../lib/'),
        os.path.join(here, '../../'),
        os.path.join(here, './lib/'),
        os.path.join(sys.prefix, 'lightgbm'),
    ]

    on_windows = os.name == 'nt'
    if on_windows:
        # Two extra directories are searched on Windows only.
        search_dirs += [
            os.path.join(here, '../../Release/'),
            os.path.join(here, '../../windows/x64/DLL/'),
        ]
    lib_name = 'lib_lightgbm.dll' if on_windows else 'lib_lightgbm.so'

    candidates = [os.path.join(folder, lib_name) for folder in search_dirs]
    # isfile() is already False for paths that do not exist.
    found = [path for path in candidates if os.path.isfile(path)]
    if found:
        return found

    resolved = [os.path.realpath(path) for path in candidates]
    raise Exception('Cannot find lightgbm Library in following paths: ' + ','.join(resolved))


def LoadDll():
    """Load the first library reported by ``find_lib_path()``.

    Returns
    -------
    ctypes.CDLL or None
        The loaded library, or ``None`` when ``find_lib_path()`` returns an
        empty list.
    """
    candidates = find_lib_path()
    if not candidates:
        return None
    return ctypes.cdll.LoadLibrary(candidates[0])

wxchan's avatar
wxchan committed
42

Guolin Ke's avatar
Guolin Ke committed
43
44
# Module-wide handle to the native library. LoadDll() returns None when
# find_lib_path() reports no library (the LIGHTGBM_BUILD_DOC mode).
LIB = LoadDll()

if LIB is not None:
    # ctypes assumes an int return value unless told otherwise; declare that
    # LGBM_GetLastError returns a C string so the message can be printed.
    # Guarded so that importing this module does not raise AttributeError
    # when no library was loaded.
    LIB.LGBM_GetLastError.restype = ctypes.c_char_p

47
48
49
50
51
52
# Integer type codes passed to the native API next to a raw buffer, e.g.
# dtype_int32 follows the indptr array and dtype_float64 follows the value
# buffer in the dataset constructors below.
# NOTE(review): the numeric values presumably mirror the data-type constants
# of the C API header -- confirm against the library version under test.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
53
54
55
def c_array(ctype, values):
    """Build a ctypes array of ``ctype`` elements initialised from ``values``.

    The array has exactly ``len(values)`` elements.
    """
    array_type = ctype * len(values)
    return array_type(*values)

wxchan's avatar
wxchan committed
56

Guolin Ke's avatar
Guolin Ke committed
57
def c_str(string):
    """Wrap ``string`` as a ``ctypes.c_char_p`` holding its ASCII bytes.

    Raises ``UnicodeEncodeError`` if ``string`` contains non-ASCII characters.
    """
    encoded = string.encode('ascii')
    return ctypes.c_char_p(encoded)
Guolin Ke's avatar
Guolin Ke committed
59

wxchan's avatar
wxchan committed
60

61
@pytest.mark.skip
def test_load_from_file(filename, reference):
    """Create a dataset handle from ``filename`` through the native API.

    Marked ``skip`` so pytest does not run it on its own; ``test_dataset``
    calls it directly as a helper.

    Parameters
    ----------
    filename : str
        Path handed to ``LGBM_DatasetCreateFromFile``.
    reference : ctypes.c_void_p or None
        Handle passed as the reference-dataset argument, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        The handle filled in by the native call.
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    # Print whatever the library currently reports as its last error.
    print(LIB.LGBM_GetLastError())

    num_data = ctypes.c_long()
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
79

80
@pytest.mark.skip
def test_save_to_binary(handle, filename):
    """Pass ``handle`` and ``filename`` to ``LGBM_DatasetSaveBinary``.

    Marked ``skip`` so pytest does not run it on its own; ``test_dataset``
    calls it directly as a helper.
    """
    target = c_str(filename)
    LIB.LGBM_DatasetSaveBinary(handle, target)


85
@pytest.mark.skip
def test_load_from_csr(filename, reference):
    """Create a dataset handle from a tab-separated file via a CSR matrix.

    Each line of ``filename`` is split on tabs: the first field is the label,
    the remaining fields are the feature values. The features are converted
    to a ``scipy.sparse.csr_matrix`` and handed to
    ``LGBM_DatasetCreateFromCSR``; the labels are then attached with
    ``LGBM_DatasetSetField``.

    Marked ``skip`` so pytest does not run it on its own; ``test_dataset``
    calls it directly as a helper.

    Parameters
    ----------
    filename : str
        Path of the tab-separated data file.
    reference : ctypes.c_void_p or None
        Handle passed as the reference-dataset argument, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        The handle filled in by the native call.
    """
    features = []
    labels = []
    # ``with`` guarantees the file is closed even if a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            features.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    mat = np.array(features)
    label = np.array(labels, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csr.indptr),
        len(csr.data),
        csr.shape[1],  # number of columns
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
122

123
@pytest.mark.skip
def test_load_from_csc(filename, reference):
    """Create a dataset handle from a tab-separated file via a CSC matrix.

    Each line of ``filename`` is split on tabs: the first field is the label,
    the remaining fields are the feature values. The features are converted
    to a ``scipy.sparse.csc_matrix`` and handed to
    ``LGBM_DatasetCreateFromCSC``; the labels are then attached with
    ``LGBM_DatasetSetField``.

    Marked ``skip`` so pytest does not run it on its own; ``test_dataset``
    calls it directly as a helper.

    Parameters
    ----------
    filename : str
        Path of the tab-separated data file.
    reference : ctypes.c_void_p or None
        Handle passed as the reference-dataset argument, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        The handle filled in by the native call.
    """
    features = []
    labels = []
    # ``with`` guarantees the file is closed even if a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            features.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    mat = np.array(features)
    label = np.array(labels, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csc.indptr),
        len(csc.data),
        csc.shape[0],  # number of rows (the CSR variant passes the column count)
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

wxchan's avatar
wxchan committed
160

161
@pytest.mark.skip
def test_load_from_mat(filename, reference):
    """Create a dataset handle from a tab-separated file via a dense matrix.

    Each line of ``filename`` is split on tabs: the first field is the label,
    the remaining fields are the feature values. The features are flattened
    into a one-dimensional float array and handed to
    ``LGBM_DatasetCreateFromMat``; the labels are then attached with
    ``LGBM_DatasetSetField``.

    Marked ``skip`` so pytest does not run it on its own; ``test_dataset``
    and ``test_booster`` call it directly as a helper.

    Parameters
    ----------
    filename : str
        Path of the tab-separated data file.
    reference : ctypes.c_void_p or None
        Handle passed as the reference-dataset argument, or ``None``.

    Returns
    -------
    ctypes.c_void_p
        The handle filled in by the native call.
    """
    features = []
    labels = []
    # ``with`` guarantees the file is closed even if a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            features.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    mat = np.array(features)
    # reshape() of the freshly built array is a flat view over the same
    # buffer; no np.array(..., copy=False) wrapper is needed (its meaning
    # changed in NumPy 2).
    flat = mat.reshape(mat.size)
    label = np.array(labels, dtype=np.float32)
    handle = ctypes.c_void_p()

    LIB.LGBM_DatasetCreateFromMat(
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,  # NOTE(review): presumably the row-major flag -- confirm against the C API
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle
wxchan's avatar
wxchan committed
194
195


196
@pytest.mark.skip
def test_free_dataset(handle):
    """Pass ``handle`` to the native ``LGBM_DatasetFree`` call.

    Marked ``skip`` so pytest does not run it on its own; the other tests
    call it directly as a helper.
    """
    free_dataset = LIB.LGBM_DatasetFree
    free_dataset(handle)

wxchan's avatar
wxchan committed
200

201
def test_dataset():
    """Exercise dataset creation, binary save/reload and freeing.

    The training set is loaded from file, then the test set is loaded three
    times (dense, CSR, CSC) with the training handle as reference and freed
    after each load. Finally the training set is saved to
    ``train.binary.bin``, freed, reloaded from that file and freed again.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = test_load_from_file(train_path, None)

    # Same load-then-free cycle for every in-memory loader, in this order.
    for loader in (test_load_from_mat, test_load_from_csr, test_load_from_csc):
        test = loader(test_path, train)
        test_free_dataset(test)

    test_save_to_binary(train, 'train.binary.bin')
    test_free_dataset(train)

    train = test_load_from_file('train.binary.bin', None)
    test_free_dataset(train)
wxchan's avatar
wxchan committed
213
214


215
def test_booster():
    """Train a booster, save it, reload it and run predictions.

    Steps:
      1. Load the train/test example data as dense datasets.
      2. Create a booster, register the test set as validation data and run
         100 update iterations, printing the evaluation result every tenth
         iteration.
      3. Save the model to ``model.txt`` and free the booster and datasets.
      4. Reload the model from ``model.txt`` and predict both from an
         in-memory matrix and from file (output written to ``preb.txt``).
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(here, '../../examples/binary_classification/binary.train')
    test_path = os.path.join(here, '../../examples/binary_classification/binary.test')

    train = test_load_from_mat(train_path, None)
    test = test_load_from_mat(test_path, train)

    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)

    is_finished = ctypes.c_int(0)
    for i in range(1, 101):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        # Fresh one-element output buffer for the evaluation result.
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_ulong(0)
        LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        if i % 10 == 0:
            print('%d Iteration test AUC %f' % (i, result[0]))

    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)
    test_free_dataset(test)

    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_long()
    LIB.LGBM_BoosterCreateFromModelfile(c_str('model.txt'), ctypes.byref(num_total_model), ctypes.byref(booster2))

    # Feature columns only (the first tab-separated field is skipped).
    # ``with`` guarantees the file is closed even if a field fails to parse.
    with open(test_path, 'r') as inp:
        rows = [[float(x) for x in line.split('\t')[1:]] for line in inp]
    mat = np.array(rows)
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_long()
    # reshape() of the freshly built array is a flat view over the same
    # buffer; no np.array(..., copy=False) wrapper is needed (its meaning
    # changed in NumPy 2).
    flat = mat.reshape(mat.size)

    LIB.LGBM_BoosterPredictForMat(
        booster2,
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        50,
        c_str(''),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(booster2, c_str(test_path), 0, 0, 50, c_str(''), c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)