test_.py 8.25 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
Guolin Ke's avatar
Guolin Ke committed
2
import ctypes
3
from pathlib import Path
4
5
from platform import system

Guolin Ke's avatar
Guolin Ke committed
6
7
8
import numpy as np
from scipy import sparse

9
10
11
12
# Prefer the library handle already loaded by the installed lightgbm Python
# package; otherwise fall back to a lib_lightgbm binary located relative to
# this file.
try:
    from lightgbm.basic import _LIB as LIB
except ModuleNotFoundError:
    print("Could not import lightgbm Python package, looking for lib_lightgbm at the repo root")
    # parents[2] is the directory two levels above the one containing this file.
    repo_root = Path(__file__).absolute().parents[2]
    if system() in ('Windows', 'Microsoft'):
        lib_file = repo_root / "Release" / "lib_lightgbm.dll"
    else:
        lib_file = repo_root / "lib_lightgbm.so"
    # ctypes documents path-like library names only from Python 3.12 on;
    # passing a plain string keeps older interpreters (notably on Windows)
    # able to load the library.
    LIB = ctypes.cdll.LoadLibrary(str(lib_file))

# Declare the result as a C string so ctypes hands back bytes instead of
# applying its default int return type.
LIB.LGBM_GetLastError.restype = ctypes.c_char_p

21
22
23
24
25
26
# Integer type codes passed to the C API right after a raw buffer pointer to
# tell it the element type of that buffer.
dtype_float32, dtype_float64, dtype_int32, dtype_int64 = 0, 1, 2, 3


Guolin Ke's avatar
Guolin Ke committed
27
def c_str(string):
    """Return ``string`` as a ``ctypes.c_char_p`` holding its UTF-8 bytes."""
    utf8_bytes = string.encode('utf-8')
    return ctypes.c_char_p(utf8_bytes)
Guolin Ke's avatar
Guolin Ke committed
29

wxchan's avatar
wxchan committed
30

31
def load_from_file(filename, reference):
    """Create a dataset from a file on disk through the C API.

    ``filename`` is converted with ``str`` before being handed to the
    library, so both strings and path objects work. ``reference`` is an
    existing dataset handle, or ``None`` when there is none.

    Prints the library's last-error message and the dataset's row/feature
    counts, then returns the new handle (a ``ctypes.c_void_p``).
    """
    # NOTE(review): the return values of the LIB calls below are not
    # inspected, so a failing call does not stop this function.
    dataset = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(str(filename)),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(dataset))
    print(LIB.LGBM_GetLastError())

    num_data = ctypes.c_int(0)
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(num_data))
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(num_feature))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return dataset

wxchan's avatar
wxchan committed
49

50
def save_to_binary(handle, filename):
    """Write the dataset behind ``handle`` to ``filename`` via the C API.

    ``filename`` may be a string or a path object; it is converted with
    ``str`` first, the same way ``load_from_file`` treats its argument
    (``c_str`` needs something with ``.encode``).
    """
    LIB.LGBM_DatasetSaveBinary(handle, c_str(str(filename)))


54
def load_from_csr(filename, reference):
    """Create a dataset from a text file by way of a SciPy CSR matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the float32
    label and the remaining columns become the CSR feature matrix.
    ``reference`` is an existing dataset handle or ``None``.

    Prints the row/feature counts and returns the new dataset handle.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    label = table[:, 0].astype(np.float32)
    csr = sparse.csr_matrix(table[:, 1:])

    int32_ptr = ctypes.POINTER(ctypes.c_int32)
    double_ptr = ctypes.POINTER(ctypes.c_double)
    dataset = ctypes.c_void_p()

    # NOTE(review): the index buffers are passed as int32 without checking
    # the dtype SciPy actually chose for them - confirm for large inputs.
    LIB.LGBM_DatasetCreateFromCSR(
        csr.indptr.ctypes.data_as(int32_ptr),
        ctypes.c_int(dtype_int32),
        csr.indices.ctypes.data_as(int32_ptr),
        csr.data.ctypes.data_as(double_ptr),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(csr.indptr)),
        ctypes.c_int64(len(csr.data)),
        ctypes.c_int64(csr.shape[1]),  # number of columns
        c_str('max_bin=15'),
        reference,
        ctypes.byref(dataset))

    num_data = ctypes.c_int(0)
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(num_data))
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(num_feature))

    LIB.LGBM_DatasetSetField(
        dataset,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return dataset

wxchan's avatar
wxchan committed
88

89
def load_from_csc(filename, reference):
    """Create a dataset from a text file by way of a SciPy CSC matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the float32
    label and the remaining columns become the CSC feature matrix.
    ``reference`` is an existing dataset handle or ``None``.

    Prints the row/feature counts and returns the new dataset handle.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    label = table[:, 0].astype(np.float32)
    csc = sparse.csc_matrix(table[:, 1:])

    int32_ptr = ctypes.POINTER(ctypes.c_int32)
    double_ptr = ctypes.POINTER(ctypes.c_double)
    dataset = ctypes.c_void_p()

    # NOTE(review): the index buffers are passed as int32 without checking
    # the dtype SciPy actually chose for them - confirm for large inputs.
    LIB.LGBM_DatasetCreateFromCSC(
        csc.indptr.ctypes.data_as(int32_ptr),
        ctypes.c_int(dtype_int32),
        csc.indices.ctypes.data_as(int32_ptr),
        csc.data.ctypes.data_as(double_ptr),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(csc.indptr)),
        ctypes.c_int64(len(csc.data)),
        ctypes.c_int64(csc.shape[0]),  # number of rows (the CSR variant passes columns)
        c_str('max_bin=15'),
        reference,
        ctypes.byref(dataset))

    num_data = ctypes.c_int(0)
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(num_data))
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(num_feature))

    LIB.LGBM_DatasetSetField(
        dataset,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return dataset

wxchan's avatar
wxchan committed
123

124
def load_from_mat(filename, reference):
    """Create a dataset from a text file by way of a dense matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the float32
    label and the remaining columns are flattened into one float64 buffer.
    ``reference`` is an existing dataset handle or ``None``.

    Prints the row/feature counts and returns the new dataset handle
    (a ``ctypes.c_void_p``).
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    label = table[:, 0].astype(np.float32)
    mat = table[:, 1:]
    # reshape() flattens in C (row-major) order. np.asarray replaces
    # np.array(..., copy=False), whose meaning changed in NumPy 2.0 to
    # "never copy, raise if a copy would be needed".
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)

    handle = ctypes.c_void_p()
    # NOTE(review): return values of the LIB calls are not inspected here.
    LIB.LGBM_DatasetCreateFromMat(
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),  # presumably the row-major flag, matching the flattening above - confirm
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))

    LIB.LGBM_DatasetSetField(
        handle,
        c_str('label'),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32))
    print(f'#data: {num_data.value} #feature: {num_feature.value}')
    return handle
wxchan's avatar
wxchan committed
155
156


157
def free_dataset(handle):
    """Hand ``handle`` to ``LGBM_DatasetFree``; the call's result is ignored."""
    LIB.LGBM_DatasetFree(handle)

wxchan's avatar
wxchan committed
160

161
def test_dataset():
    """Exercise every dataset loader, then a binary save/reload round trip.

    The training set is loaded from file and used as the reference for the
    test set, which is built and freed once per in-memory loader. The
    training set is then saved to ``train.binary.bin`` in the current
    working directory and loaded back from that file.
    """
    example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'binary_classification'
    test_path = example_dir / 'binary.test'
    binary_name = 'train.binary.bin'

    train = load_from_file(example_dir / 'binary.train', None)
    # Same order as before: dense matrix, then CSR, then CSC.
    for loader in (load_from_mat, load_from_csr, load_from_csc):
        free_dataset(loader(test_path, train))

    save_to_binary(train, binary_name)
    free_dataset(train)
    reloaded = load_from_file(binary_name, None)
    free_dataset(reloaded)
wxchan's avatar
wxchan committed
174
175


176
def test_booster():
    """Train, save, reload and predict with a booster through the C API.

    Steps:
      1. Build train/test datasets from the binary-classification example.
      2. Train for 50 iterations, printing the AUC every 10th iteration.
      3. Save the model to ``model.txt`` and free the booster and datasets.
      4. Reload the model and predict on the test matrix in memory.
      5. Predict from the test file twice, writing ``preb.txt`` both times.

    ``model.txt`` and ``preb.txt`` are written to the current working
    directory. NOTE(review): return values of the LIB calls are not
    inspected, so a failing call does not fail this test.
    """
    binary_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'binary_classification'
    train = load_from_mat(binary_example_dir / 'binary.train', None)
    test = load_from_mat(binary_example_dir / 'binary.test', train)
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(
        train,
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)
    is_finished = ctypes.c_int(0)
    for i in range(1, 51):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_int(0)
        LIB.LGBM_BoosterGetEval(
            booster,
            # In the LightGBM C API data_idx 0 is the training data and 1 is
            # the first validation set. The message below reports the *test*
            # AUC, so evaluate the validation data attached above.
            ctypes.c_int(1),
            ctypes.byref(out_len),
            result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        if i % 10 == 0:
            print(f'{i} iteration test AUC {result[0]:.6f}')
    LIB.LGBM_BoosterSaveModel(
        booster,
        ctypes.c_int(0),
        ctypes.c_int(-1),
        ctypes.c_int(0),
        c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    free_dataset(train)
    free_dataset(test)

    # Reload the saved model into a fresh booster handle.
    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_int(0)
    LIB.LGBM_BoosterCreateFromModelfile(
        c_str('model.txt'),
        ctypes.byref(num_total_model),
        ctypes.byref(booster2))

    data = np.loadtxt(str(binary_example_dir / 'binary.test'), dtype=np.float64)
    mat = data[:, 1:]  # drop the label column
    preb = np.empty(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_int64(0)
    # np.asarray replaces np.array(..., copy=False), whose meaning changed in
    # NumPy 2.0 to "never copy, raise if a copy would be needed".
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),
        ctypes.c_int(1),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(''),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))

    # Two file-based predictions; they differ only in the third integer
    # argument (0 vs 10) and both write to preb.txt.
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(str(binary_example_dir / 'binary.test')),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(''),
        c_str('preb.txt'))
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(str(binary_example_dir / 'binary.test')),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(10),
        ctypes.c_int(25),
        c_str(''),
        c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)