# test.py: exercises the LightGBM shared library through ctypes.
import sys
import os
import ctypes
import collections

import numpy as np
from scipy import sparse

def LoadDll():
    """Load the LightGBM shared library with ctypes and return it.

    The Windows DLL is chosen when ``os.name`` is ``'nt'``; every other
    platform loads the ``.so``. Both paths are relative, so the library is
    only found when the process is started from a directory where they
    resolve.

    Returns:
        The object returned by ``ctypes.cdll.LoadLibrary``.
    """
    if os.name == 'nt':
        lib_path = '../../windows/x64/DLL/lib_lightgbm.dll'
    else:
        lib_path = '../../lib_lightgbm.so'
    return ctypes.cdll.LoadLibrary(lib_path)

# Shared-library handle used by every call in this file; loaded at import time.
LIB = LoadDll()

# Integer type codes passed next to raw buffers in the library calls so the
# callee knows the element type of the buffer.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


def c_array(ctype, values):
    """Return a ctypes array of element type ``ctype`` filled from ``values``.

    Args:
        ctype: A ctypes element type such as ``ctypes.c_int``.
        values: A sized iterable; its length fixes the array length.
    """
    array_type = ctype * len(values)
    return array_type(*values)

def c_str(string):
    """Convert a Python string to a ``ctypes.c_char_p``.

    Args:
        string: Text to pass to the C library. It is encoded as ASCII, so a
            non-ASCII character raises ``UnicodeEncodeError``.

    Returns:
        A ``ctypes.c_char_p`` wrapping the encoded bytes.
    """
    return ctypes.c_char_p(string.encode('ascii'))
def test_load_from_file(filename, reference):
    """Create a dataset from a data file and print its dimensions.

    Args:
        filename: Path of the data file handed to the library.
        reference: ctypes handle of an existing dataset to pass by reference,
            or None to pass a NULL reference.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the library.
    """
    # None is a sentinel for "no reference dataset", so compare by identity.
    ref = ctypes.byref(reference) if reference is not None else None
    handle = ctypes.c_void_p()
    # NOTE(review): the integer results of the LIB calls are ignored here;
    # confirm whether they are status codes that should be checked.
    LIB.LGBM_CreateDatasetFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    # NOTE(review): ctypes.c_long is 32 bits wide on Windows; confirm it
    # matches the width of the out-parameters of these two calls.
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_save_to_binary(handle, filename):
    """Call ``LGBM_DatasetSaveBinary`` for ``handle`` with ``filename``.

    Args:
        handle: Dataset handle as returned by one of the loader functions.
        filename: Output path; converted to a C string before the call.
    """
    binary_path = c_str(filename)
    LIB.LGBM_DatasetSaveBinary(handle, binary_path)

def test_load_from_binary(filename):
    """Create a dataset from a binary dataset file and print its dimensions.

    Args:
        filename: Path of the binary file handed to the library.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the library.
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_CreateDatasetFromBinaryFile(c_str(filename), ctypes.byref(handle))
    # NOTE(review): ctypes.c_long is 32 bits wide on Windows; confirm it
    # matches the width of the out-parameters of these two calls.
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_csr(filename, reference):
    """Create a dataset from a CSR matrix built from a tab-separated file.

    Every line of ``filename`` is split on tabs: the first field is parsed as
    the label and the remaining fields as the feature values of that row.
    The rows are converted to a ``scipy.sparse.csr_matrix`` whose buffers are
    passed to the library, and the labels are attached with
    ``LGBM_DatasetSetField``.

    Args:
        filename: Path of the tab-separated text file.
        reference: ctypes handle of an existing dataset to pass by reference,
            or None to pass a NULL reference.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the library.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    data = []
    label = []
    # The context manager closes the file even when a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            data.append([float(x) for x in fields[1:]])
            label.append(float(fields[0]))
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()
    # None is a sentinel for "no reference dataset", so compare by identity.
    ref = ctypes.byref(reference) if reference is not None else None

    LIB.LGBM_CreateDatasetFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csr.indptr),
        len(csr.data),
        csr.shape[1],  # number of columns
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    # NOTE(review): ctypes.c_long is 32 bits wide on Windows; confirm it
    # matches the width of the out-parameters of these two calls.
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # The trailing 0 equals dtype_float32 -- presumably the element type of
    # the label buffer; confirm against the C API.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_csc(filename, reference):
    """Create a dataset from a CSC matrix built from a tab-separated file.

    Every line of ``filename`` is split on tabs: the first field is parsed as
    the label and the remaining fields as the feature values of that row.
    The rows are converted to a ``scipy.sparse.csc_matrix`` whose buffers are
    passed to the library, and the labels are attached with
    ``LGBM_DatasetSetField``.

    Args:
        filename: Path of the tab-separated text file.
        reference: ctypes handle of an existing dataset to pass by reference,
            or None to pass a NULL reference.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the library.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    data = []
    label = []
    # The context manager closes the file even when a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            data.append([float(x) for x in fields[1:]])
            label.append(float(fields[0]))
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()
    # None is a sentinel for "no reference dataset", so compare by identity.
    ref = ctypes.byref(reference) if reference is not None else None

    LIB.LGBM_CreateDatasetFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csc.indptr),
        len(csc.data),
        csc.shape[0],  # number of rows
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    # NOTE(review): ctypes.c_long is 32 bits wide on Windows; confirm it
    # matches the width of the out-parameters of these two calls.
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # The trailing 0 equals dtype_float32 -- presumably the element type of
    # the label buffer; confirm against the C API.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_mat(filename, reference):
    """Create a dataset from a dense matrix built from a tab-separated file.

    Every line of ``filename`` is split on tabs: the first field is parsed as
    the label and the remaining fields as the feature values of that row.
    The matrix is flattened and passed to the library together with its row
    and column counts, and the labels are attached with
    ``LGBM_DatasetSetField``.

    Args:
        filename: Path of the tab-separated text file.
        reference: ctypes handle of an existing dataset to pass by reference,
            or None to pass a NULL reference.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the library.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    data = []
    label = []
    # The context manager closes the file even when a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            data.append([float(x) for x in fields[1:]])
            label.append(float(fields[0]))
    mat = np.array(data)
    # reshape() already yields the flat 1-D array. The former
    # np.array(..., copy=False) wrapper added nothing and raises under
    # NumPy 2 whenever a copy would be required.
    flat = mat.reshape(mat.size)
    label = np.array(label, dtype=np.float32)
    handle = ctypes.c_void_p()
    # None is a sentinel for "no reference dataset", so compare by identity.
    ref = ctypes.byref(reference) if reference is not None else None

    LIB.LGBM_CreateDatasetFromMat(
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,  # presumably the row-major flag; confirm against the C API
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    # NOTE(review): ctypes.c_long is 32 bits wide on Windows; confirm it
    # matches the width of the out-parameters of these two calls.
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # The trailing 0 equals dtype_float32 -- presumably the element type of
    # the label buffer; confirm against the C API.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle
def test_free_dataset(handle):
    """Call ``LGBM_DatasetFree`` on ``handle``.

    Args:
        handle: Dataset handle as returned by one of the loader functions.
    """
    free_dataset = LIB.LGBM_DatasetFree
    free_dataset(handle)

def test_dataset():
    """Run every dataset loader once and free each dataset afterwards.

    A training dataset is created from the text file and used as the
    reference for the matrix, CSR and CSC loaders on the test file. The
    training dataset is then saved to a binary file, freed, loaded back from
    that binary file and freed again.
    """
    test_path = '../../examples/binary_classification/binary.test'
    binary_path = 'train.binary.bin'

    train = test_load_from_file('../../examples/binary_classification/binary.train', None)
    # Each loader builds the test dataset against the same reference.
    for loader in (test_load_from_mat, test_load_from_csr, test_load_from_csc):
        test = loader(test_path, train)
        test_free_dataset(test)
    test_save_to_binary(train, binary_path)
    test_free_dataset(train)
    train = test_load_from_binary(binary_path)
    test_free_dataset(train)
def test_booster():
    """Train a booster, save it, reload it and run predictions with it.

    Steps:
      1. Build train and test datasets with ``test_load_from_mat``.
      2. Create a booster, run 100 update iterations and print the first
         evaluation result after each one.
      3. Save the model to ``model.txt``, then free the booster and both
         datasets.
      4. Create a second booster from ``model.txt`` and predict on the test
         file, once from an in-memory matrix and once from the file itself
         (written to ``preb.txt``).
    """
    train = test_load_from_mat('../../examples/binary_classification/binary.train', None)
    test = [test_load_from_mat('../../examples/binary_classification/binary.test', train)]
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(
        train,
        c_array(ctypes.c_void_p, test),
        len(test),
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        None,
        ctypes.byref(booster))
    is_finished = ctypes.c_int(0)
    for i in range(100):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float32)
        out_len = ctypes.c_ulong(0)
        LIB.LGBM_BoosterGetEval(
            booster,
            0,
            ctypes.byref(out_len),
            result.ctypes.data_as(ctypes.POINTER(ctypes.c_float)))
        print('%d Iteration test AUC %f' % (i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)
    test_free_dataset(test[0])

    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_long()
    LIB.LGBM_BoosterCreateFromModelfile(
        c_str('model.txt'),
        ctypes.byref(num_total_model),
        ctypes.byref(booster2))

    # Only the feature columns are needed for prediction; the label in the
    # first field is skipped. The context manager closes the file even when
    # a field fails to parse.
    with open('../../examples/binary_classification/binary.test', 'r') as inp:
        data = [[float(x) for x in line.split('\t')[1:]] for line in inp]
    mat = np.array(data)
    # The output buffer is passed as a pointer to c_double, so it is
    # allocated as float64. The previous float32 buffer was half the size
    # that pointer type promises.
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_long()
    # reshape() already yields the flat 1-D array; no np.array(copy=False)
    # wrapper is needed (it raises under NumPy 2 when a copy is required).
    flat = mat.reshape(mat.size)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        50,
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str('../../examples/binary_classification/binary.test'),
        0,
        0,
        50,
        c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)
# Script entry: run the dataset tests first, then the booster tests.
test_dataset()
test_booster()