test.py 7.39 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
import sys
import os
import ctypes
import collections

import numpy as np
from scipy import sparse

def LoadDll():
    """Load the LightGBM shared library with ctypes and return the handle.

    The path is chosen from ``os.name``: the Windows DLL when it is ``'nt'``,
    the ``.so`` otherwise. Both paths are relative, so they are resolved
    against the current working directory.
    """
    on_windows = os.name == 'nt'
    lib_path = ('../../windows/x64/DLL/lib_lightgbm.dll' if on_windows
                else '../../lib_lightgbm.so')
    return ctypes.cdll.LoadLibrary(lib_path)

# Library handle used by every LIB.LGBM_* call in this file; loaded at import time.
LIB = LoadDll()

Guolin Ke's avatar
Guolin Ke committed
19
20
# Declare the return type as a C string; ctypes would otherwise treat the
# result as an int.
LIB.LGBM_GetLastError.restype = ctypes.c_char_p

21
22
23
24
25
26
# Element-type codes passed to the LGBM_* calls in this file to describe the
# buffers handed over (dtype_int32 for the sparse indptr arrays, dtype_float64
# for the value buffers).
# NOTE(review): presumably these mirror the type constants of the C API
# header - confirm against c_api.h.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
def c_array(ctype, values):
    """Build a ctypes array of element type ``ctype`` holding ``values``.

    The array has exactly ``len(values)`` elements, initialised in order.
    """
    array_type = ctype * len(values)
    return array_type(*values)

def c_str(string):
    """Convert a Python text string to a ``ctypes.c_char_p`` for the C API.

    The text is encoded as UTF-8. For pure-ASCII input this yields exactly
    the same bytes as an ASCII encoding would, while non-ASCII input (for
    example a file path with accented characters) no longer raises
    ``UnicodeEncodeError``.
    """
    return ctypes.c_char_p(string.encode('utf-8'))
Guolin Ke's avatar
Guolin Ke committed
32

33
34
35
def test_load_from_file(filename, reference):
    """Create a dataset from ``filename`` via LGBM_DatasetCreateFromFile.

    ``reference`` is an existing dataset handle, or ``None``; it is passed
    straight through (ctypes sends ``None`` as a NULL pointer). The last
    error string and the dataset's row/feature counts are printed.

    Returns the new dataset handle (a ``ctypes.c_void_p``).
    """
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))
    print(LIB.LGBM_GetLastError())

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_save_to_binary(handle, filename):
    """Save the dataset behind ``handle`` to ``filename`` via LGBM_DatasetSaveBinary."""
    target = c_str(filename)
    LIB.LGBM_DatasetSaveBinary(handle, target)


Guolin Ke's avatar
Guolin Ke committed
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def test_load_from_csr(filename, reference):
    """Create a dataset from a tab-separated file via LGBM_DatasetCreateFromCSR.

    Each line of ``filename`` is parsed as ``label<TAB>feature...``. The
    features are converted to a SciPy CSR matrix and handed to the C API,
    then the labels are attached with LGBM_DatasetSetField.

    ``reference`` is an existing dataset handle, or ``None``; it is passed
    straight through (ctypes sends ``None`` as a NULL pointer).

    Returns the new dataset handle (a ``ctypes.c_void_p``).
    """
    rows = []
    labels = []
    # ``with`` guarantees the file is closed even if a value fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            rows.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    mat = np.array(rows)
    label = np.array(labels, dtype=np.float32)
    csr = sparse.csr_matrix(mat)

    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csr.indptr),   # number of indptr entries
        len(csr.data),     # number of stored values
        csr.shape[1],      # number of columns
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # NOTE(review): the trailing 0 equals dtype_float32 and the labels are
    # float32 - presumably the field's element type; confirm against c_api.h.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_csc(filename, reference):
    """Create a dataset from a tab-separated file via LGBM_DatasetCreateFromCSC.

    Each line of ``filename`` is parsed as ``label<TAB>feature...``. The
    features are converted to a SciPy CSC matrix and handed to the C API,
    then the labels are attached with LGBM_DatasetSetField.

    ``reference`` is an existing dataset handle, or ``None``; it is passed
    straight through (ctypes sends ``None`` as a NULL pointer).

    Returns the new dataset handle (a ``ctypes.c_void_p``).
    """
    rows = []
    labels = []
    # ``with`` guarantees the file is closed even if a value fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            rows.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    mat = np.array(rows)
    label = np.array(labels, dtype=np.float32)
    csc = sparse.csc_matrix(mat)

    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csc.indptr),   # number of indptr entries
        len(csc.data),     # number of stored values
        csc.shape[0],      # number of rows
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # NOTE(review): the trailing 0 equals dtype_float32 and the labels are
    # float32 - presumably the field's element type; confirm against c_api.h.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_mat(filename, reference):
    """Create a dataset from a tab-separated file via LGBM_DatasetCreateFromMat.

    Each line of ``filename`` is parsed as ``label<TAB>feature...``. The
    features are passed to the C API as one flat float64 buffer, then the
    labels are attached with LGBM_DatasetSetField.

    ``reference`` is an existing dataset handle, or ``None``; it is passed
    straight through (ctypes sends ``None`` as a NULL pointer).

    Returns the new dataset handle (a ``ctypes.c_void_p``).
    """
    rows = []
    labels = []
    # ``with`` guarantees the file is closed even if a value fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            rows.append([float(x) for x in fields[1:]])
            labels.append(float(fields[0]))
    mat = np.array(rows)
    # Flatten to 1-D in row order. The former np.array(..., copy=False)
    # wrapper around this reshape added nothing.
    flat = mat.reshape(mat.size)
    label = np.array(labels, dtype=np.float32)

    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromMat(
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],      # number of rows
        mat.shape[1],      # number of columns
        1,                 # NOTE(review): presumably the row-major flag - confirm
        c_str('max_bin=15'),
        reference,
        ctypes.byref(handle))

    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # NOTE(review): the trailing 0 equals dtype_float32 and the labels are
    # float32 - presumably the field's element type; confirm against c_api.h.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle
154
155
156
157
158
159
160
161
162
163
164
165
166
def test_free_dataset(handle):
    """Release the dataset referenced by ``handle`` via LGBM_DatasetFree."""
    free_dataset = LIB.LGBM_DatasetFree
    free_dataset(handle)

def test_dataset():
    """Exercise every dataset constructor, then a binary save/load round trip.

    The training set is loaded from text. The test file is then loaded and
    freed three times (dense matrix, CSR, CSC), each time with the training
    set as reference. Finally the training set is saved in binary form,
    freed, reloaded from the binary file and freed again.
    """
    train_path = '../../examples/binary_classification/binary.train'
    test_path = '../../examples/binary_classification/binary.test'
    binary_path = 'train.binary.bin'

    train = test_load_from_file(train_path, None)
    for loader in (test_load_from_mat, test_load_from_csr, test_load_from_csc):
        test = loader(test_path, train)
        test_free_dataset(test)

    test_save_to_binary(train, binary_path)
    test_free_dataset(train)

    # Reload from the binary file just written.
    train = test_load_from_file(binary_path, None)
    test_free_dataset(train)
def test_booster():
    """Train, save, reload and predict with a booster through the C API.

    Steps:
      1. Load train/test datasets from the binary-classification example.
      2. Create a booster, register the test set as validation data and
         run 100 update iterations, printing the evaluation result each time.
      3. Save the model to ``model.txt`` and free the booster and datasets.
      4. Recreate a booster from ``model.txt`` and predict on the test file,
         once from an in-memory matrix and once file-to-file (``preb.txt``).
    """
    train = test_load_from_mat('../../examples/binary_classification/binary.train', None)
    test = test_load_from_mat('../../examples/binary_classification/binary.test', train)

    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)

    # NOTE(review): is_finished is filled in by the library but never
    # inspected here; all 100 iterations are always requested.
    is_finished = ctypes.c_int(0)
    for i in range(100):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_ulong(0)
        # NOTE(review): 0 is passed as the data index while the message says
        # "test" - confirm which dataset index 0 selects.
        LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        print('%d Iteration test AUC %f' % (i, result[0]))

    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)
    test_free_dataset(test)

    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_long()
    LIB.LGBM_BoosterCreateFromModelfile(c_str('model.txt'), ctypes.byref(num_total_model), ctypes.byref(booster2))

    rows = []
    # ``with`` guarantees the file is closed even if a value fails to parse.
    with open('../../examples/binary_classification/binary.test', 'r') as inp:
        for line in inp:
            rows.append([float(x) for x in line.split('\t')[1:]])
    mat = np.array(rows)
    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_long()
    # Flatten to 1-D in row order. The former np.array(..., copy=False)
    # wrapper around this reshape added nothing.
    flat = mat.reshape(mat.size)
    # NOTE(review): the literals 1, 1, 50 are presumably the row-major flag,
    # the prediction type and the number of trees to use - confirm against
    # c_api.h.
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        50,
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    # NOTE(review): the literals 0, 0, 50 are presumably a has-header flag,
    # the prediction type and the number of trees to use - confirm.
    LIB.LGBM_BoosterPredictForFile(booster2, c_str('../../examples/binary_classification/binary.test'), 0, 0, 50, c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)
Guolin Ke's avatar
Guolin Ke committed
210

211
212
# Run both test routines. There is no __main__ guard, so they also run
# whenever this module is imported.
test_dataset()
test_booster()