"vscode:/vscode.git/clone" did not exist on "f5b6bd60d9d752c8e5a75b11ab771d0422214bb4"
test.py 7.72 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
import sys
import os
import ctypes
import collections

import numpy as np
from scipy import sparse

def LoadDll():
    """Load the native shared library with ctypes and return the handle.

    The library is looked up through a relative path: the Windows x64 DLL
    when ``os.name`` is ``'nt'``, otherwise the ``.so`` two directories up.
    """
    lib_path = ('../../windows/x64/DLL/lib_lightgbm.dll'
                if os.name == 'nt'
                else '../../lib_lightgbm.so')
    return ctypes.cdll.LoadLibrary(lib_path)

# Module-wide handle to the native library; loaded once, as soon as this file runs.
LIB = LoadDll()

19
20
21
22
23
24
# Integer type codes passed to the native API next to a raw buffer to tell it
# the buffer's element type (in this file: dtype_int32 for sparse index
# pointers, dtype_float64 for matrix values).
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
25
26
27
28
def c_array(ctype, values):
    """Copy *values* into a new ctypes array whose elements are *ctype*.

    The array has exactly ``len(values)`` elements.
    """
    array_type = ctype * len(values)
    return array_type(*values)

def c_str(string):
    """Return *string* as a ``ctypes.c_char_p`` for passing to native code.

    The text is encoded as UTF-8. For pure-ASCII input this produces the same
    bytes as an ASCII encoding would, while strings containing non-ASCII
    characters (for example file paths) no longer raise UnicodeEncodeError.
    """
    return ctypes.c_char_p(string.encode('utf-8'))
Guolin Ke's avatar
Guolin Ke committed
30

31
32
33
34
35
36
37
38
39
40
41
42
def test_load_from_file(filename, reference):
    """Create a dataset from a data file and print its dimensions.

    Args:
        filename: Path of the data file handed to the native library.
        reference: Handle of an existing dataset to pass (by reference) as
            the reference argument, or None to pass a null pointer instead.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the native call.
    """
    # Identity check: None is the "no reference dataset" marker.
    ref = ctypes.byref(reference) if reference is not None else None
    handle = ctypes.c_void_p()
    # NOTE(review): the return values of the LIB.* calls are discarded, so a
    # failure inside the native library is not detected here.
    LIB.LGBM_CreateDatasetFromFile(
        c_str(filename),
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_save_to_binary(handle, filename):
    """Ask the native library to save the dataset behind *handle* to *filename*."""
    target = c_str(filename)
    LIB.LGBM_DatasetSaveBinary(handle, target)

def test_load_from_binary(filename):
    """Create a dataset from a binary file, print its dimensions, return its handle."""
    dataset = ctypes.c_void_p()
    LIB.LGBM_CreateDatasetFromBinaryFile(c_str(filename), ctypes.byref(dataset))

    # Output slots the library fills with the row and feature counts.
    n_rows = ctypes.c_long()
    n_cols = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(n_rows))
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(n_cols))

    print('#data:%d #feature:%d' % (n_rows.value, n_cols.value))
    return dataset

Guolin Ke's avatar
Guolin Ke committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def test_load_from_csr(filename, reference):
    """Create a dataset from a tab-separated file via a SciPy CSR matrix.

    Each line of *filename* is split on tabs; the first field is parsed as
    the label and the remaining fields as feature values.

    Args:
        filename: Path of the tab-separated text file to read.
        reference: Handle of an existing dataset to pass (by reference) as
            the reference argument, or None to pass a null pointer instead.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the native call, after
        the labels have been attached through LGBM_DatasetSetField.
    """
    data = []
    label = []
    # The context manager closes the file even if a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            label.append(float(fields[0]))
            data.append([float(x) for x in fields[1:]])
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()
    ref = ctypes.byref(reference) if reference is not None else None

    # NOTE(review): the return values of the LIB.* calls are discarded.
    LIB.LGBM_CreateDatasetFromCSR(
        c_array(ctypes.c_int, csr.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csr.indices),
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csr.indptr),
        len(csr.data),
        csr.shape[1],  # number of columns of the matrix
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # The trailing 0 presumably is the float32 type code (dtype_float32 == 0)
    # matching the c_float label buffer -- TODO confirm against the C API.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_csc(filename, reference):
    """Create a dataset from a tab-separated file via a SciPy CSC matrix.

    Each line of *filename* is split on tabs; the first field is parsed as
    the label and the remaining fields as feature values.

    Args:
        filename: Path of the tab-separated text file to read.
        reference: Handle of an existing dataset to pass (by reference) as
            the reference argument, or None to pass a null pointer instead.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the native call, after
        the labels have been attached through LGBM_DatasetSetField.
    """
    data = []
    label = []
    # The context manager closes the file even if a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            label.append(float(fields[0]))
            data.append([float(x) for x in fields[1:]])
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    csc = sparse.csc_matrix(mat)
    handle = ctypes.c_void_p()
    ref = ctypes.byref(reference) if reference is not None else None

    # NOTE(review): the return values of the LIB.* calls are discarded.
    LIB.LGBM_CreateDatasetFromCSC(
        c_array(ctypes.c_int, csc.indptr),
        dtype_int32,
        c_array(ctypes.c_int, csc.indices),
        csc.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        len(csc.indptr),
        len(csc.data),
        csc.shape[0],  # number of rows of the matrix
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # The trailing 0 presumably is the float32 type code (dtype_float32 == 0)
    # matching the c_float label buffer -- TODO confirm against the C API.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle

def test_load_from_mat(filename, reference):
    """Create a dataset from a tab-separated file via a dense NumPy matrix.

    Each line of *filename* is split on tabs; the first field is parsed as
    the label and the remaining fields as feature values.

    Args:
        filename: Path of the tab-separated text file to read.
        reference: Handle of an existing dataset to pass (by reference) as
            the reference argument, or None to pass a null pointer instead.

    Returns:
        The ``ctypes.c_void_p`` handle filled in by the native call, after
        the labels have been attached through LGBM_DatasetSetField.
    """
    rows = []
    label = []
    # The context manager closes the file even if a field fails to parse.
    with open(filename, 'r') as inp:
        for line in inp:
            fields = line.split('\t')
            label.append(float(fields[0]))
            rows.append([float(x) for x in fields[1:]])
    mat = np.array(rows)
    # reshape() already yields a 1-D ndarray over the same values; the former
    # np.array(..., copy=False) wrapper added nothing and its meaning changed
    # in NumPy 2.0.
    flat = mat.reshape(mat.size)
    label = np.array(label, dtype=np.float32)
    handle = ctypes.c_void_p()
    ref = ctypes.byref(reference) if reference is not None else None

    # NOTE(review): the return values of the LIB.* calls are discarded.
    LIB.LGBM_CreateDatasetFromMat(
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,  # presumably a row-major flag -- TODO confirm against the C API
        c_str('max_bin=15'),
        ref,
        ctypes.byref(handle))
    num_data = ctypes.c_long()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    # The trailing 0 presumably is the float32 type code (dtype_float32 == 0)
    # matching the c_float label buffer -- TODO confirm against the C API.
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
    return handle
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def test_free_dataset(handle):
    """Hand the dataset *handle* to the native library's LGBM_DatasetFree."""
    release = LIB.LGBM_DatasetFree
    release(handle)

def test_dataset():
    """Run every dataset loader, then round-trip the training set via a binary file."""
    train_path = '../../examples/binary_classification/binary.train'
    test_path = '../../examples/binary_classification/binary.test'
    binary_path = 'train.binary.bin'

    train = test_load_from_file(train_path, None)
    # Build the test set three ways (dense, CSR, CSC), each time with the
    # training set as reference, and free it right after it is created.
    for loader in (test_load_from_mat, test_load_from_csr, test_load_from_csc):
        test_free_dataset(loader(test_path, train))

    # Save the training set, free it, then load the saved copy and free that too.
    test_save_to_binary(train, binary_path)
    test_free_dataset(train)
    reloaded = test_load_from_binary(binary_path)
    test_free_dataset(reloaded)
def test_booster():
    """Train a booster through the native API, save it, reload it and predict.

    Steps, in order:
      1. Load the binary-classification train and test files as dense
         datasets and create a booster on them.
      2. Run 100 update iterations, printing the evaluation result after each.
      3. Save the model to ``model.txt`` and free the booster and datasets.
      4. Load the model back from ``model.txt`` and predict on the test
         file, once from an in-memory matrix and once file-to-file
         (output written to ``preb.txt``).
    """
    train = test_load_from_mat('../../examples/binary_classification/binary.train', None)
    test = [test_load_from_mat('../../examples/binary_classification/binary.test', train)]
    name = [c_str('test')]
    booster = ctypes.c_void_p()
    # NOTE(review): the return values of the LIB.* calls are discarded.
    LIB.LGBM_BoosterCreate(
        train,
        c_array(ctypes.c_void_p, test),
        c_array(ctypes.c_char_p, name),
        len(test),
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        ctypes.byref(booster))
    is_finished = ctypes.c_int(0)
    # NOTE(review): is_finished is filled by the library but never inspected;
    # all 100 iterations always run.
    for i in range(100):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float32)
        out_len = ctypes.c_ulong(0)
        LIB.LGBM_BoosterEval(
            booster,
            0,
            ctypes.byref(out_len),
            result.ctypes.data_as(ctypes.POINTER(ctypes.c_float)))
        print('%d Iteration test AUC %f' % (i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)
    test_free_dataset(test[0])

    booster2 = ctypes.c_void_p()
    LIB.LGBM_BoosterLoadFromModelfile(c_str('model.txt'), ctypes.byref(booster2))
    rows = []
    # The context manager closes the file even if a field fails to parse.
    with open('../../examples/binary_classification/binary.test', 'r') as inp:
        for line in inp:
            # The first tab-separated field is skipped; only the rest is used.
            rows.append([float(x) for x in line.split('\t')[1:]])
    mat = np.array(rows)
    preb = np.zeros((mat.shape[0], 1), dtype=np.float64)
    # reshape() already yields a 1-D ndarray; the former
    # np.array(..., copy=False) wrapper was redundant.
    flat = mat.reshape(mat.size)
    # The three integer arguments after the shape are passed through exactly
    # as before; their meaning is defined by the C API -- TODO confirm.
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        dtype_float64,
        mat.shape[0],
        mat.shape[1],
        1,
        1,
        50,
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    LIB.LGBM_BoosterPredictForFile(booster2, 1, 50, 0, c_str('../../examples/binary_classification/binary.test'), c_str('preb.txt'))
    LIB.LGBM_BoosterFree(booster2)
Guolin Ke's avatar
Guolin Ke committed
214

215
216
# Run both smoke tests as soon as this file is executed; there is no
# __main__ guard, so importing the module runs them as well.
test_dataset()
test_booster()
Guolin Ke's avatar
Guolin Ke committed
217