test_.py 8.85 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
Guolin Ke's avatar
Guolin Ke committed
2
import ctypes
3
from pathlib import Path
4
5
from platform import system

Guolin Ke's avatar
Guolin Ke committed
6
7
8
import numpy as np
from scipy import sparse

9
10
11
12
# Prefer the shared library that the installed lightgbm Python package has
# already loaded; if the package is missing, fall back to a library file
# located relative to this test file.
try:
    from lightgbm.basic import _LIB as LIB
except ModuleNotFoundError:
    print("Could not import lightgbm Python package, looking for lib_lightgbm at the repo root")
    if system() in ("Windows", "Microsoft"):
        lib_file = Path(__file__).absolute().parents[2] / "Release" / "lib_lightgbm.dll"
    else:
        lib_file = Path(__file__).absolute().parents[2] / "lib_lightgbm.so"
    # ctypes only documents path-like library names from Python 3.12 on, so
    # hand it a plain string to keep older interpreters working.
    LIB = ctypes.cdll.LoadLibrary(str(lib_file))

# ctypes assumes an int return type by default; declare the real one so the
# error message comes back as bytes instead of a truncated pointer value.
LIB.LGBM_GetLastError.restype = ctypes.c_char_p

21
22
23
24
25
26
# Integer element-type codes; the loaders in this file wrap them in
# ctypes.c_int and pass them next to the raw buffers they describe.
# NOTE(review): presumably these mirror the C_API_DTYPE_* values in
# LightGBM's c_api.h -- confirm against the header.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
27
def c_str(string):
    """Return ``string`` as a ``ctypes.c_char_p`` holding its UTF-8 bytes."""
    utf8_bytes = string.encode("utf-8")
    return ctypes.c_char_p(utf8_bytes)
Guolin Ke's avatar
Guolin Ke committed
29

wxchan's avatar
wxchan committed
30

31
def load_from_file(filename, reference):
    """Create a dataset from a file on disk and return its handle.

    Parameters
    ----------
    filename : str or path-like
        File to load; converted with ``str()`` before being passed on.
    reference : dataset handle or None
        Passed through unchanged as the reference-dataset argument.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromFile``.
    """
    dataset = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromFile(
        c_str(str(filename)),
        c_str("max_bin=15"),
        reference,
        ctypes.byref(dataset),
    )
    # Always echo the library's last-error buffer, whatever the outcome.
    print(LIB.LGBM_GetLastError())

    n_rows = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(n_rows))
    n_cols = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(n_cols))
    print(f"#data: {n_rows.value} #feature: {n_cols.value}")
    return dataset

wxchan's avatar
wxchan committed
45

46
def save_to_binary(handle, filename):
    """Write the dataset behind ``handle`` to ``filename``.

    Parameters
    ----------
    handle : dataset handle
        Dataset to serialise with ``LGBM_DatasetSaveBinary``.
    filename : str or path-like
        Destination path. It is converted with ``str()`` first, the same way
        ``load_from_file`` treats its argument, so ``pathlib.Path`` values
        work as well as plain strings.
    """
    LIB.LGBM_DatasetSaveBinary(handle, c_str(str(filename)))


50
def load_from_csr(filename, reference):
    """Build a dataset from a text file via a SciPy CSR matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the ``label``
    field and the remaining columns become the feature matrix.

    Parameters
    ----------
    filename : str or path-like
        Text file to read.
    reference : dataset handle or None
        Passed through unchanged as the reference-dataset argument.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromCSR``.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    features = sparse.csr_matrix(table[:, 1:])
    label = table[:, 0].astype(np.float32)

    int32_ptr = ctypes.POINTER(ctypes.c_int32)
    double_ptr = ctypes.POINTER(ctypes.c_double)

    dataset = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromCSR(
        features.indptr.ctypes.data_as(int32_ptr),
        ctypes.c_int(dtype_int32),
        features.indices.ctypes.data_as(int32_ptr),
        features.data.ctypes.data_as(double_ptr),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(features.indptr)),
        ctypes.c_int64(len(features.data)),
        ctypes.c_int64(features.shape[1]),  # number of columns
        c_str("max_bin=15"),
        reference,
        ctypes.byref(dataset),
    )

    n_rows = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(n_rows))
    n_cols = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(n_cols))

    LIB.LGBM_DatasetSetField(
        dataset,
        c_str("label"),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32),
    )
    print(f"#data: {n_rows.value} #feature: {n_cols.value}")
    return dataset

wxchan's avatar
wxchan committed
86

87
def load_from_csc(filename, reference):
    """Build a dataset from a text file via a SciPy CSC matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the ``label``
    field and the remaining columns become the feature matrix.

    Parameters
    ----------
    filename : str or path-like
        Text file to read.
    reference : dataset handle or None
        Passed through unchanged as the reference-dataset argument.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromCSC``.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    features = sparse.csc_matrix(table[:, 1:])
    label = table[:, 0].astype(np.float32)

    int32_ptr = ctypes.POINTER(ctypes.c_int32)
    double_ptr = ctypes.POINTER(ctypes.c_double)

    dataset = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromCSC(
        features.indptr.ctypes.data_as(int32_ptr),
        ctypes.c_int(dtype_int32),
        features.indices.ctypes.data_as(int32_ptr),
        features.data.ctypes.data_as(double_ptr),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(features.indptr)),
        ctypes.c_int64(len(features.data)),
        ctypes.c_int64(features.shape[0]),  # number of rows
        c_str("max_bin=15"),
        reference,
        ctypes.byref(dataset),
    )

    n_rows = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(dataset, ctypes.byref(n_rows))
    n_cols = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(dataset, ctypes.byref(n_cols))

    LIB.LGBM_DatasetSetField(
        dataset,
        c_str("label"),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32),
    )
    print(f"#data: {n_rows.value} #feature: {n_cols.value}")
    return dataset

wxchan's avatar
wxchan committed
123

124
def load_from_mat(filename, reference):
    """Build a dataset from a text file via a dense, flattened matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the ``label``
    field and the remaining columns are flattened in C (row-major) order
    into a single float64 buffer.

    Parameters
    ----------
    filename : str or path-like
        Text file to read.
    reference : dataset handle or None
        Passed through unchanged as the reference-dataset argument.

    Returns
    -------
    ctypes.c_void_p
        Handle filled in by ``LGBM_DatasetCreateFromMat``.
    """
    mat = np.loadtxt(str(filename), dtype=np.float64)
    label = mat[:, 0].astype(np.float32)
    mat = mat[:, 1:]
    # np.asarray copies only when it has to and never raises, whereas
    # np.array(..., copy=False) raises under NumPy 2 if a copy is required.
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)

    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromMat(
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        # NOTE(review): presumably the is_row_major flag, matching the
        # C-order flattening above -- confirm against the C API reference.
        ctypes.c_int(1),
        c_str("max_bin=15"),
        reference,
        ctypes.byref(handle),
    )

    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))

    LIB.LGBM_DatasetSetField(
        handle,
        c_str("label"),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32),
    )
    print(f"#data: {num_data.value} #feature: {num_feature.value}")
    return handle
wxchan's avatar
wxchan committed
157
158


159
def free_dataset(handle):
    """Release the dataset behind ``handle`` by calling ``LGBM_DatasetFree``."""
    release = LIB.LGBM_DatasetFree
    release(handle)

wxchan's avatar
wxchan committed
162

163
def test_dataset():
    """Exercise every dataset loader plus the binary save/reload round trip.

    Side effect: writes ``train.binary.bin`` into the current working
    directory.
    """
    binary_example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
    test_file = binary_example_dir / "binary.test"

    train = load_from_file(binary_example_dir / "binary.train", None)

    # Load the test file through each in-memory path, using the training
    # set as reference, and free every result straight away.
    for loader in (load_from_mat, load_from_csr, load_from_csc):
        free_dataset(loader(test_file, train))

    binary_file = "train.binary.bin"
    save_to_binary(train, binary_file)
    free_dataset(train)

    # The file written above must be loadable again.
    free_dataset(load_from_file(binary_file, None))
wxchan's avatar
wxchan committed
176
177


178
def test_booster():
    """Train, save, reload and predict with a booster through the raw C API.

    Trains for 50 iterations on the binary-classification example, printing
    an evaluation value every 10th iteration, then saves the model, reloads
    it, and predicts both from an in-memory matrix and from a file.

    Side effects: writes ``model.txt`` and ``preb.txt`` into the current
    working directory.
    """
    binary_example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
    train = load_from_mat(binary_example_dir / "binary.train", None)
    test = load_from_mat(binary_example_dir / "binary.test", train)

    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
    LIB.LGBM_BoosterAddValidData(booster, test)

    is_finished = ctypes.c_int(0)
    for i in range(1, 51):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_int(0)
        # NOTE(review): the LightGBM C API documents data_idx 0 as the
        # training data, so the value printed below as "test AUC" may in
        # fact be the training AUC -- confirm whether index 1 (the
        # validation set added above) was intended.
        LIB.LGBM_BoosterGetEval(
            booster, ctypes.c_int(0), ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
        )
        if i % 10 == 0:
            print(f"{i} iteration test AUC {result[0]:.6f}")

    LIB.LGBM_BoosterSaveModel(booster, ctypes.c_int(0), ctypes.c_int(-1), ctypes.c_int(0), c_str("model.txt"))
    LIB.LGBM_BoosterFree(booster)
    free_dataset(train)
    free_dataset(test)

    # Reload the model that was just written and predict with it.
    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_int(0)
    LIB.LGBM_BoosterCreateFromModelfile(c_str("model.txt"), ctypes.byref(num_total_model), ctypes.byref(booster2))

    data = np.loadtxt(str(binary_example_dir / "binary.test"), dtype=np.float64)
    mat = data[:, 1:]  # drop the label column
    preb = np.empty(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_int64(0)
    # np.asarray copies only when it has to and never raises, whereas
    # np.array(..., copy=False) raises under NumPy 2 if a copy is required.
    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)

    LIB.LGBM_BoosterPredictForMat(
        booster2,
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),
        ctypes.c_int(1),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(""),
        ctypes.byref(num_preb),
        preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
    )

    # Predict from the file twice; the two calls differ only in the third
    # integer argument (0 vs 10) and both write to the same output file.
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(str(binary_example_dir / "binary.test")),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(""),
        c_str("preb.txt"),
    )
    LIB.LGBM_BoosterPredictForFile(
        booster2,
        c_str(str(binary_example_dir / "binary.test")),
        ctypes.c_int(0),
        ctypes.c_int(0),
        ctypes.c_int(10),
        ctypes.c_int(25),
        c_str(""),
        c_str("preb.txt"),
    )
    LIB.LGBM_BoosterFree(booster2)
242
243
244
245
246


def test_max_thread_control():
    """Check that the max-thread setting round-trips through the C API."""
    current = ctypes.c_int(0)

    # at initialization, should be -1
    assert LIB.LGBM_GetMaxThreads(ctypes.byref(current)) == 0
    assert current.value == -1

    # Each round sets a value and reads it back:
    #   * updating that value through the C API should work
    #   * resetting to any negative number should set it to -1
    for requested, expected in ((6, 6), (-123, -1)):
        assert LIB.LGBM_SetMaxThreads(ctypes.c_int(requested)) == 0
        assert LIB.LGBM_GetMaxThreads(ctypes.byref(current)) == 0
        assert current.value == expected