test_.py 8.98 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
Guolin Ke's avatar
Guolin Ke committed
2
import ctypes
3
from pathlib import Path
4
5
from platform import system

Guolin Ke's avatar
Guolin Ke committed
6
7
8
import numpy as np
from scipy import sparse

9
10
11
# Prefer the shared library already loaded by the installed Python package;
# otherwise fall back to a library built at the repository root.
try:
    from lightgbm.basic import _LIB as LIB
except ModuleNotFoundError:
    print("Could not import lightgbm Python-package, looking for lib_lightgbm at the repo root")
    if system() in ("Windows", "Microsoft"):
        lib_file = Path(__file__).absolute().parents[2] / "Release" / "lib_lightgbm.dll"
    else:
        lib_file = Path(__file__).absolute().parents[2] / "lib_lightgbm.so"
    # ctypes only accepts path-like library names starting with Python 3.12
    # (and older versions fail outright on Windows), so pass a plain string.
    LIB = ctypes.cdll.LoadLibrary(str(lib_file))
Guolin Ke's avatar
Guolin Ke committed
18

Guolin Ke's avatar
Guolin Ke committed
19
20
# Declare the return type so ctypes hands the error message back as a bytes
# string rather than applying its default integer return conversion.
LIB.LGBM_GetLastError.restype = ctypes.c_char_p

21
22
23
24
25
26
# Integer codes passed (as ctypes.c_int) alongside raw buffers to tell the
# C API which element type each buffer holds.
# NOTE(review): presumably these mirror the C_API_DTYPE_* values in c_api.h -- confirm.
dtype_float32 = 0
dtype_float64 = 1
dtype_int32 = 2
dtype_int64 = 3


Guolin Ke's avatar
Guolin Ke committed
27
def c_str(string):
    """Return a ``ctypes.c_char_p`` holding the UTF-8 encoding of ``str(string)``.

    Any object is accepted; it is converted with ``str()`` first, so
    ``pathlib.Path`` values and numbers can be passed directly.
    """
    encoded = str(string).encode("utf-8")
    return ctypes.c_char_p(encoded)
Guolin Ke's avatar
Guolin Ke committed
29

wxchan's avatar
wxchan committed
30

31
def load_from_file(filename, reference):
    """Create a Dataset from a data file through the C API and return its handle.

    ``filename`` is the path of the file to load. ``reference`` is an existing
    dataset handle, or ``None``; it is forwarded unchanged to
    ``LGBM_DatasetCreateFromFile``. The dataset is created with ``max_bin=15``.

    The library's last error message and the dataset's row/feature counts
    are printed as a side effect.
    """
    handle = ctypes.c_void_p()
    path_arg = c_str(str(filename))
    params_arg = c_str("max_bin=15")
    LIB.LGBM_DatasetCreateFromFile(path_arg, params_arg, reference, ctypes.byref(handle))
    print(LIB.LGBM_GetLastError())

    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
    print(f"#data: {num_data.value} #feature: {num_feature.value}")
    return handle

wxchan's avatar
wxchan committed
45

46
def save_to_binary(handle, filename):
    """Write the dataset behind ``handle`` to ``filename`` via ``LGBM_DatasetSaveBinary``."""
    target = c_str(filename)
    LIB.LGBM_DatasetSaveBinary(handle, target)


50
def load_from_csr(filename, reference):
    """Create a Dataset from a text file by way of a SciPy CSR matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the float32 label
    and the remaining columns become the CSR feature matrix. ``reference``
    (an existing dataset handle or ``None``) is forwarded unchanged to
    ``LGBM_DatasetCreateFromCSR``. The dataset is created with ``max_bin=15``.

    Prints the row/feature counts and returns the new dataset handle.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    label = table[:, 0].astype(np.float32)
    features = sparse.csr_matrix(table[:, 1:])

    # NOTE(review): the index buffers are handed over as int32; this assumes
    # SciPy chose int32 for indptr/indices -- confirm for large inputs.
    int32_ptr = ctypes.POINTER(ctypes.c_int32)
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromCSR(
        features.indptr.ctypes.data_as(int32_ptr),
        ctypes.c_int(dtype_int32),
        features.indices.ctypes.data_as(int32_ptr),
        features.data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(features.indptr)),
        ctypes.c_int64(len(features.data)),
        ctypes.c_int64(features.shape[1]),  # number of columns
        c_str("max_bin=15"),
        reference,
        ctypes.byref(handle),
    )

    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))

    # Attach the labels that were split off the first column.
    LIB.LGBM_DatasetSetField(
        handle,
        c_str("label"),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32),
    )
    print(f"#data: {num_data.value} #feature: {num_feature.value}")
    return handle

wxchan's avatar
wxchan committed
86

87
def load_from_csc(filename, reference):
    """Create a Dataset from a text file by way of a SciPy CSC matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the float32 label
    and the remaining columns become the CSC feature matrix. ``reference``
    (an existing dataset handle or ``None``) is forwarded unchanged to
    ``LGBM_DatasetCreateFromCSC``. The dataset is created with ``max_bin=15``.

    Prints the row/feature counts and returns the new dataset handle.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    label = table[:, 0].astype(np.float32)
    features = sparse.csc_matrix(table[:, 1:])

    # NOTE(review): the index buffers are handed over as int32; this assumes
    # SciPy chose int32 for indptr/indices -- confirm for large inputs.
    int32_ptr = ctypes.POINTER(ctypes.c_int32)
    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromCSC(
        features.indptr.ctypes.data_as(int32_ptr),
        ctypes.c_int(dtype_int32),
        features.indices.ctypes.data_as(int32_ptr),
        features.data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int64(len(features.indptr)),
        ctypes.c_int64(len(features.data)),
        ctypes.c_int64(features.shape[0]),  # number of rows
        c_str("max_bin=15"),
        reference,
        ctypes.byref(handle),
    )

    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))

    # Attach the labels that were split off the first column.
    LIB.LGBM_DatasetSetField(
        handle,
        c_str("label"),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32),
    )
    print(f"#data: {num_data.value} #feature: {num_feature.value}")
    return handle

wxchan's avatar
wxchan committed
123

124
def load_from_mat(filename, reference):
    """Create a Dataset from a text file by way of a dense float64 matrix.

    The file is read with ``np.loadtxt``; column 0 becomes the float32 label
    and the remaining columns are flattened into a 1-D float64 buffer.
    ``reference`` (an existing dataset handle or ``None``) is forwarded
    unchanged to ``LGBM_DatasetCreateFromMat``. The dataset is created with
    ``max_bin=15``.

    Prints the row/feature counts and returns the new dataset handle.
    """
    table = np.loadtxt(str(filename), dtype=np.float64)
    label = table[:, 0].astype(np.float32)
    mat = table[:, 1:]
    # reshape() on the column slice yields a flat buffer in NumPy's default
    # (C / row-by-row) order.
    flat = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    n_rows, n_cols = mat.shape[0], mat.shape[1]

    handle = ctypes.c_void_p()
    LIB.LGBM_DatasetCreateFromMat(
        flat.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(n_rows),
        ctypes.c_int32(n_cols),
        ctypes.c_int(1),  # presumably the row-major flag, matching the flatten above -- confirm
        c_str("max_bin=15"),
        reference,
        ctypes.byref(handle),
    )

    num_data = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
    num_feature = ctypes.c_int(0)
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))

    # Attach the labels that were split off the first column.
    LIB.LGBM_DatasetSetField(
        handle,
        c_str("label"),
        label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_int(len(label)),
        ctypes.c_int(dtype_float32),
    )
    print(f"#data: {num_data.value} #feature: {num_feature.value}")
    return handle
wxchan's avatar
wxchan committed
157
158


159
def free_dataset(handle):
    """Hand the dataset ``handle`` to ``LGBM_DatasetFree``."""
    release = LIB.LGBM_DatasetFree
    release(handle)

wxchan's avatar
wxchan committed
162

163
def test_dataset(tmp_path):
    """Exercise every dataset loader, then round-trip the training set through a binary file.

    The training set is loaded from the text file; the test file is then
    loaded (and immediately freed) through the dense, CSR and CSC loaders in
    turn, each time with the training set as reference. Finally the training
    set is saved to a binary file under ``tmp_path``, freed, and loaded again
    from that binary file.
    """
    example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
    test_file = example_dir / "binary.test"

    train = load_from_file(example_dir / "binary.train", None)
    for loader in (load_from_mat, load_from_csr, load_from_csc):
        free_dataset(loader(test_file, train))

    binary_file = str(tmp_path / "train.binary.bin")
    save_to_binary(train, binary_file)
    free_dataset(train)

    reloaded = load_from_file(binary_file, None)
    free_dataset(reloaded)
wxchan's avatar
wxchan committed
177
178


179
def test_booster(tmp_path):
    """Train a booster for 50 iterations, save it, reload it and run predictions.

    Steps, all through the C API:
      1. build train/validation datasets with the dense loader;
      2. create a booster, register the validation data, run 50 update
         iterations and print an evaluation result every 10th iteration;
      3. save the model under ``tmp_path`` and free booster and datasets;
      4. recreate a booster from the saved model file and predict once from
         an in-memory matrix and twice from the test file.
    """
    example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
    test_file = example_dir / "binary.test"
    double_ptr = ctypes.POINTER(ctypes.c_double)

    train_set = load_from_mat(example_dir / "binary.train", None)
    valid_set = load_from_mat(test_file, train_set)

    booster = ctypes.c_void_p()
    model_path = tmp_path / "model.txt"
    LIB.LGBM_BoosterCreate(
        train_set,
        c_str("app=binary metric=auc num_leaves=31 verbose=0"),
        ctypes.byref(booster),
    )
    LIB.LGBM_BoosterAddValidData(booster, valid_set)

    update_flag = ctypes.c_int(0)
    for i in range(1, 51):
        LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(update_flag))
        result = np.array([0.0], dtype=np.float64)
        n_results = ctypes.c_int(0)
        # NOTE(review): data index 0 is requested here while the message below
        # says "test" -- confirm which dataset index 0 refers to in the C API.
        LIB.LGBM_BoosterGetEval(
            booster,
            ctypes.c_int(0),
            ctypes.byref(n_results),
            result.ctypes.data_as(double_ptr),
        )
        if i % 10 == 0:
            print(f"{i} iteration test AUC {result[0]:.6f}")

    LIB.LGBM_BoosterSaveModel(
        booster,
        ctypes.c_int(0),
        ctypes.c_int(-1),
        ctypes.c_int(0),
        c_str(str(model_path)),
    )
    LIB.LGBM_BoosterFree(booster)
    free_dataset(train_set)
    free_dataset(valid_set)

    # Reload the saved model into a fresh booster.
    booster2 = ctypes.c_void_p()
    num_total_model = ctypes.c_int(0)
    LIB.LGBM_BoosterCreateFromModelfile(
        c_str(str(model_path)),
        ctypes.byref(num_total_model),
        ctypes.byref(booster2),
    )

    # In-memory prediction: drop the label column and flatten the features.
    table = np.loadtxt(str(test_file), dtype=np.float64)
    mat = table[:, 1:]
    preds = np.empty(mat.shape[0], dtype=np.float64)
    num_preds = ctypes.c_int64(0)
    flat = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        flat.ctypes.data_as(double_ptr),
        ctypes.c_int(dtype_float64),
        ctypes.c_int32(mat.shape[0]),
        ctypes.c_int32(mat.shape[1]),
        ctypes.c_int(1),
        ctypes.c_int(1),
        ctypes.c_int(0),
        ctypes.c_int(25),
        c_str(""),
        ctypes.byref(num_preds),
        preds.ctypes.data_as(double_ptr),
    )

    # File-based prediction, run twice; only the third integer argument
    # differs (0, then 10). Both runs write to the same output file.
    for third_arg in (0, 10):
        LIB.LGBM_BoosterPredictForFile(
            booster2,
            c_str(str(test_file)),
            ctypes.c_int(0),
            ctypes.c_int(0),
            ctypes.c_int(third_arg),
            ctypes.c_int(25),
            c_str(""),
            c_str(tmp_path / "preds.txt"),
        )
    LIB.LGBM_BoosterFree(booster2)
244
245
246
247
248


def test_max_thread_control():
    """Check that the max-threads setting can be read, updated and reset through the C API."""
    current = ctypes.c_int(0)

    # at initialization, should be -1
    assert LIB.LGBM_GetMaxThreads(ctypes.byref(current)) == 0
    assert current.value == -1

    scenarios = (
        # updating that value through the C API should work
        (6, 6),
        # resetting to any negative number should set it to -1
        (-123, -1),
    )
    for requested, expected in scenarios:
        assert LIB.LGBM_SetMaxThreads(ctypes.c_int(requested)) == 0
        assert LIB.LGBM_GetMaxThreads(ctypes.byref(current)) == 0
        assert current.value == expected