test_dll.py 3.98 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from __future__ import absolute_import

import sys
import os
import ctypes
import collections
import re

import numpy as np
from scipy import sparse

def _load_lib():
    """Load xgboost Library."""
    lib_path = './windows/x64/DLL/lib_lightgbm.dll'
    if len(lib_path) == 0:
        return None
    lib = ctypes.cdll.LoadLibrary(lib_path)
    return lib


LIB = _load_lib()
LIB.LGBM_GetLastError.restype = ctypes.c_char_p
def test_load_from_file():
    handle = ctypes.c_void_p()
    LIB.LGBM_CreateDatasetFromFile(ctypes.c_char_p('./examples/binary_classification/binary.train'),
    ctypes.c_char_p('max_bin=15'), ctypes.c_void_p(None), ctypes.byref(handle))
    num_data = ctypes.c_ulong()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
    print num_data
    num_feature = ctypes.c_ulong()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
    return handle

def c_array(ctype, values):
    """Convert a python string to c array."""
    return (ctype * len(values))(*values)

def c_str(string):
    """Convert a python string to cstring."""
    return ctypes.c_char_p(string.encode('utf-8'))

def test_load_from_matric():
    data = []

    inp = open('./examples/binary_classification/binary.train', 'r')
    for line in inp.readlines():
        data.append( [float(x) for x in line.split('\t')[1:]] )
    inp.close()
    mat = np.array(data)
    print mat.shape
    data = np.array(mat.reshape(mat.size), copy=False)
    handle = ctypes.c_void_p()
    LIB.LGBM_CreateDatasetFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 1, 
        mat.shape[0], 
        mat.shape[1], 1, 
    ctypes.c_char_p('max_bin=15 is_sparse=false'), None, ctypes.byref(handle) )
    LIB.LGBM_DatasetFree(ctypes.byref(handle))
    # num_data = ctypes.c_ulong()
    # LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
    # print num_data
    # num_feature = ctypes.c_ulong()
    # LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
    # print num_feature
    return handle

def test_load_from_csr(filename, reference):
    data = []
    label = []
    inp = open(filename, 'r')
    for line in inp.readlines():
        data.append( [float(x) for x in line.split('\t')[1:]] )
        label.append( float(line.split('\t')[0]) )
    inp.close()
    mat = np.array(data)
    label = np.array(label, dtype=np.float32)
    print mat.shape
    csr = sparse.csr_matrix(mat)
    handle = ctypes.c_void_p()
    ref = None
    if reference != None:
        ref = ctypes.byref(reference)
    LIB.LGBM_CreateDatasetFromCSR(c_array(ctypes.c_int, csr.indptr), 
        c_array(ctypes.c_int, csr.indices), 
        csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
        1, len(csr.indptr), len(csr.data),
        csr.shape[1], ctypes.c_char_p('max_bin=15'), ref, ctypes.byref(handle) )
    num_data = ctypes.c_ulong()
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
    print num_data
    num_feature = ctypes.c_ulong()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
    return handle


train = test_load_from_csr('./examples/binary_classification/binary.train', None)
test = [test_load_from_csr('./examples/binary_classification/binary.test', train)]
name = [c_str('test')]
booster = ctypes.c_void_p()
LIB.LGBM_BoosterCreate(train, c_array(ctypes.c_void_p, test), c_array(ctypes.c_char_p, name), 1, "app=binary metric=auc num_leaves=31", ctypes.byref(booster))
is_finished = ctypes.c_int(0)
for i in xrange(100):
    LIB.LGBM_BoosterUpdateOneIter(booster,ctypes.byref(is_finished))
    result = np.array([0.0], dtype=np.float32)
    out_len = ctypes.c_ulong(0)
    LIB.LGBM_BoosterEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_float)))
    print result
LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
booster2 = ctypes.c_void_p()

LIB.LGBM_BoosterLoadFromModelfile(c_str('model.txt'), ctypes.byref(booster2))

print type(len([0,0]))