Commit 422c0ef7 authored by Guolin Ke's avatar Guolin Ke
Browse files

almost finish, need some tests

parent fc383361
......@@ -73,7 +73,7 @@ public:
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len lenght of returned score
*/
virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) const = 0;
virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) = 0;
/*!
* \brief Prediction for one record, not sigmoid transform
......@@ -127,7 +127,7 @@ public:
* \brief Get number of weak sub-models
* \return Number of weak sub-models
*/
virtual int NumberOfSubModels() const = 0;
virtual int NumberOfTotalModel() const = 0;
/*!
* \brief Get number of classes
......@@ -138,7 +138,7 @@ public:
/*!
* \brief Set number of used model for prediction
*/
virtual void SetNumUsedModel(int num_used_model) = 0;
virtual void SetNumIterationForPred(int num_iteration) = 0;
/*!
* \brief Get Type name of this boosting object
......
......@@ -230,11 +230,13 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
/*!
* \brief load an existing boosting from model file
* \param filename filename of model
* \param out_num_total_model number of total models
* \param out handle of created Booster
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterCreateFromModelfile(
const char* filename,
int64_t* out_num_total_model,
BoosterHandle* out);
/*!
......@@ -244,6 +246,12 @@ DllExport int LGBM_BoosterCreateFromModelfile(
*/
DllExport int LGBM_BoosterFree(BoosterHandle handle);
/*!
* \brief Get number of class
* \return number of class
*/
DllExport int LGBM_BoosterGetNumClasses(BoosterHandle handle, int64_t* out_len);
/*!
* \brief update the model in one round
* \param handle handle
......@@ -276,7 +284,7 @@ DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len);
* \brief Get number of eval
* \return total number of eval result
*/
DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, const char*** out_strs);
DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, char** out_strs);
/*!
* \brief get evaluation for training data and validation data
......@@ -291,17 +299,6 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
int64_t* out_len,
float* out_results);
/*!
* \brief get raw score for training data, used to calculate gradients outside
* \param handle handle
* \param out_len len of output result
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterGetTrainingScore(BoosterHandle handle,
int64_t* out_len,
const float** out_result);
/*!
* \brief Get prediction for training data and validation data
this can be used to support customized eval function
......@@ -319,21 +316,21 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
/*!
* \brief make prediction for file
* \param handle handle
* \param data_filename filename of data file
* \param data_has_header data file has header or not
* \param predict_type
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param data_has_header data file has header or not
* \param data_filename filename of data file
* \param num_iteration number of iteration for prediction
* \param result_filename filename of result file
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
int predict_type,
int64_t n_used_trees,
int data_has_header,
const char* data_filename,
int data_has_header,
int predict_type,
int64_t num_iteration,
const char* result_filename);
/*!
......@@ -351,7 +348,8 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param num_iteration number of iteration for prediction
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
......@@ -365,8 +363,9 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int64_t nelem,
int64_t num_col,
int predict_type,
int64_t n_used_trees,
double* out_result);
int64_t num_iteration,
int64_t* out_len,
float* out_result);
/*!
* \brief make prediction for an new data set
......@@ -380,7 +379,8 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param num_iteration number of iteration for prediction
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
......@@ -391,18 +391,19 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int32_t ncol,
int is_row_major,
int predict_type,
int64_t n_used_trees,
double* out_result);
int64_t num_iteration,
int64_t* out_len,
float* out_result);
/*!
* \brief save model into file
* \param handle handle
* \param num_used_model
* \param num_iteration
* \param filename file name
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_used_model,
int num_iteration,
const char* filename);
......
......@@ -97,7 +97,7 @@ public:
std::string output_result = "LightGBM_predict_result.txt";
std::string input_model = "";
int verbosity = 1;
int num_model_predict = NO_LIMIT;
int num_iteration_predict = NO_LIMIT;
bool is_pre_partition = false;
bool is_enable_sparse = true;
bool use_two_round_loading = false;
......
......@@ -6,6 +6,7 @@ import os
import ctypes
import collections
import re
import tempfile
import numpy as np
import scipy.sparse
......@@ -111,7 +112,7 @@ def c_array(ctype, values):
return (ctype * len(values))(*values)
def dict_to_str(data):
if len(data) == 0:
if data is None or len(data) == 0:
return ""
pairs = []
for key in data:
......@@ -131,10 +132,10 @@ def c_float_array(data):
data = np.array(data, copy=False)
if is_numpy_1d_array(data):
if data.dtype == np.float32:
ptr_data = c_array(ctypes.c_float, data)
ptr_data = data.ctypes.data_as(ctypes.c_float)
type_data = C_API_DTYPE_FLOAT32
elif data.dtype == np.float64:
ptr_data = c_array(ctypes.c_double, data)
ptr_data = data.ctypes.data_as(ctypes.c_double)
type_data = C_API_DTYPE_FLOAT64
else:
raise TypeError("expected np.float32 or np.float64, met type({})".format(data.dtype))
......@@ -148,10 +149,10 @@ def c_int_array(data):
data = np.array(data, copy=False)
if is_numpy_1d_array(data):
if data.dtype == np.int32:
ptr_data = c_array(ctypes.c_int32, data)
ptr_data = data.ctypes.data_as(ctypes.c_int32)
type_data = C_API_DTYPE_INT32
elif data.dtype == np.int64:
ptr_data = c_array(ctypes.c_int64, data)
ptr_data = data.ctypes.data_as(ctypes.c_int64)
type_data = C_API_DTYPE_INT64
else:
raise TypeError("expected np.int32 or np.int64, met type({})".format(data.dtype))
......@@ -206,6 +207,7 @@ class Dataset(object):
self.raw_data = data
else:
self.raw_data = None
self.data_has_header = False
"""process for args"""
params = {}
params["max_bin"] = max_bin
......@@ -223,6 +225,10 @@ class Dataset(object):
raise TypeError('Reference dataset should be None or dataset instance')
"""start construct data"""
if is_str(data):
"""check data has header or not"""
if "has_header" in params or "header" in params:
if params["has_header"].lower() == "true" or params["header"].lower() == "true":
data_has_header = True
self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_CreateDatasetFromFile(
c_str(data),
......@@ -230,17 +236,21 @@ class Dataset(object):
ref_dataset,
ctypes.byref(self.handle)))
elif isinstance(data, scipy.sparse.csr_matrix):
self._init_from_csr(data, params_str, ref_dataset)
elif isinstance(data, scipy.sparse.csc_matrix):
self._init_from_csc(data, params_str, ref_dataset)
self.__init_from_csr(data, params_str, ref_dataset)
elif isinstance(data, np.ndarray):
self._init_from_npy2d(data, params_str, ref_dataset)
self.__init_from_np2d(data, params_str, ref_dataset)
else:
try:
csr = scipy.sparse.csr_matrix(data)
self._init_from_csr(csr)
if self.raw_data is not None:
self.raw_data = csr
self.__init_from_csr(csr)
except:
raise TypeError('can not initialize Dataset from {}'.format(type(data).__name__))
self.__label = None
self.__weight = None
self.__init_score = None
self.__group = None
if label is not None:
self.set_label(label)
if weight is not None:
......@@ -252,55 +262,7 @@ class Dataset(object):
def free_raw_data(self):
self.raw_data = None
def _init_from_csr(self, csr, params_str, ref_dataset):
"""
Initialize data from a CSR matrix.
"""
if len(csr.indices) != len(csr.data):
raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
self.handle = ctypes.c_void_p()
ptr_indptr, type_ptr_indptr = c_int_array(csr.indptr)
ptr_data, type_ptr_data = c_float_array(csr.data)
_safe_call(_LIB.LGBM_CreateDatasetFromCSR(
ptr_indptr,
type_ptr_indptr,
c_array(ctypes.c_int32, csr.indices),
ptr_data,
type_ptr_data,
len(csr.indptr),
len(csr.data),
csr.shape[1],
c_str(params_str),
ref_dataset,
ctypes.byref(self.handle)))
def _init_from_csc(self, csr, params_str, ref_dataset):
"""
Initialize data from a CSC matrix.
"""
if len(csc.indices) != len(csc.data):
raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
self.handle = ctypes.c_void_p()
ptr_indptr, type_ptr_indptr = c_int_array(csc.indptr)
ptr_data, type_ptr_data = c_float_array(csc.data)
_safe_call(_LIB.LGBM_CreateDatasetFromCSC(
ptr_indptr,
type_ptr_indptr,
c_array(ctypes.c_int32, csc.indices),
ptr_data,
type_ptr_data,
len(csc.indptr),
len(csc.data),
csc.shape[0],
c_str(params_str),
ref_dataset,
ctypes.byref(self.handle)))
def _init_from_npy2d(self, mat, params_str, ref_dataset):
def __init_from_np2d(self, mat, params_str, ref_dataset):
"""
Initialize data from a 2-D numpy matrix.
"""
......@@ -325,6 +287,30 @@ class Dataset(object):
ref_dataset,
ctypes.byref(self.handle)))
def __init_from_csr(self, csr, params_str, ref_dataset):
"""
Initialize data from a CSR matrix.
"""
if len(csr.indices) != len(csr.data):
raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
self.handle = ctypes.c_void_p()
ptr_indptr, type_ptr_indptr = c_int_array(csr.indptr)
ptr_data, type_ptr_data = c_float_array(csr.data)
_safe_call(_LIB.LGBM_CreateDatasetFromCSR(
ptr_indptr,
type_ptr_indptr,
csr.indices.ctypes.data_as(ctypes.c_int32),
ptr_data,
type_ptr_data,
len(csr.indptr),
len(csr.data),
csr.shape[1],
c_str(params_str),
ref_dataset,
ctypes.byref(self.handle)))
def __del__(self):
_safe_call(_LIB.LGBM_DatasetFree(self.handle))
......@@ -371,10 +357,10 @@ class Dataset(object):
if not is_numpy_1d_array(data):
raise TypeError("Unknow type({})".format(type(data).__name__))
if data.dtype == np.float32:
ptr_data = c_array(ctypes.c_float, data)
ptr_data = data.ctypes.data_as(ctypes.c_float)
type_data = C_API_DTYPE_FLOAT32
elif data.dtype == np.int32:
ptr_data = c_array(ctypes.c_int32, data)
ptr_data = data.ctypes.data_as(ctypes.c_int32)
type_data = C_API_DTYPE_INT32
else:
raise TypeError("excepted np.float32 or np.int32, met type({})".format(data.dtype))
......@@ -409,6 +395,7 @@ class Dataset(object):
label = list_to_1d_numpy(label, np.float32)
if label.dtype != np.float32:
label = label.astype(np.float32, copy=False)
self.__label = label
self.set_field('label', label)
def set_weight(self, weight):
......@@ -422,6 +409,7 @@ class Dataset(object):
weight = list_to_1d_numpy(weight, np.float32)
if weight.dtype != np.float32:
weight = weight.astype(np.float32, copy=False)
self.__weight = weight
self.set_field('weight', weight)
def set_init_score(self, score):
......@@ -434,6 +422,7 @@ class Dataset(object):
score = list_to_1d_numpy(score, np.float32)
if score.dtype != np.float32:
score = score.astype(np.float32, copy=False)
self.__init_score = init_score
self.set_field('init_score', score)
def set_group(self, group):
......@@ -447,6 +436,7 @@ class Dataset(object):
group = list_to_1d_numpy(group, np.int32)
if group.dtype != np.int32:
group = group.astype(np.int32, copy=False)
self.__group = group
self.set_field('group', group)
def set_group_id(self, group_id):
......@@ -470,7 +460,9 @@ class Dataset(object):
-------
label : array
"""
return self.get_field('label')
if self.__label is None:
self.__label = self.get_field('label')
return self.__label
def get_weight(self):
"""Get the weight of the Dataset.
......@@ -479,7 +471,9 @@ class Dataset(object):
-------
weight : array
"""
return self.get_field('weight')
if self.__weight is None:
self.__weight = self.get_field('weight')
return self.__weight
def get_init_score(self):
"""Get the initial score of the Dataset.
......@@ -488,7 +482,20 @@ class Dataset(object):
-------
init_score : array
"""
return self.get_field('init_score')
if self.__init_score is None:
self.__init_score = self.get_field('init_score')
return self.__init_score
def get_group(self):
"""Get the initial score of the Dataset.
Returns
-------
init_score : array
"""
if self.__group is None:
self.__group = self.get_field('group')
return self.__group
def num_data(self):
"""Get the number of rows in the Dataset.
......@@ -553,6 +560,9 @@ class Dataset(object):
else:
self._feature_names = None
C_API_PREDICT_NORMAL =0
C_API_PREDICT_RAW_SCORE =1
C_API_PREDICT_LEAF_INDEX =2
class Booster(object):
""""A Booster of of LightGBM.
......@@ -560,12 +570,9 @@ class Booster(object):
feature_names = None
def __init__(self, params=None,
train_set=None,
valid_sets=None,
name_valid_sets=None,
model_file=None,
fobj=None):
def __init__(self,params=None,
train_set=None, valid_sets=None,
name_valid_sets=None, model_file=None):
# pylint: disable=invalid-name
"""Initialize the Booster.
......@@ -580,15 +587,17 @@ class Booster(object):
name_valid_sets : List of string
name of validation datasets
model_file : string
Path to the model file.
Path to the model file.
If tarin_set is not None, used for continued train.
else used for loading model prediction task
"""
self.handle = ctypes.c_void_p()
if train_set is not None:
"""Training task"""
if not isinstance(train_set, Dataset):
raise TypeError('training data should be Dataset instance, met{}'.format(type(train_set).__name__))
valid_handles = None
valid_cnames = None
n_valid = 0
if valid_sets is not None:
for valid in valid_sets:
......@@ -596,36 +605,364 @@ class Booster(object):
raise TypeError('valid data should be Dataset instance, met{}'.format(type(valid).__name__))
valid_handles = c_array(ctypes.c_void_p, [valid.handle for valid in valid_sets])
if name_valid_sets is None:
name_valid_sets = ["valid_{}".format(x) for x in range(len(valid_sets)) ]
name_valid_sets = ["valid_{}".format(x+1) for x in range(len(valid_sets)) ]
if len(valid_sets) != len(name_valid_sets):
raise Exception('len of valid_sets should be equal with len of name_valid_sets')
valid_cnames = c_array(ctypes.c_char_p, [c_str(x) for x in name_valid_sets])
n_valid = len(valid_sets)
ref_input_model = None
params_str = dict_to_str(params)
if model_file is not None:
ref_input_model = c_str(model_file)
"""construct booster object"""
_safe_call(LIB.LGBM_BoosterCreate(
_safe_call(_LIB.LGBM_BoosterCreate(
train_set.handle,
valid_handles,
valid_cnames,
n_valid,
params_str,
c_str(params_str),
ref_input_model,
ctypes.byref(self.handle)))
"""if need to continue train"""
if model_file is not None:
self.init_continue_train(train_set)
self.__init_continue_train(train_set)
if valid_sets is not None:
for valid in valid_sets:
self.init_continue_train(valid)
self.__init_continue_train(valid)
"""save reference to data"""
self.train_set = train_set
self.valid_sets = valid_sets
self.name_valid_sets = name_valid_sets
self.__num_dataset = 1 + n_valid
self.__training_score = None
out_len = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle,
ctypes.byref(out_len)))
self.__num_class = out_len.value
"""buffer for inner predict"""
self.__inner_predict_buffer = [None for _ in range(self.__num_dataset)]
"""Get num of inner evals"""
_safe_call(_LIB.LGBM_BoosterGetEvalCounts(
self.handle,
ctypes.byref(out_len)))
self.__num_inner_eval = out_len.value
if self.__num_inner_eval > 0:
"""Get name of evals"""
string_buffers = [ctypes.create_string_buffer(255) for i in range(self.__num_inner_eval)]
ptr_string_buffers = (ctypes.c_char_p*self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetEvalNames(
self.handle,
ctypes.byref(out_len),
ptr_string_buffers))
if self.__num_inner_eval != out_len.value:
raise ValueError("size of eval names doesn't equal with num_evals")
self.__name_inner_eval = []
for i in range(self.__num_inner_eval):
self.__name_inner_eval.append(string_buffers[i].value.decode())
elif model_file is not None:
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile(c_str(model_file), ctypes.byref(self.handle)))
"""Prediction task"""
out_num_total_model = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(model_file),
ctypes.byref(out_num_total_model),
ctypes.byref(self.handle)))
self.__num_total_model = out_num_total_model.value
out_len = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle,
ctypes.byref(out_len)))
self.__num_class = out_len.value
else:
raise TypeError('At least need training dataset or model file to create booster instance')
def __del__(self):
_LIB.LGBM_BoosterFree(self.handle)
_safe_call(_LIB.LGBM_BoosterFree(self.handle))
def update(self, fobj=None):
"""
Update for one iteration
Note: for multi-class task, the score is group by class_id first, then group by row_id
if you want to get i-th row score in j-th class, the access way is score[j*num_data+i]
and you should group grad and hess in this way as well
Parameters
----------
fobj : function
Customized objective function.
Returns
-------
is_finished, bool
"""
is_finished = ctypes.c_int(0)
if fobj is None:
_safe_call(_LIB.LGBM_BoosterUpdateOneIter(
self.handle,
ctypes.byref(is_finished)))
return is_finished.value == 1
else:
grad, hess = fobj(self.__inner_predict(0), self.train_set)
return self.boost(grad, hess)
def boost(self, grad, hess):
"""
Boost the booster for one iteration, with customized gradient statistics.
Note: for multi-class task, the score is group by class_id first, then group by row_id
if you want to get i-th row score in j-th class, the access way is score[j*num_data+i]
and you should group grad and hess in this way as well
Parameters
----------
grad : 1d numpy with dtype=float32
The first order of gradient.
hess : 1d numpy with dtype=float32
The second order of gradient.
Returns
-------
is_finished, bool
"""
if not is_numpy_1d_array(grad) and not is_numpy_1d_array(hess):
raise TypeError('type of grad / hess should be 1d numpy object')
if not grad.dtype == np.float32 and not hess.dtype == np.float32:
raise TypeError('type of grad / hess should be np.float32')
if len(grad) != len(hess):
raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess)))
is_finished = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
self.handle,
grad.ctypes.data_as(ctypes.c_float),
hess.ctypes.data_as(ctypes.c_float),
ctypes.byref(is_finished)))
return is_finished.value == 1
def eval_train(self, feval=None):
"""Evaluate for training data
Parameters
----------
feval : function
Custom evaluation function.
Returns
-------
result: str
Evaluation result string.
"""
return self.__inner_eval("training", 0, feval)
def eval_valid(self, feval=None):
"""Evaluate for validation data
Parameters
----------
feval : function
Custom evaluation function.
Returns
-------
result: str
Evaluation result string.
"""
ret = []
for i in range(1, self.__num_dataset):
ret.append(self.__inner_eval(self.name_valid_sets[i-1], i, feval))
return '\n'.join(ret)
def save_model(self, filename, num_iteration=-1):
_safe_call(_LIB.LGBM_BoosterSaveModel(
self.handle,
num_iteration,
c_str(filename)))
def predict(self, data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True):
if isinstance(data, Dataset):
raise TypeError("cannot use Dataset instance for prediction, please use raw data instead")
predict_type = C_API_PREDICT_NORMAL
if raw_score:
predict_type = cC_API_PREDICT_RAW_SCORE
if pred_leaf:
predict_type = C_API_PREDICT_LEAF_INDEX
int_data_has_header = 0
if data_has_header:
int_data_has_header = 1
if is_str(data):
tmp_pred_fname = tempfile.NamedTemporaryFile(prefix="lightgbm_tmp_pred_").name
_safe_call(_LIB.LGBM_BoosterPredictForFile(
self.handle,
c_str(data),
int_data_has_header,
predict_type,
num_iteration,
c_str(tmp_pred_fname)))
lines = open(tmp_pred_fname,"r").readlines()
nrow = len(lines)
preds = []
for line in lines:
for token in line.split('\t'):
preds.append(float(token))
preds = np.array(preds, copy=False)
os.remove(tmp_pred_fname)
elif isinstance(data, scipy.sparse.csr_matrix):
preds, nrow = self.__pred_for_csr(data, num_iteration, predict_type)
elif isinstance(data, np.ndarray):
preds, nrow = self.__pred_for_np2d(data, num_iteration, predict_type)
else:
try:
csr = scipy.sparse.csr_matrix(data)
res = self.__pred_for_csr(csr, num_iteration, predict_type)
except:
raise TypeError('can not predict data for type {}'.format(type(data).__name__))
if pred_leaf:
preds = preds.astype(np.int32)
if preds.size != nrow and is_reshape:
if preds.size % nrow == 0:
ncol = int(preds.size / nrow)
preds = preds.reshape(nrow, ncol)
else:
raise ValueError('len of predict result(%d) cannot be divide nrow(%d)' %(preds.size, nrow) )
return preds
def __pred_for_np2d(self, mat, num_iteration, predict_type):
"""
Predict for a 2-D numpy matrix.
"""
if len(mat.shape) != 2:
raise ValueError('Input numpy.ndarray must be 2 dimensional')
if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
else:
"""change non-float data to float data, need to copy"""
data = np.array(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data = c_float_array(data)
n_preds = self.__num_class * mat.shape[0]
if predict_type == C_API_PREDICT_LEAF_INDEX:
if num_iteration > 0:
n_preds *= num_iteration
else:
used_iteration = self.__num_total_model / self.__num_class
n_preds *= used_iteration
preds = np.zeros(n_preds, dtype=np.float32)
out_num_preds = ctypes.c_int64(0)
_safe_call(LIB.LGBM_BoosterPredictForMat(
self.handle,
ptr_data,
type_ptr_data,
mat.shape[0],
mat.shape[1],
C_API_IS_ROW_MAJOR,
predict_type,
num_iteration,
ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
))
if n_preds != out_num_preds.value:
raise ValueError("incorrect number for predict result")
return preds, mat.shape[0]
def __pred_for_csr(self, csr, num_iteration, predict_type):
"""
Predict for a csr data
"""
nrow = len(csr.indptr) - 1
n_preds = self.__num_class * nrow
if predict_type == C_API_PREDICT_LEAF_INDEX:
if num_iteration > 0:
n_preds *= num_iteration
else:
used_iteration = self.__num_total_model / self.__num_class
n_preds *= used_iteration
preds = np.zeros(n_preds, dtype=np.float32)
out_num_preds = ctypes.c_int64(0)
ptr_indptr, type_ptr_indptr = c_int_array(csr.indptr)
ptr_data, type_ptr_data = c_float_array(csr.data)
_safe_call(LIB.LGBM_BoosterPredictForCSR(
self.handle,
ptr_indptr,
type_ptr_indptr,
csr.indices.ctypes.data_as(ctypes.c_int32),
ptr_data,
type_ptr_data,
len(csr.indptr),
len(csr.data),
csr.shape[1],
predict_type,
num_iteration,
ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
))
if n_preds != out_num_preds.value:
raise ValueError("incorrect number for predict result")
return preds, nrow
def __inner_eval(self, data_name, data_idx, feval=None):
if data_idx >= self.__num_dataset:
raise ValueError("data_idx should be smaller than number of dataset")
ret = []
if self.__num_inner_eval > 0:
result = np.array([0.0 for _ in range(self.__num_inner_eval)], dtype=np.float32)
out_len = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetEval(
self.handle,
data_idx,
ctypes.byref(out_len),
result.ctypes.data_as(ctypes.POINTER(ctypes.c_float))))
if out_len.value != self.__num_inner_eval:
raise ValueError("incorrect number of eval results")
for i in range(self.__num_inner_eval):
ret.append('%s %s : %f' %(data_name, self.__name_inner_eval[i], result[i]))
if feval is not None:
if data_idx == 0:
cur_data = self.train_set
else:
cur_data = self.valid_sets[data_idx - 1]
feval_ret = feval(self.__inner_predict(data_idx), cur_data)
if isinstance(feval_ret, list):
for name, val in feval_ret:
ret.append('%s %s : %f' % (data_name, name, val))
else:
name, val = feval_ret
ret.append('%s %s : %f' % (data_name, name, val))
return '\t'.join(ret)
def __inner_predict(self, data_idx):
if data_idx >= self.__num_dataset:
raise ValueError("data_idx should be smaller than number of dataset")
if self.__inner_predict_buffer[data_idx] is None:
if data_idx == 0:
num_data = self.train_set.num_data() * self.__num_class
else:
num_data = self.valid_sets[data_idx - 1].num_data() * self.__num_class
self.__inner_predict_buffer[data_idx] = \
np.array([0.0 for _ in range(num_data)], dtype=np.float32, copy=False)
out_len = ctypes.c_int64(0)
data_ptr = self.__inner_predict_buffer[data_idx].ctypes.data_as(ctypes.POINTER(ctypes.c_float))
_safe_call(_LIB.LGBM_BoosterGetPredict(
self.handle,
data_idx,
ctypes.byref(out_len),
data_ptr))
if out_len.value != len(self.__inner_predict_buffer[data_idx]):
raise ValueError("incorrect number of predict results for data %d" %(data_idx) )
return self.__inner_predict_buffer[data_idx]
def __init_continue_train(self, dataset):
if dataset.raw_data is None:
raise ValueError("should set is_continue_train=True in dataset while need to continue train")
init_score = self.predict(dataset.raw_data, raw_score=True,data_has_header=dataset.data_has_header, is_reshape=False)
dataset.set_init_score(init_score)
dataset.free_raw_data()
#tmp test
train_data = Dataset('../../examples/binary_classification/binary.train')
test_data = Dataset('../../examples/binary_classification/binary.test', reference = train_data)
param = {"metric":"l2,l1"}
lgb = Booster(train_set=train_data, valid_sets=[test_data], params=param)
for i in range(100):
lgb.update()
print(lgb.eval_valid())
print(lgb.eval_train())
print(lgb.predict('../../examples/binary_classification/binary.train'))
\ No newline at end of file
......@@ -108,7 +108,7 @@ void Application::LoadData() {
// prediction is needed if using input initial model(continued train)
PredictFunction predict_fun = nullptr;
// need to continue training
if (boosting_->NumberOfSubModels() > 0) {
if (boosting_->NumberOfTotalModel() > 0) {
Predictor predictor(boosting_.get(), true, false);
predict_fun = predictor.GetPredictFunction();
}
......@@ -235,7 +235,7 @@ void Application::Train() {
void Application::Predict() {
boosting_->SetNumUsedModel(config_.io_config.num_model_predict);
boosting_->SetNumIterationForPred(config_.io_config.num_iteration_predict);
// create predictor
Predictor predictor(boosting_.get(), config_.io_config.is_predict_raw_score,
config_.io_config.is_predict_leaf_index);
......
......@@ -43,6 +43,7 @@ public:
* \brief one training iteration
*/
bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override {
is_update_score_cur_iter_ = false;
GBDT::TrainOneIter(gradient, hessian, false);
// normalize
Normalize();
......@@ -58,20 +59,24 @@ public:
* \return training score
*/
const score_t* GetTrainingScore(data_size_t* out_len) override {
DroppingTrees();
if (!is_update_score_cur_iter_) {
// only drop one time in one iteration
DroppingTrees();
is_update_score_cur_iter_ = true;
}
*out_len = train_score_updater_->num_data() * num_class_;
return train_score_updater_->score();
}
/*!
* \brief save model to file
* \param num_used_model number of model that want to save, -1 means save all
* \param num_iteration -1 means save all
* \param is_finish is training finished or not
* \param filename filename that want to save to
*/
void SaveModelToFile(int num_used_model, bool is_finish, const char* filename) override {
void SaveModelToFile(int num_iteration, bool is_finish, const char* filename) override {
// only save model once when is_finish = true
if (is_finish && saved_model_size_ < 0) {
GBDT::SaveModelToFile(num_used_model, is_finish, filename);
GBDT::SaveModelToFile(num_iteration, is_finish, filename);
}
}
/*!
......@@ -133,6 +138,8 @@ private:
double drop_rate_;
/*! \brief Random generator, used to select dropping trees */
Random random_for_drop_;
/*! \brief Flag that the score is update on current iter or not*/
bool is_update_score_cur_iter_;
};
} // namespace LightGBM
......
......@@ -16,7 +16,7 @@
namespace LightGBM {
GBDT::GBDT() : saved_model_size_(-1), num_used_model_(0) {
GBDT::GBDT() : saved_model_size_(-1), num_iteration_for_pred_(0) {
}
......@@ -29,7 +29,7 @@ void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const O
gbdt_config_ = config;
iter_ = 0;
saved_model_size_ = -1;
num_used_model_ = 0;
num_iteration_for_pred_ = 0;
max_feature_idx_ = 0;
early_stopping_round_ = gbdt_config_->early_stopping_round;
shrinkage_rate_ = gbdt_config_->learning_rate;
......@@ -296,24 +296,23 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) {
return train_score_updater_->score();
}
void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const {
void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) {
CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
std::vector<double> ret;
const score_t* raw_scores = nullptr;
data_size_t num_data = 0;
if (data_idx == 0) {
raw_scores = train_score_updater_->score();
raw_scores = GetTrainingScore(out_len);
num_data = train_score_updater_->num_data();
} else {
auto used_idx = data_idx - 1;
raw_scores = valid_score_updater_[used_idx]->score();
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = num_data * num_class_;
}
*out_len = num_data * num_class_;
if (num_class_ > 1) {
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
std::vector<double> tmp_result;
for (int j = 0; j < num_class_; ++j) {
......@@ -325,12 +324,12 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len)
}
}
} else if(sigmoid_ > 0.0f){
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<score_t>(1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * raw_scores[i])));
}
} else {
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = raw_scores[i];
}
......@@ -348,7 +347,7 @@ void GBDT::Boosting() {
GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
}
void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filename) {
void GBDT::SaveModelToFile(int num_iteration, bool is_finish, const char* filename) {
// first time to this function, open file
if (saved_model_size_ < 0) {
model_output_file_.open(filename);
......@@ -373,10 +372,11 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
if (!model_output_file_.is_open()) {
return;
}
if (num_used_model == NO_LIMIT) {
int num_used_model = 0;
if (num_iteration == NO_LIMIT) {
num_used_model = static_cast<int>(models_.size());
} else {
num_used_model = num_used_model * num_class_;
num_used_model = num_iteration * num_class_;
}
int rest = num_used_model - early_stopping_round_ * num_class_;
// output tree models
......@@ -452,7 +452,7 @@ void GBDT::LoadModelFromString(const std::string& model_str) {
}
}
Log::Info("Finished loading %d models", models_.size());
num_used_model_ = static_cast<int>(models_.size()) / num_class_;
num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
}
std::string GBDT::FeatureImportance() const {
......@@ -486,7 +486,7 @@ std::string GBDT::FeatureImportance() const {
std::vector<double> GBDT::PredictRaw(const double* value) const {
std::vector<double> ret(num_class_, 0.0f);
for (int i = 0; i < num_used_model_; ++i) {
for (int i = 0; i < num_iteration_for_pred_; ++i) {
for (int j = 0; j < num_class_; ++j) {
ret[j] += models_[i * num_class_ + j]->Predict(value);
}
......@@ -496,7 +496,7 @@ std::vector<double> GBDT::PredictRaw(const double* value) const {
std::vector<double> GBDT::Predict(const double* value) const {
std::vector<double> ret(num_class_, 0.0f);
for (int i = 0; i < num_used_model_; ++i) {
for (int i = 0; i < num_iteration_for_pred_; ++i) {
for (int j = 0; j < num_class_; ++j) {
ret[j] += models_[i * num_class_ + j]->Predict(value);
}
......@@ -512,7 +512,7 @@ std::vector<double> GBDT::Predict(const double* value) const {
std::vector<int> GBDT::PredictLeafIndex(const double* value) const {
std::vector<int> ret;
for (int i = 0; i < num_used_model_; ++i) {
for (int i = 0; i < num_iteration_for_pred_; ++i) {
for (int j = 0; j < num_class_; ++j) {
ret.push_back(models_[i * num_class_ + j]->PredictLeafIndex(value));
}
......
......@@ -73,7 +73,7 @@ public:
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len lenght of returned score
*/
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const override;
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) override;
/*!
* \brief Predtion for one record without sigmoid transformation
......@@ -98,11 +98,11 @@ public:
/*!
* \brief save model to file
* \param num_used_model number of model that want to save, -1 means save all
* \param num_iteration -1 means save all
* \param is_finish is training finished or not
* \param filename filename that want to save to
*/
virtual void SaveModelToFile(int num_used_model, bool is_finish, const char* filename) override;
virtual void SaveModelToFile(int num_iteration, bool is_finish, const char* filename) override;
/*!
* \brief Restore from a serialized string
*/
......@@ -119,11 +119,12 @@ public:
*/
inline int LabelIdx() const override { return label_idx_; }
/*!
* \brief Get number of weak sub-models
* \return Number of weak sub-models
*/
inline int NumberOfSubModels() const override { return static_cast<int>(models_.size()); }
inline int NumberOfTotalModel() const override { return static_cast<int>(models_.size()); }
/*!
* \brief Get number of classes
......@@ -132,11 +133,13 @@ public:
inline int NumberOfClasses() const override { return num_class_; }
/*!
* \brief Set number of used model for prediction
* \brief Set number of iterations for prediction
*/
inline void SetNumUsedModel(int num_used_model) {
if (num_used_model >= 0) {
num_used_model_ = static_cast<int>(num_used_model / num_class_);
inline void SetNumIterationForPred(int num_iteration) override {
if (num_iteration > 0) {
num_iteration_for_pred_ = num_iteration;
} else {
num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
}
}
......@@ -236,7 +239,7 @@ protected:
/*! \brief File to write models */
std::ofstream model_output_file_;
/*! \brief number of used model */
int num_used_model_;
int num_iteration_for_pred_;
/*! \brief Shrinkage rate for one iteration */
double shrinkage_rate_;
};
......
......@@ -95,8 +95,8 @@ public:
return boosting_->TrainOneIter(gradients, hessians, false);
}
void PrepareForPrediction(int num_used_model, int predict_type) {
boosting_->SetNumUsedModel(num_used_model);
void PrepareForPrediction(int num_iteration, int predict_type) {
boosting_->SetNumIterationForPred(num_iteration);
bool is_predict_leaf = false;
bool is_raw_score = false;
if (predict_type == C_API_PREDICT_LEAF_INDEX) {
......@@ -109,6 +109,10 @@ public:
predictor_.reset(new Predictor(boosting_.get(), is_raw_score, is_predict_leaf));
}
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) {
boosting_->GetPredictAt(data_idx, out_result, out_len);
}
std::vector<double> Predict(const std::vector<std::pair<int, double>>& features) {
return predictor_->GetPredictFunction()(features);
}
......@@ -117,8 +121,8 @@ public:
predictor_->Predict(data_filename, result_filename, data_has_header);
}
void SaveModelToFile(int num_used_model, const char* filename) {
boosting_->SaveModelToFile(num_used_model, true, filename);
void SaveModelToFile(int num_iteration, const char* filename) {
boosting_->SaveModelToFile(num_iteration, true, filename);
}
int GetEvalCounts() const {
......@@ -129,22 +133,25 @@ public:
return ret;
}
int GetEvalNames(const char*** out_strs) const {
int GetEvalNames(char** out_strs) const {
int idx = 0;
for (const auto& metric : train_metric_) {
for (const auto& name : metric->GetName()) {
*(out_strs[idx++]) = name.c_str();
int j = 0;
auto name_cstr = name.c_str();
while (name_cstr[j] != '\0') {
out_strs[idx][j] = name_cstr[j];
++j;
}
out_strs[idx][j] = '\0';
++idx;
}
}
return idx;
}
const Boosting* GetBoosting() const { return boosting_.get(); }
const float* GetTrainingScore(int* out_len) const { return boosting_->GetTrainingScore(out_len); }
const inline int NumberOfClasses() const { return boosting_->NumberOfClasses(); }
private:
std::unique_ptr<Boosting> boosting_;
......@@ -449,9 +456,12 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
DllExport int LGBM_BoosterCreateFromModelfile(
const char* filename,
int64_t* num_total_model,
BoosterHandle* out) {
API_BEGIN();
*out = new Booster(filename);
auto ret = std::unique_ptr<Booster>(new Booster(filename));
*num_total_model = static_cast<int64_t>(ret->GetBoosting()->NumberOfTotalModel());
*out = ret.release();
API_END();
}
......@@ -461,6 +471,13 @@ DllExport int LGBM_BoosterFree(BoosterHandle handle) {
API_END();
}
DllExport int LGBM_BoosterGetNumClasses(BoosterHandle handle, int64_t* out_len) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = ref_booster->GetBoosting()->NumberOfClasses();
API_END();
}
DllExport int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int* is_finished) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
......@@ -501,7 +518,7 @@ DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len)
* \brief Get number of eval
* \return total number of eval result
*/
DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, const char*** out_strs) {
DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, char** out_strs) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = ref_booster->GetEvalNames(out_strs);
......@@ -524,39 +541,27 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
API_END();
}
DllExport int LGBM_BoosterGetTrainingScore(BoosterHandle handle,
int64_t* out_len,
const float** out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
int len = 0;
*out_result = ref_booster->GetTrainingScore(&len);
*out_len = static_cast<int64_t>(len);
API_END();
}
DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
int data,
int64_t* out_len,
float* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting();
int len = 0;
boosting->GetPredictAt(data, out_result, &len);
ref_booster->GetPredictAt(data, out_result, &len);
*out_len = static_cast<int64_t>(len);
API_END();
}
DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
int predict_type,
int64_t n_used_trees,
int data_has_header,
const char* data_filename,
int data_has_header,
int predict_type,
int64_t num_iteration,
const char* result_filename) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
ref_booster->PrepareForPrediction(static_cast<int>(num_iteration), predict_type);
bool bool_data_has_header = data_has_header > 0 ? true : false;
ref_booster->PredictForFile(data_filename, result_filename, bool_data_has_header);
API_END();
......@@ -572,23 +577,32 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int64_t nelem,
int64_t,
int predict_type,
int64_t n_used_trees,
double* out_result) {
int64_t num_iteration,
int64_t* out_len,
float* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
ref_booster->PrepareForPrediction(static_cast<int>(num_iteration), predict_type);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int num_class = ref_booster->NumberOfClasses();
int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
if (predict_type == C_API_PREDICT_LEAF_INDEX) {
if (num_iteration > 0) {
num_preb_in_one_row *= static_cast<int>(num_iteration);
} else {
num_preb_in_one_row *= ref_booster->GetBoosting()->NumberOfTotalModel() / num_preb_in_one_row;
}
}
int nrow = static_cast<int>(nindptr - 1);
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
out_result[i * num_preb_in_one_row + j] = static_cast<float>(predicton_result[j]);
}
}
*out_len = nrow * num_preb_in_one_row;
API_END();
}
......@@ -599,31 +613,40 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int32_t ncol,
int is_row_major,
int predict_type,
int64_t n_used_trees,
double* out_result) {
int64_t num_iteration,
int64_t* out_len,
float* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
ref_booster->PrepareForPrediction(static_cast<int>(num_iteration), predict_type);
auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
int num_class = ref_booster->NumberOfClasses();
int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
if (predict_type == C_API_PREDICT_LEAF_INDEX) {
if (num_iteration > 0) {
num_preb_in_one_row *= static_cast<int>(num_iteration);
} else {
num_preb_in_one_row *= ref_booster->GetBoosting()->NumberOfTotalModel() / num_preb_in_one_row;
}
}
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
out_result[i * num_preb_in_one_row + j] = static_cast<float>(predicton_result[j]);
}
}
*out_len = nrow * num_preb_in_one_row;
API_END();
}
DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_used_model,
int num_iteration,
const char* filename) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->SaveModelToFile(num_used_model, filename);
ref_booster->SaveModelToFile(num_iteration, filename);
API_END();
}
......
......@@ -183,7 +183,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "data_random_seed", &data_random_seed);
GetString(params, "data", &data_filename);
GetInt(params, "verbose", &verbosity);
GetInt(params, "num_model_predict", &num_model_predict);
GetInt(params, "num_iteration_predict", &num_iteration_predict);
GetInt(params, "bin_construct_sample_cnt", &bin_construct_sample_cnt);
GetBool(params, "is_pre_partition", &is_pre_partition);
GetBool(params, "is_enable_sparse", &is_enable_sparse);
......
......@@ -190,14 +190,16 @@ def test_booster():
test_free_dataset(train)
test_free_dataset(test[0])
booster2 = ctypes.c_void_p()
LIB.LGBM_BoosterCreateFromModelfile(c_str('model.txt'), ctypes.byref(booster2))
num_total_model = ctypes.c_long()
LIB.LGBM_BoosterCreateFromModelfile(c_str('model.txt'), ctypes.byref(num_total_model), ctypes.byref(booster2))
data = []
inp = open('../../examples/binary_classification/binary.test', 'r')
for line in inp.readlines():
data.append( [float(x) for x in line.split('\t')[1:]] )
inp.close()
mat = np.array(data)
preb = np.zeros(( mat.shape[0],1 ), dtype=np.float64)
preb = np.zeros(mat.shape[0], dtype=np.float32)
num_preb = ctypes.c_long()
data = np.array(mat.reshape(mat.size), copy=False)
LIB.LGBM_BoosterPredictForMat(booster2,
data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
......@@ -207,8 +209,9 @@ def test_booster():
1,
1,
50,
ctypes.byref(num_preb),
preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
LIB.LGBM_BoosterPredictForFile(booster2, 1, 50, 0, c_str('../../examples/binary_classification/binary.test'), c_str('preb.txt'))
LIB.LGBM_BoosterPredictForFile(booster2,c_str('../../examples/binary_classification/binary.test'),0 , 0, 50, c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2)
test_dataset()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment