Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
72c2d790
Commit
72c2d790
authored
Dec 31, 2016
by
Guolin Ke
Committed by
GitHub
Dec 31, 2016
Browse files
some refine for c_api (#152)
1. add csc support 2. some data type from float to double
parent
bd7274ba
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
410 additions
and
162 deletions
+410
-162
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+7
-1
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+75
-26
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+3
-9
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+79
-17
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+6
-6
src/boosting/gbdt.h
src/boosting/gbdt.h
+9
-1
src/c_api.cpp
src/c_api.cpp
+228
-99
tests/c_api_test/test.py
tests/c_api_test/test.py
+3
-3
No files found.
include/LightGBM/boosting.h
View file @
72c2d790
...
...
@@ -96,13 +96,19 @@ public:
*/
virtual
const
score_t
*
GetTrainingScore
(
int64_t
*
out_len
)
=
0
;
/*!
* \brief Get number of prediction values at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return length of the prediction result
*/
virtual
int64_t
GetNumPredictAt
(
int
data_idx
)
const
=
0
;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
*/
virtual
void
GetPredictAt
(
int
data_idx
,
score_t
*
result
,
int64_t
*
out_len
)
=
0
;
virtual
void
GetPredictAt
(
int
data_idx
,
double
*
result
,
int64_t
*
out_len
)
=
0
;
/*!
* \brief Prediction for one record, not sigmoid transform
...
...
include/LightGBM/c_api.h
View file @
72c2d790
...
...
@@ -370,7 +370,20 @@ DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, c
DllExport
int
LGBM_BoosterGetEval
(
BoosterHandle
handle
,
int
data_idx
,
int64_t
*
out_len
,
float
*
out_results
);
double
*
out_results
);
/*!
* \brief Get number of predictions for inner dataset
this can be used to support customized eval function
Note: should pre-allocate memory for out_result, its length is equal to num_class * num_data
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetNumPredict
(
BoosterHandle
handle
,
int
data_idx
,
int64_t
*
out_len
);
/*!
* \brief Get prediction for training data and validation data
...
...
@@ -385,7 +398,7 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
int
data_idx
,
int64_t
*
out_len
,
float
*
out_result
);
double
*
out_result
);
/*!
* \brief make prediction for file
...
...
@@ -407,6 +420,24 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
int64_t
num_iteration
,
const
char
*
result_filename
);
/*!
* \brief Get number of prediction
* \param handle handle
* \param num_row
* \param predict_type
* C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* C_API_PREDICT_RAW_SCORE: raw score
* C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len length of prediction
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterCalcNumPredict
(
BoosterHandle
handle
,
int64_t
num_row
,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
);
/*!
* \brief make prediction for a new data set
* Note: should pre-allocate memory for out_result,
...
...
@@ -442,7 +473,44 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
);
double
*
out_result
);
/*!
* \brief make prediction for a new data set
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param col_ptr pointer to col headers
* \param col_ptr_type type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex
* \param data fvalue
* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param ncol_ptr number of cols in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows
* \param predict_type
* C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* C_API_PREDICT_RAW_SCORE: raw score
* C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForCSC
(
BoosterHandle
handle
,
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
,
int64_t
num_row
,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
,
double
*
out_result
);
/*!
* \brief make prediction for a new data set
...
...
@@ -473,7 +541,7 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
);
double
*
out_result
);
/*!
* \brief save model into file
...
...
@@ -497,7 +565,7 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
int
buffer_len
,
int64_t
*
out_len
,
char
*
*
out_str
);
char
*
out_str
);
/*!
* \brief Get leaf value
...
...
@@ -510,7 +578,7 @@ DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
DllExport
int
LGBM_BoosterGetLeafValue
(
BoosterHandle
handle
,
int
tree_idx
,
int
leaf_idx
,
float
*
out_val
);
double
*
out_val
);
/*!
* \brief Set leaf value
...
...
@@ -523,26 +591,7 @@ DllExport int LGBM_BoosterGetLeafValue(BoosterHandle handle,
DllExport
int
LGBM_BoosterSetLeafValue
(
BoosterHandle
handle
,
int
tree_idx
,
int
leaf_idx
,
float
val
);
// some help functions used to convert data
std
::
function
<
std
::
vector
<
double
>
(
int
row_idx
)
>
RowFunctionFromDenseMatric
(
const
void
*
data
,
int
num_row
,
int
num_col
,
int
data_type
,
int
is_row_major
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
row_idx
)
>
RowPairFunctionFromDenseMatric
(
const
void
*
data
,
int
num_row
,
int
num_col
,
int
data_type
,
int
is_row_major
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
idx
)
>
RowFunctionFromCSR
(
const
void
*
indptr
,
int
indptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
nindptr
,
int64_t
nelem
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
idx
)
>
ColumnFunctionFromCSC
(
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
);
std
::
vector
<
double
>
SampleFromOneColumn
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
data
,
const
std
::
vector
<
int
>&
indices
);
double
val
);
#if defined(_MSC_VER)
// exception handle and error msg
...
...
include/LightGBM/dataset.h
View file @
72c2d790
...
...
@@ -324,14 +324,8 @@ public:
}
}
inline
void
PushOneColumn
(
int
tid
,
data_size_t
col_idx
,
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
feature_values
)
{
if
(
col_idx
>=
num_total_features_
)
{
return
;
}
int
feature_idx
=
used_feature_map_
[
col_idx
];
if
(
feature_idx
>=
0
)
{
for
(
auto
&
inner_data
:
feature_values
)
{
features_
[
feature_idx
]
->
PushData
(
tid
,
inner_data
.
first
,
inner_data
.
second
);
}
}
/*!
* \brief Look up the entry of used_feature_map_ for a column
* \param col_idx Index into used_feature_map_; no range check is done here
* \return The value stored in used_feature_map_ at col_idx
*/
inline int GetInnerFeatureIndex(int col_idx) const {
  return used_feature_map_[col_idx];
}
Dataset
*
Subset
(
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
is_enable_sparse
)
const
;
...
...
@@ -358,7 +352,7 @@ public:
* \param i Index for feature
* \return Pointer of feature
*/
inline
const
Feature
*
FeatureAt
(
int
i
)
const
{
return
features_
[
i
].
get
();
}
inline
Feature
*
FeatureAt
(
int
i
)
const
{
return
features_
[
i
].
get
();
}
/*!
* \brief Get meta data pointer
...
...
python-package/lightgbm/basic.py
View file @
72c2d790
...
...
@@ -288,10 +288,13 @@ class _InnerPredictor(object):
lines
=
tmp_file
.
readlines
()
nrow
=
len
(
lines
)
preds
=
[
float
(
token
)
for
line
in
lines
for
token
in
line
.
split
(
'
\t
'
)]
preds
=
np
.
array
(
preds
,
dtype
=
np
.
float
32
,
copy
=
False
)
preds
=
np
.
array
(
preds
,
dtype
=
np
.
float
64
,
copy
=
False
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
predict_type
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csc_matrix
):
preds
,
nrow
=
self
.
__pred_for_csc
(
data
,
num_iteration
,
predict_type
)
elif
isinstance
(
data
,
np
.
ndarray
):
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
predict_type
)
...
...
@@ -319,13 +322,14 @@ class _InnerPredictor(object):
"""
Get size of prediction result
"""
n_preds
=
self
.
num_class
*
nrow
if
predict_type
==
C_API_PREDICT_LEAF_INDEX
:
if
num_iteration
>
0
:
n_preds
*=
min
(
num_iteration
,
self
.
num_total_iteration
)
else
:
n_preds
*=
self
.
num_total_iteration
return
n_preds
n_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterCalcNumPredict
(
self
.
handle
,
nrow
,
predict_type
,
num_iteration
,
ctypes
.
byref
(
n_preds
)))
return
n_preds
.
value
def
__pred_for_np2d
(
self
,
mat
,
num_iteration
,
predict_type
):
"""
...
...
@@ -342,7 +346,7 @@ class _InnerPredictor(object):
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
32
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
self
.
handle
,
...
...
@@ -354,7 +358,7 @@ class _InnerPredictor(object):
predict_type
,
num_iteration
,
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
))
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
))
))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
...
...
@@ -366,7 +370,7 @@ class _InnerPredictor(object):
"""
nrow
=
len
(
csr
.
indptr
)
-
1
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
nrow
,
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
32
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
ptr_indptr
,
type_ptr_indptr
=
c_int_array
(
csr
.
indptr
)
...
...
@@ -385,7 +389,38 @@ class _InnerPredictor(object):
predict_type
,
num_iteration
,
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
return
preds
,
nrow
def
__pred_for_csc
(
self
,
csc
,
num_iteration
,
predict_type
):
"""
Predict for a csc data
"""
nrow
=
csc
.
shape
[
0
]
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
nrow
,
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
ptr_indptr
,
type_ptr_indptr
=
c_int_array
(
csc
.
indptr
)
ptr_data
,
type_ptr_data
=
c_float_array
(
csc
.
data
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForCSC
(
self
.
handle
,
ptr_indptr
,
type_ptr_indptr
,
csc
.
indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
type_ptr_data
,
len
(
csc
.
indptr
),
len
(
csc
.
data
),
csc
.
shape
[
0
],
predict_type
,
num_iteration
,
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
...
...
@@ -511,6 +546,8 @@ class _InnerDataset(object):
ctypes
.
byref
(
self
.
handle
)))
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
self
.
__init_from_csr
(
data
,
params_str
,
ref_dataset
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csc_matrix
):
self
.
__init_from_csc
(
data
,
params_str
,
ref_dataset
)
elif
isinstance
(
data
,
np
.
ndarray
):
self
.
__init_from_np2d
(
data
,
params_str
,
ref_dataset
)
else
:
...
...
@@ -541,6 +578,7 @@ class _InnerDataset(object):
for
j
in
range
(
self
.
predictor
.
num_class
):
new_init_score
[
j
*
num_data
+
i
]
=
init_score
[
i
*
self
.
predictor
.
num_class
+
j
]
init_score
=
new_init_score
init_score
=
init_score
.
astype
(
dtype
=
np
.
float32
,
copy
=
False
)
self
.
set_init_score
(
init_score
)
elif
self
.
predictor
is
not
None
:
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
...
...
@@ -655,6 +693,30 @@ class _InnerDataset(object):
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
def __init_from_csc(self, csc, params_str, ref_dataset):
    """
    Initialize data from a csc matrix.

    Raises ValueError when csc.indices and csc.data differ in length.
    On success self.handle holds the dataset handle filled in by
    LGBM_DatasetCreateFromCSC.
    """
    if len(csc.indices) != len(csc.data):
        raise ValueError('Length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
    self.handle = ctypes.c_void_p()

    ptr_indptr, type_ptr_indptr = c_int_array(csc.indptr)
    ptr_data, type_ptr_data = c_float_array(csc.data)

    # Bare Python ints are converted by ctypes as C `int` and masked to fit.
    # The size arguments are presumably int64_t on the C side (as in
    # LGBM_BoosterPredictForCSC) -- TODO confirm -- so pass them explicitly
    # as 64-bit values to avoid truncating large matrices.
    _safe_call(_LIB.LGBM_DatasetCreateFromCSC(
        ptr_indptr,
        type_ptr_indptr,
        csc.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
        ptr_data,
        type_ptr_data,
        ctypes.c_int64(len(csc.indptr)),
        ctypes.c_int64(len(csc.data)),
        ctypes.c_int64(csc.shape[0]),
        c_str(params_str),
        ref_dataset,
        ctypes.byref(self.handle)))
def
__del__
(
self
):
_safe_call
(
_LIB
.
LGBM_DatasetFree
(
self
.
handle
))
...
...
@@ -1498,7 +1560,7 @@ class Booster(object):
self
.
handle
,
buffer_len
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
ptr_string_buffer
))
)
ptr_string_buffer
))
actual_len
=
tmp_out_len
.
value
'''if buffer length is not long enough, reallocate a buffer'''
if
actual_len
>
buffer_len
:
...
...
@@ -1577,13 +1639,13 @@ class Booster(object):
self
.
__get_eval_info
()
ret
=
[]
if
self
.
__num_inner_eval
>
0
:
result
=
np
.
array
([
0.0
for
_
in
range
(
self
.
__num_inner_eval
)],
dtype
=
np
.
float
32
)
result
=
np
.
array
([
0.0
for
_
in
range
(
self
.
__num_inner_eval
)],
dtype
=
np
.
float
64
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetEval
(
self
.
handle
,
data_idx
,
ctypes
.
byref
(
tmp_out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
))))
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
))))
if
tmp_out_len
.
value
!=
self
.
__num_inner_eval
:
raise
ValueError
(
"Wrong length of eval results"
)
for
i
in
range
(
self
.
__num_inner_eval
):
...
...
@@ -1614,11 +1676,11 @@ class Booster(object):
else
:
n_preds
=
self
.
valid_sets
[
data_idx
-
1
].
num_data
()
*
self
.
__num_class
self
.
__inner_predict_buffer
[
data_idx
]
=
\
np
.
array
([
0.0
for
_
in
range
(
n_preds
)],
dtype
=
np
.
float
32
,
copy
=
False
)
np
.
array
([
0.0
for
_
in
range
(
n_preds
)],
dtype
=
np
.
float
64
,
copy
=
False
)
"""avoid to predict many time in one iteration"""
if
not
self
.
__is_predicted_cur_iter
[
data_idx
]:
tmp_out_len
=
ctypes
.
c_int64
(
0
)
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
))
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
))
_safe_call
(
_LIB
.
LGBM_BoosterGetPredict
(
self
.
handle
,
data_idx
,
...
...
src/boosting/gbdt.cpp
View file @
72c2d790
...
...
@@ -350,7 +350,7 @@ std::string GBDT::OutputMetric(int iter) {
/*! \brief Get eval result */
std
::
vector
<
double
>
GBDT
::
GetEvalAt
(
int
data_idx
)
const
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
metrics
_
.
size
()));
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
score_updater
_
.
size
()));
std
::
vector
<
double
>
ret
;
if
(
data_idx
==
0
)
{
for
(
auto
&
sub_metric
:
training_metrics_
)
{
...
...
@@ -378,8 +378,8 @@ const score_t* GBDT::GetTrainingScore(int64_t* out_len) {
return
train_score_updater_
->
score
();
}
void
GBDT
::
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
int64_t
*
out_len
)
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
metrics
_
.
size
()));
void
GBDT
::
GetPredictAt
(
int
data_idx
,
double
*
out_result
,
int64_t
*
out_len
)
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
score_updater
_
.
size
()));
const
score_t
*
raw_scores
=
nullptr
;
data_size_t
num_data
=
0
;
...
...
@@ -401,18 +401,18 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) {
}
Common
::
Softmax
(
&
tmp_result
);
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
out_result
[
j
*
num_data
+
i
]
=
static_cast
<
score_t
>
(
tmp_result
[
j
]);
out_result
[
j
*
num_data
+
i
]
=
static_cast
<
double
>
(
tmp_result
[
j
]);
}
}
}
else
if
(
sigmoid_
>
0.0
f
){
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
static_cast
<
score_t
>
(
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
raw_scores
[
i
])));
out_result
[
i
]
=
static_cast
<
double
>
(
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
raw_scores
[
i
])));
}
}
else
{
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
raw_scores
[
i
];
out_result
[
i
]
=
static_cast
<
double
>
(
raw_scores
[
i
]
)
;
}
}
...
...
src/boosting/gbdt.h
View file @
72c2d790
...
...
@@ -107,13 +107,21 @@ public:
*/
virtual
const
score_t
*
GetTrainingScore
(
int64_t
*
out_len
)
override
;
/*!
* \brief Get number of prediction values at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return num_data * num_class for the selected data
*/
virtual int64_t GetNumPredictAt(int data_idx) const override {
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
  data_size_t num_data = train_data_->num_data();
  if (data_idx > 0) {
    // data_idx is 1-based for validation sets
    num_data = valid_score_updater_[data_idx - 1]->num_data();
  }
  // Widen before multiplying so the product is computed in 64-bit
  // instead of overflowing in the operands' narrower type.
  return static_cast<int64_t>(num_data) * num_class_;
}
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
*/
void
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
int64_t
*
out_len
)
override
;
void
GetPredictAt
(
int
data_idx
,
double
*
out_result
,
int64_t
*
out_len
)
override
;
/*!
* \brief Prediction for one record without sigmoid transformation
...
...
src/c_api.cpp
View file @
72c2d790
...
...
@@ -2,6 +2,7 @@
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/threading.h>
#include <LightGBM/c_api.h>
#include <LightGBM/dataset_loader.h>
#include <LightGBM/dataset.h>
...
...
@@ -17,6 +18,7 @@
#include <memory>
#include <stdexcept>
#include <mutex>
#include <functional>
#include "./application/predictor.hpp"
#include "./boosting/gbdt.h"
...
...
@@ -171,7 +173,7 @@ public:
return
Predictor
(
boosting_
.
get
(),
is_raw_score
,
is_predict_leaf
);
}
void
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
int64_t
*
out_len
)
{
void
GetPredictAt
(
int
data_idx
,
double
*
out_result
,
int64_t
*
out_len
)
{
boosting_
->
GetPredictAt
(
data_idx
,
out_result
,
out_len
);
}
...
...
@@ -233,6 +235,38 @@ private:
using
namespace
LightGBM
;
// some help functions used to convert data
std
::
function
<
std
::
vector
<
double
>
(
int
row_idx
)
>
RowFunctionFromDenseMatric
(
const
void
*
data
,
int
num_row
,
int
num_col
,
int
data_type
,
int
is_row_major
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
row_idx
)
>
RowPairFunctionFromDenseMatric
(
const
void
*
data
,
int
num_row
,
int
num_col
,
int
data_type
,
int
is_row_major
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
idx
)
>
RowFunctionFromCSR
(
const
void
*
indptr
,
int
indptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
nindptr
,
int64_t
nelem
);
// Row iterator over one column of a CSC matrix
class CSC_RowIterator {
public:
  // Construct an iterator for column col_idx of the CSC arrays
  // (col_ptr / indices / data); defined outside this declaration.
  CSC_RowIterator(const void* col_ptr, int col_ptr_type, const int32_t* indices,
                  const void* data, int data_type, int64_t ncol_ptr, int64_t nelem, int col_idx);
  ~CSC_RowIterator() {}
  // Return the value at idx; indices may only be requested in ascending order
  double Get(int idx);
  // Return the next non-zero pair; an index < 0 means there is no more data
  std::pair<int, double> NextNonZero();
private:
  // NOTE(review): presumably the position of the next non-zero entry to read -- confirm against the definition
  int nonzero_idx_ = 0;
  // NOTE(review): presumably the row index and value of the most recently read entry -- confirm
  int cur_idx_ = -1;
  double cur_val_ = 0.0f;
  // NOTE(review): presumably set once the column has been exhausted -- confirm
  bool is_end_ = false;
  // Callable that returns an (index, value) pair for a given position
  std::function<std::pair<int, double>(int idx)> iter_fun_;
};
// start of c_api functions
DllExport
const
char
*
LGBM_GetLastError
()
{
return
LastErrorMsg
();
}
...
...
@@ -382,10 +416,8 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
IOConfig
io_config
;
io_config
.
Set
(
param
);
std
::
unique_ptr
<
Dataset
>
ret
;
auto
get_col_fun
=
ColumnFunctionFromCSC
(
col_ptr
,
col_ptr_type
,
indices
,
data
,
data_type
,
ncol_ptr
,
nelem
);
int32_t
nrow
=
static_cast
<
int32_t
>
(
num_row
);
if
(
reference
==
nullptr
)
{
Log
::
Warning
(
"Construct from CSC format is not efficient"
);
// sample data first
Random
rand
(
io_config
.
data_random_seed
);
const
int
sample_cnt
=
static_cast
<
int
>
(
nrow
<
io_config
.
bin_construct_sample_cnt
?
nrow
:
io_config
.
bin_construct_sample_cnt
);
...
...
@@ -393,8 +425,13 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
std
::
vector
<
std
::
vector
<
double
>>
sample_values
(
ncol_ptr
-
1
);
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
sample_values
.
size
());
++
i
)
{
auto
cur_col
=
get_col_fun
(
i
);
sample_values
[
i
]
=
SampleFromOneColumn
(
cur_col
,
sample_indices
);
CSC_RowIterator
col_it
(
col_ptr
,
col_ptr_type
,
indices
,
data
,
data_type
,
ncol_ptr
,
nelem
,
i
);
for
(
int
j
=
0
;
j
<
sample_cnt
;
j
++
)
{
auto
val
=
col_it
.
Get
(
sample_indices
[
j
]);
if
(
std
::
fabs
(
val
)
>
kEpsilon
)
{
sample_values
[
i
].
push_back
(
val
);
}
}
}
DatasetLoader
loader
(
io_config
,
nullptr
,
1
,
nullptr
);
ret
.
reset
(
loader
.
CostructFromSampleData
(
sample_values
,
sample_cnt
,
nrow
));
...
...
@@ -408,8 +445,17 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
ncol_ptr
-
1
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
auto
one_col
=
get_col_fun
(
i
);
ret
->
PushOneColumn
(
tid
,
i
,
one_col
);
int
feature_idx
=
ret
->
GetInnerFeatureIndex
(
i
);
if
(
feature_idx
<
0
)
{
continue
;
}
CSC_RowIterator
col_it
(
col_ptr
,
col_ptr_type
,
indices
,
data
,
data_type
,
ncol_ptr
,
nelem
,
i
);
int
row_idx
=
0
;
while
(
row_idx
<
nrow
)
{
auto
pair
=
col_it
.
NextNonZero
();
row_idx
=
pair
.
first
;
// no more data
if
(
row_idx
<
0
)
{
break
;
}
ret
->
FeatureAt
(
feature_idx
)
->
PushData
(
tid
,
row_idx
,
pair
.
second
);
}
}
ret
->
FinishLoad
();
*
out
=
ret
.
release
();
...
...
@@ -517,7 +563,6 @@ DllExport int LGBM_DatasetGetNumFeature(DatasetHandle handle,
API_END
();
}
// ---- start of booster
DllExport
int
LGBM_BoosterCreate
(
const
DatasetHandle
train_data
,
...
...
@@ -627,10 +672,7 @@ DllExport int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int64_t* out
*
out_iteration
=
ref_booster
->
GetBoosting
()
->
GetCurrentIteration
();
API_END
();
}
/*!
* \brief Get number of eval
* \return total number of eval result
*/
DllExport
int
LGBM_BoosterGetEvalCounts
(
BoosterHandle
handle
,
int64_t
*
out_len
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
...
...
@@ -638,10 +680,6 @@ DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len)
API_END
();
}
/*!
* \brief Get number of eval
* \return total number of eval result
*/
DllExport
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int64_t
*
out_len
,
char
**
out_strs
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
...
...
@@ -649,26 +687,34 @@ DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, c
API_END
();
}
DllExport
int
LGBM_BoosterGetEval
(
BoosterHandle
handle
,
int
data_idx
,
int64_t
*
out_len
,
float
*
out_results
)
{
double
*
out_results
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
boosting
=
ref_booster
->
GetBoosting
();
auto
result_buf
=
boosting
->
GetEvalAt
(
data_idx
);
*
out_len
=
static_cast
<
int64_t
>
(
result_buf
.
size
());
for
(
size_t
i
=
0
;
i
<
result_buf
.
size
();
++
i
)
{
(
out_results
)[
i
]
=
static_cast
<
float
>
(
result_buf
[
i
]);
(
out_results
)[
i
]
=
static_cast
<
double
>
(
result_buf
[
i
]);
}
API_END
();
}
// Writes to out_len the number of prediction values reported by the
// underlying boosting object for the data selected by data_idx.
DllExport int LGBM_BoosterGetNumPredict(BoosterHandle handle,
  int data_idx,
  int64_t* out_len) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  *out_len = ref_booster->GetBoosting()->GetNumPredictAt(data_idx);
  API_END();
}
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
int
data_idx
,
int64_t
*
out_len
,
float
*
out_result
)
{
double
*
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
GetPredictAt
(
data_idx
,
out_result
,
out_len
);
...
...
@@ -689,6 +735,30 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
API_END
();
}
// Number of output values produced for a single row.
// Starts from the number of classes; for leaf-index prediction it is
// multiplied by the number of iterations used (num_iteration capped at the
// current iteration count, or the current iteration count when
// num_iteration <= 0).
int GetNumPredOneRow(const Booster* ref_booster, int predict_type, int64_t num_iteration) {
  int preds_per_row = ref_booster->GetBoosting()->NumberOfClasses();
  if (predict_type != C_API_PREDICT_LEAF_INDEX) {
    return preds_per_row;
  }
  int64_t max_iteration = ref_booster->GetBoosting()->GetCurrentIteration();
  if (num_iteration > 0) {
    preds_per_row *= static_cast<int>(std::min(max_iteration, num_iteration));
  } else {
    preds_per_row *= max_iteration;
  }
  return preds_per_row;
}
// Writes to out_len the total number of prediction values for num_row rows:
// num_row times the per-row count from GetNumPredOneRow.
DllExport int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
  int64_t num_row,
  int predict_type,
  int64_t num_iteration,
  int64_t* out_len) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  const int per_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
  *out_len = static_cast<int64_t>(num_row * per_row);
  API_END();
}
DllExport
int
LGBM_BoosterPredictForCSR
(
BoosterHandle
handle
,
const
void
*
indptr
,
int
indptr_type
,
...
...
@@ -701,32 +771,70 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
)
{
double
*
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
predictor
=
ref_booster
->
NewPredictor
(
static_cast
<
int
>
(
num_iteration
),
predict_type
);
auto
get_row_fun
=
RowFunctionFromCSR
(
indptr
,
indptr_type
,
indices
,
data
,
data_type
,
nindptr
,
nelem
);
int
num_preb_in_one_row
=
ref_booster
->
GetBoosting
()
->
NumberOfClasses
();
if
(
predict_type
==
C_API_PREDICT_LEAF_INDEX
)
{
if
(
num_iteration
>
0
)
{
num_preb_in_one_row
*=
static_cast
<
int
>
(
num_iteration
);
}
else
{
num_preb_in_one_row
*=
ref_booster
->
GetBoosting
()
->
NumberOfTotalModel
()
/
num_preb_in_one_row
;
}
}
int
num_preb_in_one_row
=
GetNumPredOneRow
(
ref_booster
,
predict_type
,
num_iteration
);
int
nrow
=
static_cast
<
int
>
(
nindptr
-
1
);
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
predictor
.
GetPredictFunction
()(
one_row
);
for
(
int
j
=
0
;
j
<
static_cast
<
int
>
(
predicton_result
.
size
());
++
j
)
{
out_result
[
i
*
num_preb_in_one_row
+
j
]
=
static_cast
<
float
>
(
predicton_result
[
j
]);
out_result
[
i
*
num_preb_in_one_row
+
j
]
=
static_cast
<
double
>
(
predicton_result
[
j
]);
}
}
*
out_len
=
nrow
*
num_preb_in_one_row
;
API_END
();
}
// Predict for a matrix given in CSC layout.
// Each chunk of rows handed out by Threading::For builds one CSC_RowIterator
// per column, assembles every row as (column, value) pairs and writes the
// predictor's output into out_result at offset row * num_preb_in_one_row.
// out_len receives num_row * num_preb_in_one_row.
DllExport int LGBM_BoosterPredictForCSC(BoosterHandle handle,
  const void* col_ptr,
  int col_ptr_type,
  const int32_t* indices,
  const void* data,
  int data_type,
  int64_t ncol_ptr,
  int64_t nelem,
  int64_t num_row,
  int predict_type,
  int64_t num_iteration,
  int64_t* out_len,
  double* out_result) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  auto predictor = ref_booster->NewPredictor(static_cast<int>(num_iteration), predict_type);
  int num_preb_in_one_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
  // ncol_ptr counts the column pointers, i.e. number of columns + 1
  int ncol = static_cast<int>(ncol_ptr - 1);

  Threading::For<int64_t>(0, num_row,
    [&predictor, &out_result, num_preb_in_one_row, ncol, col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem]
  (int, data_size_t start, data_size_t end) {
    // one iterator per column, created fresh for this chunk of rows
    std::vector<CSC_RowIterator> iterators;
    for (int j = 0; j < ncol; ++j) {
      iterators.emplace_back(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, j);
    }
    std::vector<std::pair<int, double>> one_row;
    // rows are visited in increasing order, as CSC_RowIterator::Get requires
    for (int64_t i = start; i < end; ++i) {
      one_row.clear();
      for (int j = 0; j < ncol; ++j) {
        auto val = iterators[j].Get(static_cast<int>(i));
        // keep only values whose magnitude exceeds kEpsilon
        if (std::fabs(val) > kEpsilon) {
          one_row.emplace_back(j, val);
        }
      }
      auto predicton_result = predictor.GetPredictFunction()(one_row);
      for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
        out_result[i * num_preb_in_one_row + j] = static_cast<double>(predicton_result[j]);
      }
    }
  });
  *out_len = num_row * num_preb_in_one_row;
  API_END();
}
DllExport
int
LGBM_BoosterPredictForMat
(
BoosterHandle
handle
,
const
void
*
data
,
int
data_type
,
...
...
@@ -736,25 +844,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
)
{
double
*
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
predictor
=
ref_booster
->
NewPredictor
(
static_cast
<
int
>
(
num_iteration
),
predict_type
);
auto
get_row_fun
=
RowPairFunctionFromDenseMatric
(
data
,
nrow
,
ncol
,
data_type
,
is_row_major
);
int
num_preb_in_one_row
=
ref_booster
->
GetBoosting
()
->
NumberOfClasses
();
if
(
predict_type
==
C_API_PREDICT_LEAF_INDEX
)
{
if
(
num_iteration
>
0
)
{
num_preb_in_one_row
*=
static_cast
<
int
>
(
num_iteration
);
}
else
{
num_preb_in_one_row
*=
ref_booster
->
GetBoosting
()
->
NumberOfTotalModel
()
/
num_preb_in_one_row
;
}
}
int
num_preb_in_one_row
=
GetNumPredOneRow
(
ref_booster
,
predict_type
,
num_iteration
);
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
predictor
.
GetPredictFunction
()(
one_row
);
for
(
int
j
=
0
;
j
<
static_cast
<
int
>
(
predicton_result
.
size
());
++
j
)
{
out_result
[
i
*
num_preb_in_one_row
+
j
]
=
static_cast
<
float
>
(
predicton_result
[
j
]);
out_result
[
i
*
num_preb_in_one_row
+
j
]
=
static_cast
<
double
>
(
predicton_result
[
j
]);
}
}
*
out_len
=
nrow
*
num_preb_in_one_row
;
...
...
@@ -773,37 +874,34 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
int
buffer_len
,
int64_t
*
out_len
,
char
*
*
out_str
)
{
char
*
out_str
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
std
::
string
model
=
ref_booster
->
DumpModel
();
*
out_len
=
static_cast
<
int64_t
>
(
model
.
size
())
+
1
;
if
(
*
out_len
<=
buffer_len
)
{
std
::
strcpy
(
*
out_str
,
model
.
c_str
());
std
::
strcpy
(
out_str
,
model
.
c_str
());
}
API_END
();
}
DllExport
int
LGBM_BoosterGetLeafValue
(
BoosterHandle
handle
,
int
tree_idx
,
int
leaf_idx
,
float
*
out_val
)
{
double
*
out_val
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
*
out_val
=
static_cast
<
float
>
(
ref_booster
->
GetLeafValue
(
tree_idx
,
leaf_idx
));
*
out_val
=
static_cast
<
double
>
(
ref_booster
->
GetLeafValue
(
tree_idx
,
leaf_idx
));
API_END
();
}
DllExport
int
LGBM_BoosterSetLeafValue
(
BoosterHandle
handle
,
int
tree_idx
,
int
leaf_idx
,
float
val
)
{
double
val
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
SetLeafValue
(
tree_idx
,
leaf_idx
,
static_cast
<
double
>
(
val
)
)
;
ref_booster
->
SetLeafValue
(
tree_idx
,
leaf_idx
,
val
);
API_END
();
}
...
...
@@ -929,72 +1027,103 @@ RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
throw
std
::
runtime_error
(
"unknown data type in RowFunctionFromCSR"
);
}
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
idx
)
>
ColumnFunctionFromCSC
(
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
)
{
std
::
function
<
std
::
pair
<
int
,
double
>
(
int
idx
)
>
IterateFunctionFromCSC
(
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
,
int
col_idx
)
{
CHECK
(
col_idx
<
ncol_ptr
&&
col_idx
>=
0
);
if
(
data_type
==
C_API_DTYPE_FLOAT32
)
{
const
float
*
data_ptr
=
reinterpret_cast
<
const
float
*>
(
data
);
if
(
col_ptr_type
==
C_API_DTYPE_INT32
)
{
const
int32_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int32_t
*>
(
col_ptr
);
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
](
int
idx
)
{
std
::
vector
<
std
::
pair
<
int
,
double
>>
ret
;
int64_t
start
=
ptr_col_ptr
[
idx
];
int64_t
end
=
ptr_col_ptr
[
idx
+
1
];
for
(
int64_t
i
=
start
;
i
<
end
;
++
i
)
{
ret
.
emplace_back
(
indices
[
i
],
data_ptr
[
i
]);
}
return
ret
;
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
,
start
,
end
](
int
bias
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
}
int
idx
=
static_cast
<
int
>
(
indices
[
i
]);
double
val
=
static_cast
<
double
>
(
data_ptr
[
i
]);
return
std
::
make_pair
(
idx
,
val
);
};
}
else
if
(
col_ptr_type
==
C_API_DTYPE_INT64
)
{
const
int64_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int64_t
*>
(
col_ptr
);
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
](
int
idx
)
{
std
::
vector
<
std
::
pair
<
int
,
double
>>
ret
;
int64_t
start
=
ptr_col_ptr
[
idx
];
int64_t
end
=
ptr_col_ptr
[
idx
+
1
];
for
(
int64_t
i
=
start
;
i
<
end
;
++
i
)
{
ret
.
emplace_back
(
indices
[
i
],
data_ptr
[
i
]);
}
return
ret
;
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
,
start
,
end
](
int
bias
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
}
int
idx
=
static_cast
<
int
>
(
indices
[
i
]);
double
val
=
static_cast
<
double
>
(
data_ptr
[
i
]);
return
std
::
make_pair
(
idx
,
val
);
};
}
}
else
if
(
data_type
==
C_API_DTYPE_FLOAT64
)
{
const
double
*
data_ptr
=
reinterpret_cast
<
const
double
*>
(
data
);
if
(
col_ptr_type
==
C_API_DTYPE_INT32
)
{
const
int32_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int32_t
*>
(
col_ptr
);
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
](
int
idx
)
{
std
::
vector
<
std
::
pair
<
int
,
double
>>
ret
;
int64_t
start
=
ptr_col_ptr
[
idx
];
int64_t
end
=
ptr_col_ptr
[
idx
+
1
];
for
(
int64_t
i
=
start
;
i
<
end
;
++
i
)
{
ret
.
emplace_back
(
indices
[
i
],
data_ptr
[
i
]);
}
return
ret
;
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
,
start
,
end
](
int
bias
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
}
int
idx
=
static_cast
<
int
>
(
indices
[
i
]);
double
val
=
static_cast
<
double
>
(
data_ptr
[
i
]);
return
std
::
make_pair
(
idx
,
val
);
};
}
else
if
(
col_ptr_type
==
C_API_DTYPE_INT64
)
{
const
int64_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int64_t
*>
(
col_ptr
);
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
](
int
idx
)
{
std
::
vector
<
std
::
pair
<
int
,
double
>>
ret
;
int64_t
start
=
ptr_col_ptr
[
idx
];
int64_t
end
=
ptr_col_ptr
[
idx
+
1
];
for
(
int64_t
i
=
start
;
i
<
end
;
++
i
)
{
ret
.
emplace_back
(
indices
[
i
],
data_ptr
[
i
]);
}
return
ret
;
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
ptr_col_ptr
,
indices
,
data_ptr
,
ncol_ptr
,
nelem
,
start
,
end
](
int
bias
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
}
int
idx
=
static_cast
<
int
>
(
indices
[
i
]);
double
val
=
static_cast
<
double
>
(
data_ptr
[
i
]);
return
std
::
make_pair
(
idx
,
val
);
};
}
}
throw
std
::
runtime_error
(
"unknown data type in ColumnFunctionFromCSC"
);
throw
std
::
runtime_error
(
"unknown data type in CSC matrix"
);
}
CSC_RowIterator
::
CSC_RowIterator
(
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
,
int
col_idx
)
{
iter_fun_
=
IterateFunctionFromCSC
(
col_ptr
,
col_ptr_type
,
indices
,
data
,
data_type
,
ncol_ptr
,
nelem
,
col_idx
);
}
std
::
vector
<
double
>
SampleFromOneColumn
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
data
,
const
std
::
vector
<
int
>&
indices
)
{
size_t
j
=
0
;
std
::
vector
<
double
>
ret
;
for
(
auto
row_idx
:
indices
)
{
while
(
j
<
data
.
size
()
&&
data
[
j
].
first
<
static_cast
<
int
>
(
row_idx
))
{
++
j
;
double
CSC_RowIterator
::
Get
(
int
idx
)
{
while
(
idx
>
cur_idx_
&&
!
is_end_
)
{
auto
ret
=
iter_fun_
(
nonzero_idx_
)
;
if
(
ret
.
first
<
0
)
{
is_end_
=
true
;
break
;
}
if
(
j
<
data
.
size
()
&&
data
[
j
].
first
==
static_cast
<
int
>
(
row_idx
))
{
ret
.
push_back
(
data
[
j
].
second
);
cur_idx_
=
ret
.
first
;
cur_val_
=
ret
.
second
;
++
nonzero_idx_
;
}
if
(
idx
==
cur_idx_
)
{
return
cur_val_
;
}
else
{
return
0.0
f
;
}
}
std
::
pair
<
int
,
double
>
CSC_RowIterator
::
NextNonZero
()
{
if
(
!
is_end_
)
{
auto
ret
=
iter_fun_
(
nonzero_idx_
);
++
nonzero_idx_
;
if
(
ret
.
first
<
0
)
{
is_end_
=
true
;
}
return
ret
;
}
else
{
return
std
::
make_pair
(
-
1
,
0.0
);
}
}
tests/c_api_test/test.py
View file @
72c2d790
...
...
@@ -175,9 +175,9 @@ def test_booster():
is_finished
=
ctypes
.
c_int
(
0
)
for
i
in
range
(
100
):
LIB
.
LGBM_BoosterUpdateOneIter
(
booster
,
ctypes
.
byref
(
is_finished
))
result
=
np
.
array
([
0.0
],
dtype
=
np
.
float
32
)
result
=
np
.
array
([
0.0
],
dtype
=
np
.
float
64
)
out_len
=
ctypes
.
c_ulong
(
0
)
LIB
.
LGBM_BoosterGetEval
(
booster
,
0
,
ctypes
.
byref
(
out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
)))
LIB
.
LGBM_BoosterGetEval
(
booster
,
0
,
ctypes
.
byref
(
out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
)))
print
(
'%d Iteration test AUC %f'
%
(
i
,
result
[
0
]))
LIB
.
LGBM_BoosterSaveModel
(
booster
,
-
1
,
c_str
(
'model.txt'
))
LIB
.
LGBM_BoosterFree
(
booster
)
...
...
@@ -192,7 +192,7 @@ def test_booster():
data
.
append
(
[
float
(
x
)
for
x
in
line
.
split
(
'
\t
'
)[
1
:]]
)
inp
.
close
()
mat
=
np
.
array
(
data
)
preb
=
np
.
zeros
(
mat
.
shape
[
0
],
dtype
=
np
.
float
32
)
preb
=
np
.
zeros
(
mat
.
shape
[
0
],
dtype
=
np
.
float
64
)
num_preb
=
ctypes
.
c_long
()
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
copy
=
False
)
LIB
.
LGBM_BoosterPredictForMat
(
booster2
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment