Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
72c2d790
Commit
72c2d790
authored
Dec 31, 2016
by
Guolin Ke
Committed by
GitHub
Dec 31, 2016
Browse files
some refine for c_api (#152)
1. add csc support 2. some data type from float to double
parent
bd7274ba
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
410 additions
and
162 deletions
+410
-162
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+7
-1
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+75
-26
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+3
-9
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+79
-17
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+6
-6
src/boosting/gbdt.h
src/boosting/gbdt.h
+9
-1
src/c_api.cpp
src/c_api.cpp
+228
-99
tests/c_api_test/test.py
tests/c_api_test/test.py
+3
-3
No files found.
include/LightGBM/boosting.h
View file @
72c2d790
...
@@ -96,13 +96,19 @@ public:
...
@@ -96,13 +96,19 @@ public:
*/
*/
virtual
const
score_t
*
GetTrainingScore
(
int64_t
*
out_len
)
=
0
;
virtual
const
score_t
*
GetTrainingScore
(
int64_t
*
out_len
)
=
0
;
/*!
* \brief Get number of prediction results at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return length of the prediction result for this data
*/
virtual
int64_t
GetNumPredictAt
(
int
data_idx
)
const
=
0
;
/*!
/*!
* \brief Get prediction result at data_idx data
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param data_idx 0: training data, 1: 1st validation data
* \param result used to store prediction result, should allocate memory before call this function
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
* \param out_len length of returned score
*/
*/
virtual
void
GetPredictAt
(
int
data_idx
,
score_t
*
result
,
int64_t
*
out_len
)
=
0
;
virtual
void
GetPredictAt
(
int
data_idx
,
double
*
result
,
int64_t
*
out_len
)
=
0
;
/*!
/*!
* \brief Prediction for one record, not sigmoid transform
* \brief Prediction for one record, not sigmoid transform
...
...
include/LightGBM/c_api.h
View file @
72c2d790
...
@@ -370,7 +370,20 @@ DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, c
...
@@ -370,7 +370,20 @@ DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, c
DllExport
int
LGBM_BoosterGetEval
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterGetEval
(
BoosterHandle
handle
,
int
data_idx
,
int
data_idx
,
int64_t
*
out_len
,
int64_t
*
out_len
,
float
*
out_results
);
double
*
out_results
);
/*!
* \brief Get number of predictions for inner dataset
this can be used to support customized eval function
Note: the caller should pre-allocate memory for the prediction result; its length is equal to num_class * num_data
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetNumPredict
(
BoosterHandle
handle
,
int
data_idx
,
int64_t
*
out_len
);
/*!
/*!
* \brief Get prediction for training data and validation data
* \brief Get prediction for training data and validation data
...
@@ -385,7 +398,7 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
...
@@ -385,7 +398,7 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
int
data_idx
,
int
data_idx
,
int64_t
*
out_len
,
int64_t
*
out_len
,
float
*
out_result
);
double
*
out_result
);
/*!
/*!
* \brief make prediction for file
* \brief make prediction for file
...
@@ -407,6 +420,24 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
...
@@ -407,6 +420,24 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
int64_t
num_iteration
,
int64_t
num_iteration
,
const
char
*
result_filename
);
const
char
*
result_filename
);
/*!
* \brief Get number of prediction
* \param handle handle
* \param num_row
* \param predict_type
* C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* C_API_PREDICT_RAW_SCORE: raw score
* C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len length of prediction
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterCalcNumPredict
(
BoosterHandle
handle
,
int64_t
num_row
,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
);
/*!
/*!
* \brief make prediction for a new data set
* \brief make prediction for a new data set
* Note: should pre-allocate memory for out_result,
* Note: should pre-allocate memory for out_result,
...
@@ -442,7 +473,44 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
...
@@ -442,7 +473,44 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int
predict_type
,
int
predict_type
,
int64_t
num_iteration
,
int64_t
num_iteration
,
int64_t
*
out_len
,
int64_t
*
out_len
,
float
*
out_result
);
double
*
out_result
);
/*!
* \brief make prediction for a new data set
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param col_ptr pointer to col headers
* \param col_ptr_type type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex
* \param data fvalue
* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param ncol_ptr number of cols in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows
* \param predict_type
* C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* C_API_PREDICT_RAW_SCORE: raw score
* C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForCSC
(
BoosterHandle
handle
,
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
,
int64_t
num_row
,
int
predict_type
,
int64_t
num_iteration
,
int64_t
*
out_len
,
double
*
out_result
);
/*!
/*!
* \brief make prediction for a new data set
* \brief make prediction for a new data set
...
@@ -473,7 +541,7 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
...
@@ -473,7 +541,7 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int
predict_type
,
int
predict_type
,
int64_t
num_iteration
,
int64_t
num_iteration
,
int64_t
*
out_len
,
int64_t
*
out_len
,
float
*
out_result
);
double
*
out_result
);
/*!
/*!
* \brief save model into file
* \brief save model into file
...
@@ -497,7 +565,7 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
...
@@ -497,7 +565,7 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
int
buffer_len
,
int
buffer_len
,
int64_t
*
out_len
,
int64_t
*
out_len
,
char
*
*
out_str
);
char
*
out_str
);
/*!
/*!
* \brief Get leaf value
* \brief Get leaf value
...
@@ -510,7 +578,7 @@ DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
...
@@ -510,7 +578,7 @@ DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
DllExport
int
LGBM_BoosterGetLeafValue
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterGetLeafValue
(
BoosterHandle
handle
,
int
tree_idx
,
int
tree_idx
,
int
leaf_idx
,
int
leaf_idx
,
float
*
out_val
);
double
*
out_val
);
/*!
/*!
* \brief Set leaf value
* \brief Set leaf value
...
@@ -523,26 +591,7 @@ DllExport int LGBM_BoosterGetLeafValue(BoosterHandle handle,
...
@@ -523,26 +591,7 @@ DllExport int LGBM_BoosterGetLeafValue(BoosterHandle handle,
DllExport
int
LGBM_BoosterSetLeafValue
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterSetLeafValue
(
BoosterHandle
handle
,
int
tree_idx
,
int
tree_idx
,
int
leaf_idx
,
int
leaf_idx
,
float
val
);
double
val
);
// some help functions used to convert data
std
::
function
<
std
::
vector
<
double
>
(
int
row_idx
)
>
RowFunctionFromDenseMatric
(
const
void
*
data
,
int
num_row
,
int
num_col
,
int
data_type
,
int
is_row_major
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
row_idx
)
>
RowPairFunctionFromDenseMatric
(
const
void
*
data
,
int
num_row
,
int
num_col
,
int
data_type
,
int
is_row_major
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
idx
)
>
RowFunctionFromCSR
(
const
void
*
indptr
,
int
indptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
nindptr
,
int64_t
nelem
);
std
::
function
<
std
::
vector
<
std
::
pair
<
int
,
double
>>
(
int
idx
)
>
ColumnFunctionFromCSC
(
const
void
*
col_ptr
,
int
col_ptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
ncol_ptr
,
int64_t
nelem
);
std
::
vector
<
double
>
SampleFromOneColumn
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
data
,
const
std
::
vector
<
int
>&
indices
);
#if defined(_MSC_VER)
#if defined(_MSC_VER)
// exception handle and error msg
// exception handle and error msg
...
...
include/LightGBM/dataset.h
View file @
72c2d790
...
@@ -324,14 +324,8 @@ public:
...
@@ -324,14 +324,8 @@ public:
}
}
}
}
inline
void
PushOneColumn
(
int
tid
,
data_size_t
col_idx
,
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
feature_values
)
{
inline
int
GetInnerFeatureIndex
(
int
col_idx
)
const
{
if
(
col_idx
>=
num_total_features_
)
{
return
;
}
return
used_feature_map_
[
col_idx
];
int
feature_idx
=
used_feature_map_
[
col_idx
];
if
(
feature_idx
>=
0
)
{
for
(
auto
&
inner_data
:
feature_values
)
{
features_
[
feature_idx
]
->
PushData
(
tid
,
inner_data
.
first
,
inner_data
.
second
);
}
}
}
}
Dataset
*
Subset
(
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
is_enable_sparse
)
const
;
Dataset
*
Subset
(
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
is_enable_sparse
)
const
;
...
@@ -358,7 +352,7 @@ public:
...
@@ -358,7 +352,7 @@ public:
* \param i Index for feature
* \param i Index for feature
* \return Pointer of feature
* \return Pointer of feature
*/
*/
inline
const
Feature
*
FeatureAt
(
int
i
)
const
{
return
features_
[
i
].
get
();
}
inline
Feature
*
FeatureAt
(
int
i
)
const
{
return
features_
[
i
].
get
();
}
/*!
/*!
* \brief Get meta data pointer
* \brief Get meta data pointer
...
...
python-package/lightgbm/basic.py
View file @
72c2d790
...
@@ -288,10 +288,13 @@ class _InnerPredictor(object):
...
@@ -288,10 +288,13 @@ class _InnerPredictor(object):
lines
=
tmp_file
.
readlines
()
lines
=
tmp_file
.
readlines
()
nrow
=
len
(
lines
)
nrow
=
len
(
lines
)
preds
=
[
float
(
token
)
for
line
in
lines
for
token
in
line
.
split
(
'
\t
'
)]
preds
=
[
float
(
token
)
for
line
in
lines
for
token
in
line
.
split
(
'
\t
'
)]
preds
=
np
.
array
(
preds
,
dtype
=
np
.
float
32
,
copy
=
False
)
preds
=
np
.
array
(
preds
,
dtype
=
np
.
float
64
,
copy
=
False
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
predict_type
)
predict_type
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csc_matrix
):
preds
,
nrow
=
self
.
__pred_for_csc
(
data
,
num_iteration
,
predict_type
)
elif
isinstance
(
data
,
np
.
ndarray
):
elif
isinstance
(
data
,
np
.
ndarray
):
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
predict_type
)
predict_type
)
...
@@ -319,13 +322,14 @@ class _InnerPredictor(object):
...
@@ -319,13 +322,14 @@ class _InnerPredictor(object):
"""
"""
Get size of prediction result
Get size of prediction result
"""
"""
n_preds
=
self
.
num_class
*
nrow
n_preds
=
ctypes
.
c_int64
(
0
)
if
predict_type
==
C_API_PREDICT_LEAF_INDEX
:
_safe_call
(
_LIB
.
LGBM_BoosterCalcNumPredict
(
if
num_iteration
>
0
:
self
.
handle
,
n_preds
*=
min
(
num_iteration
,
self
.
num_total_iteration
)
nrow
,
else
:
predict_type
,
n_preds
*=
self
.
num_total_iteration
num_iteration
,
return
n_preds
ctypes
.
byref
(
n_preds
)))
return
n_preds
.
value
def
__pred_for_np2d
(
self
,
mat
,
num_iteration
,
predict_type
):
def
__pred_for_np2d
(
self
,
mat
,
num_iteration
,
predict_type
):
"""
"""
...
@@ -342,7 +346,7 @@ class _InnerPredictor(object):
...
@@ -342,7 +346,7 @@ class _InnerPredictor(object):
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
32
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
self
.
handle
,
self
.
handle
,
...
@@ -354,7 +358,7 @@ class _InnerPredictor(object):
...
@@ -354,7 +358,7 @@ class _InnerPredictor(object):
predict_type
,
predict_type
,
num_iteration
,
num_iteration
,
ctypes
.
byref
(
out_num_preds
),
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
))
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
))
))
))
if
n_preds
!=
out_num_preds
.
value
:
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
raise
ValueError
(
"Wrong length for predict results"
)
...
@@ -366,7 +370,7 @@ class _InnerPredictor(object):
...
@@ -366,7 +370,7 @@ class _InnerPredictor(object):
"""
"""
nrow
=
len
(
csr
.
indptr
)
-
1
nrow
=
len
(
csr
.
indptr
)
-
1
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
nrow
,
predict_type
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
nrow
,
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
32
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float
64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
ptr_indptr
,
type_ptr_indptr
=
c_int_array
(
csr
.
indptr
)
ptr_indptr
,
type_ptr_indptr
=
c_int_array
(
csr
.
indptr
)
...
@@ -385,7 +389,38 @@ class _InnerPredictor(object):
...
@@ -385,7 +389,38 @@ class _InnerPredictor(object):
predict_type
,
predict_type
,
num_iteration
,
num_iteration
,
ctypes
.
byref
(
out_num_preds
),
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
return
preds
,
nrow
def
__pred_for_csc
(
self
,
csc
,
num_iteration
,
predict_type
):
"""
Predict for a csc data
"""
nrow
=
csc
.
shape
[
0
]
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
nrow
,
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
ptr_indptr
,
type_ptr_indptr
=
c_int_array
(
csc
.
indptr
)
ptr_data
,
type_ptr_data
=
c_float_array
(
csc
.
data
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForCSC
(
self
.
handle
,
ptr_indptr
,
type_ptr_indptr
,
csc
.
indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
type_ptr_data
,
len
(
csc
.
indptr
),
len
(
csc
.
data
),
csc
.
shape
[
0
],
predict_type
,
num_iteration
,
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
))
))
if
n_preds
!=
out_num_preds
.
value
:
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
raise
ValueError
(
"Wrong length for predict results"
)
...
@@ -511,6 +546,8 @@ class _InnerDataset(object):
...
@@ -511,6 +546,8 @@ class _InnerDataset(object):
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
self
.
__init_from_csr
(
data
,
params_str
,
ref_dataset
)
self
.
__init_from_csr
(
data
,
params_str
,
ref_dataset
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csc_matrix
):
self
.
__init_from_csc
(
data
,
params_str
,
ref_dataset
)
elif
isinstance
(
data
,
np
.
ndarray
):
elif
isinstance
(
data
,
np
.
ndarray
):
self
.
__init_from_np2d
(
data
,
params_str
,
ref_dataset
)
self
.
__init_from_np2d
(
data
,
params_str
,
ref_dataset
)
else
:
else
:
...
@@ -541,6 +578,7 @@ class _InnerDataset(object):
...
@@ -541,6 +578,7 @@ class _InnerDataset(object):
for
j
in
range
(
self
.
predictor
.
num_class
):
for
j
in
range
(
self
.
predictor
.
num_class
):
new_init_score
[
j
*
num_data
+
i
]
=
init_score
[
i
*
self
.
predictor
.
num_class
+
j
]
new_init_score
[
j
*
num_data
+
i
]
=
init_score
[
i
*
self
.
predictor
.
num_class
+
j
]
init_score
=
new_init_score
init_score
=
new_init_score
init_score
=
init_score
.
astype
(
dtype
=
np
.
float32
,
copy
=
False
)
self
.
set_init_score
(
init_score
)
self
.
set_init_score
(
init_score
)
elif
self
.
predictor
is
not
None
:
elif
self
.
predictor
is
not
None
:
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
...
@@ -655,6 +693,30 @@ class _InnerDataset(object):
...
@@ -655,6 +693,30 @@ class _InnerDataset(object):
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
def __init_from_csc(self, csc, params_str, ref_dataset):
    """
    Build the underlying dataset handle from a scipy CSC matrix.

    Raises ValueError when the ``indices`` and ``data`` arrays of the
    matrix do not have the same length.
    """
    num_indices = len(csc.indices)
    num_values = len(csc.data)
    if num_indices != num_values:
        raise ValueError('Length mismatch: {} vs {}'.format(num_indices, num_values))
    # Fresh, empty handle; it is passed by reference to the C call below.
    self.handle = ctypes.c_void_p()

    # Column pointers and values are passed as (pointer, type-code) pairs.
    ptr_indptr, type_ptr_indptr = c_int_array(csc.indptr)
    ptr_data, type_ptr_data = c_float_array(csc.data)
    # Row indices are always handed over as int32.
    ptr_indices = csc.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32))

    ret = _LIB.LGBM_DatasetCreateFromCSC(
        ptr_indptr,
        type_ptr_indptr,
        ptr_indices,
        ptr_data,
        type_ptr_data,
        len(csc.indptr),
        len(csc.data),
        csc.shape[0],
        c_str(params_str),
        ref_dataset,
        ctypes.byref(self.handle))
    _safe_call(ret)
def
__del__
(
self
):
def
__del__
(
self
):
_safe_call
(
_LIB
.
LGBM_DatasetFree
(
self
.
handle
))
_safe_call
(
_LIB
.
LGBM_DatasetFree
(
self
.
handle
))
...
@@ -1498,7 +1560,7 @@ class Booster(object):
...
@@ -1498,7 +1560,7 @@ class Booster(object):
self
.
handle
,
self
.
handle
,
buffer_len
,
buffer_len
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
ptr_string_buffer
))
)
ptr_string_buffer
))
actual_len
=
tmp_out_len
.
value
actual_len
=
tmp_out_len
.
value
'''if buffer length is not long enough, reallocate a buffer'''
'''if buffer length is not long enough, reallocate a buffer'''
if
actual_len
>
buffer_len
:
if
actual_len
>
buffer_len
:
...
@@ -1577,13 +1639,13 @@ class Booster(object):
...
@@ -1577,13 +1639,13 @@ class Booster(object):
self
.
__get_eval_info
()
self
.
__get_eval_info
()
ret
=
[]
ret
=
[]
if
self
.
__num_inner_eval
>
0
:
if
self
.
__num_inner_eval
>
0
:
result
=
np
.
array
([
0.0
for
_
in
range
(
self
.
__num_inner_eval
)],
dtype
=
np
.
float
32
)
result
=
np
.
array
([
0.0
for
_
in
range
(
self
.
__num_inner_eval
)],
dtype
=
np
.
float
64
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetEval
(
_safe_call
(
_LIB
.
LGBM_BoosterGetEval
(
self
.
handle
,
self
.
handle
,
data_idx
,
data_idx
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
tmp_out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
))))
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
))))
if
tmp_out_len
.
value
!=
self
.
__num_inner_eval
:
if
tmp_out_len
.
value
!=
self
.
__num_inner_eval
:
raise
ValueError
(
"Wrong length of eval results"
)
raise
ValueError
(
"Wrong length of eval results"
)
for
i
in
range
(
self
.
__num_inner_eval
):
for
i
in
range
(
self
.
__num_inner_eval
):
...
@@ -1614,11 +1676,11 @@ class Booster(object):
...
@@ -1614,11 +1676,11 @@ class Booster(object):
else
:
else
:
n_preds
=
self
.
valid_sets
[
data_idx
-
1
].
num_data
()
*
self
.
__num_class
n_preds
=
self
.
valid_sets
[
data_idx
-
1
].
num_data
()
*
self
.
__num_class
self
.
__inner_predict_buffer
[
data_idx
]
=
\
self
.
__inner_predict_buffer
[
data_idx
]
=
\
np
.
array
([
0.0
for
_
in
range
(
n_preds
)],
dtype
=
np
.
float
32
,
copy
=
False
)
np
.
array
([
0.0
for
_
in
range
(
n_preds
)],
dtype
=
np
.
float
64
,
copy
=
False
)
"""avoid to predict many time in one iteration"""
"""avoid to predict many time in one iteration"""
if
not
self
.
__is_predicted_cur_iter
[
data_idx
]:
if
not
self
.
__is_predicted_cur_iter
[
data_idx
]:
tmp_out_len
=
ctypes
.
c_int64
(
0
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
))
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
))
_safe_call
(
_LIB
.
LGBM_BoosterGetPredict
(
_safe_call
(
_LIB
.
LGBM_BoosterGetPredict
(
self
.
handle
,
self
.
handle
,
data_idx
,
data_idx
,
...
...
src/boosting/gbdt.cpp
View file @
72c2d790
...
@@ -350,7 +350,7 @@ std::string GBDT::OutputMetric(int iter) {
...
@@ -350,7 +350,7 @@ std::string GBDT::OutputMetric(int iter) {
/*! \brief Get eval result */
/*! \brief Get eval result */
std
::
vector
<
double
>
GBDT
::
GetEvalAt
(
int
data_idx
)
const
{
std
::
vector
<
double
>
GBDT
::
GetEvalAt
(
int
data_idx
)
const
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
metrics
_
.
size
()));
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
score_updater
_
.
size
()));
std
::
vector
<
double
>
ret
;
std
::
vector
<
double
>
ret
;
if
(
data_idx
==
0
)
{
if
(
data_idx
==
0
)
{
for
(
auto
&
sub_metric
:
training_metrics_
)
{
for
(
auto
&
sub_metric
:
training_metrics_
)
{
...
@@ -378,8 +378,8 @@ const score_t* GBDT::GetTrainingScore(int64_t* out_len) {
...
@@ -378,8 +378,8 @@ const score_t* GBDT::GetTrainingScore(int64_t* out_len) {
return
train_score_updater_
->
score
();
return
train_score_updater_
->
score
();
}
}
void
GBDT
::
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
int64_t
*
out_len
)
{
void
GBDT
::
GetPredictAt
(
int
data_idx
,
double
*
out_result
,
int64_t
*
out_len
)
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
metrics
_
.
size
()));
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_
score_updater
_
.
size
()));
const
score_t
*
raw_scores
=
nullptr
;
const
score_t
*
raw_scores
=
nullptr
;
data_size_t
num_data
=
0
;
data_size_t
num_data
=
0
;
...
@@ -401,18 +401,18 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) {
...
@@ -401,18 +401,18 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) {
}
}
Common
::
Softmax
(
&
tmp_result
);
Common
::
Softmax
(
&
tmp_result
);
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
out_result
[
j
*
num_data
+
i
]
=
static_cast
<
score_t
>
(
tmp_result
[
j
]);
out_result
[
j
*
num_data
+
i
]
=
static_cast
<
double
>
(
tmp_result
[
j
]);
}
}
}
}
}
else
if
(
sigmoid_
>
0.0
f
){
}
else
if
(
sigmoid_
>
0.0
f
){
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
static_cast
<
score_t
>
(
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
raw_scores
[
i
])));
out_result
[
i
]
=
static_cast
<
double
>
(
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
raw_scores
[
i
])));
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
raw_scores
[
i
];
out_result
[
i
]
=
static_cast
<
double
>
(
raw_scores
[
i
]
)
;
}
}
}
}
...
...
src/boosting/gbdt.h
View file @
72c2d790
...
@@ -107,13 +107,21 @@ public:
...
@@ -107,13 +107,21 @@ public:
*/
*/
virtual
const
score_t
*
GetTrainingScore
(
int64_t
*
out_len
)
override
;
virtual
const
score_t
*
GetTrainingScore
(
int64_t
*
out_len
)
override
;
/*!
* \brief Get number of prediction results at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return number of scores, i.e. num_data * num_class for the selected data
*/
virtual int64_t GetNumPredictAt(int data_idx) const override {
  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
  // data_idx == 0 selects the training data; k > 0 selects the k-th validation set.
  data_size_t num_data = train_data_->num_data();
  if (data_idx > 0) {
    num_data = valid_score_updater_[data_idx - 1]->num_data();
  }
  // Widen before multiplying: otherwise the product is evaluated in the
  // narrower operand type (data_size_t is presumably 32-bit -- confirm) and
  // can overflow before it is converted to the int64_t return type.
  return static_cast<int64_t>(num_data) * num_class_;
}
/*!
/*!
* \brief Get prediction result at data_idx data
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param data_idx 0: training data, 1: 1st validation data
* \param result used to store prediction result, should allocate memory before call this function
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
* \param out_len length of returned score
*/
*/
void
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
int64_t
*
out_len
)
override
;
void
GetPredictAt
(
int
data_idx
,
double
*
out_result
,
int64_t
*
out_len
)
override
;
/*!
/*!
* \brief Prediction for one record without sigmoid transformation
* \brief Prediction for one record without sigmoid transformation
...
...
src/c_api.cpp
View file @
72c2d790
This diff is collapsed.
Click to expand it.
tests/c_api_test/test.py
View file @
72c2d790
...
@@ -175,9 +175,9 @@ def test_booster():
...
@@ -175,9 +175,9 @@ def test_booster():
is_finished
=
ctypes
.
c_int
(
0
)
is_finished
=
ctypes
.
c_int
(
0
)
for
i
in
range
(
100
):
for
i
in
range
(
100
):
LIB
.
LGBM_BoosterUpdateOneIter
(
booster
,
ctypes
.
byref
(
is_finished
))
LIB
.
LGBM_BoosterUpdateOneIter
(
booster
,
ctypes
.
byref
(
is_finished
))
result
=
np
.
array
([
0.0
],
dtype
=
np
.
float
32
)
result
=
np
.
array
([
0.0
],
dtype
=
np
.
float
64
)
out_len
=
ctypes
.
c_ulong
(
0
)
out_len
=
ctypes
.
c_ulong
(
0
)
LIB
.
LGBM_BoosterGetEval
(
booster
,
0
,
ctypes
.
byref
(
out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
float
)))
LIB
.
LGBM_BoosterGetEval
(
booster
,
0
,
ctypes
.
byref
(
out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_
double
)))
print
(
'%d Iteration test AUC %f'
%
(
i
,
result
[
0
]))
print
(
'%d Iteration test AUC %f'
%
(
i
,
result
[
0
]))
LIB
.
LGBM_BoosterSaveModel
(
booster
,
-
1
,
c_str
(
'model.txt'
))
LIB
.
LGBM_BoosterSaveModel
(
booster
,
-
1
,
c_str
(
'model.txt'
))
LIB
.
LGBM_BoosterFree
(
booster
)
LIB
.
LGBM_BoosterFree
(
booster
)
...
@@ -192,7 +192,7 @@ def test_booster():
...
@@ -192,7 +192,7 @@ def test_booster():
data
.
append
(
[
float
(
x
)
for
x
in
line
.
split
(
'
\t
'
)[
1
:]]
)
data
.
append
(
[
float
(
x
)
for
x
in
line
.
split
(
'
\t
'
)[
1
:]]
)
inp
.
close
()
inp
.
close
()
mat
=
np
.
array
(
data
)
mat
=
np
.
array
(
data
)
preb
=
np
.
zeros
(
mat
.
shape
[
0
],
dtype
=
np
.
float
32
)
preb
=
np
.
zeros
(
mat
.
shape
[
0
],
dtype
=
np
.
float
64
)
num_preb
=
ctypes
.
c_long
()
num_preb
=
ctypes
.
c_long
()
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
copy
=
False
)
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
copy
=
False
)
LIB
.
LGBM_BoosterPredictForMat
(
booster2
,
LIB
.
LGBM_BoosterPredictForMat
(
booster2
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment