Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
422c0ef7
"src/treelearner/vscode:/vscode.git/clone" did not exist on "7d1276ad49fc8c07dfdf46f3eaf1863fe1cab3b3"
Commit
422c0ef7
authored
Nov 23, 2016
by
Guolin Ke
Browse files
almost finish, need some tests
parent
fc383361
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
564 additions
and
190 deletions
+564
-190
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+3
-3
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+27
-26
include/LightGBM/config.h
include/LightGBM/config.h
+1
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+418
-81
src/application/application.cpp
src/application/application.cpp
+2
-2
src/boosting/dart.hpp
src/boosting/dart.hpp
+11
-4
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+16
-16
src/boosting/gbdt.h
src/boosting/gbdt.h
+12
-9
src/c_api.cpp
src/c_api.cpp
+67
-44
src/io/config.cpp
src/io/config.cpp
+1
-1
tests/c_api_test/test.py
tests/c_api_test/test.py
+6
-3
No files found.
include/LightGBM/boosting.h
View file @
422c0ef7
...
...
@@ -73,7 +73,7 @@ public:
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
*/
virtual
void
GetPredictAt
(
int
data_idx
,
score_t
*
result
,
data_size_t
*
out_len
)
const
=
0
;
virtual
void
GetPredictAt
(
int
data_idx
,
score_t
*
result
,
data_size_t
*
out_len
)
=
0
;
/*!
* \brief Prediction for one record, not sigmoid transform
...
...
@@ -127,7 +127,7 @@ public:
* \brief Get number of weak sub-models
* \return Number of weak sub-models
*/
virtual
int
NumberOf
Sub
Model
s
()
const
=
0
;
virtual
int
NumberOf
Total
Model
()
const
=
0
;
/*!
* \brief Get number of classes
...
...
@@ -138,7 +138,7 @@ public:
/*!
* \brief Set number of used model for prediction
*/
virtual
void
SetNum
UsedModel
(
int
num_used_model
)
=
0
;
virtual
void
SetNum
IterationForPred
(
int
num_iteration
)
=
0
;
/*!
* \brief Get Type name of this boosting object
...
...
include/LightGBM/c_api.h
View file @
422c0ef7
...
...
@@ -230,11 +230,13 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
/*!
* \brief load an existing boosting from model file
* \param filename filename of model
* \param out_num_total_model number of total models
* \param out handle of created Booster
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterCreateFromModelfile
(
const
char
*
filename
,
int64_t
*
out_num_total_model
,
BoosterHandle
*
out
);
/*!
...
...
@@ -244,6 +246,12 @@ DllExport int LGBM_BoosterCreateFromModelfile(
*/
DllExport
int
LGBM_BoosterFree
(
BoosterHandle
handle
);
/*!
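* \brief Get number of classes
* \param handle handle
* \param out_len used to store the number of classes
* \return 0 when success, -1 when failure happens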
*/
DllExport
int
LGBM_BoosterGetNumClasses
(
BoosterHandle
handle
,
int64_t
*
out_len
);
/*!
* \brief update the model in one round
* \param handle handle
...
...
@@ -276,7 +284,7 @@ DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len);
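* \brief Get names of eval metrics
* \param handle handle
* \param out_len total number of eval names written
* \param out_strs array of string buffers that receive the names, should allocate memory before call this function
* \return 0 when success, -1 when failure happens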
*/
DllExport
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int64_t
*
out_len
,
const
char
**
*
out_strs
);
DllExport
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int64_t
*
out_len
,
char
**
out_strs
);
/*!
* \brief get evaluation for training data and validation data
...
...
@@ -291,17 +299,6 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
int64_t
*
out_len
,
float
*
out_results
);
/*!
* \brief get raw score for training data, used to calculate gradients outside
* \param handle handle
* \param out_len len of output result
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetTrainingScore
(
BoosterHandle
handle
,
int64_t
*
out_len
,
const
float
**
out_result
);
/*!
* \brief Get prediction for training data and validation data
this can be used to support customized eval function
...
...
@@ -319,21 +316,21 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
/*!
* \brief make prediction for file
* \param handle handle
* \param data_filename filename of data file
* \param data_has_header data file has header or not
* \param predict_type
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param data_has_header data file has header or not
* \param data_filename filename of data file
* \param num_iteration number of iteration for prediction
* \param result_filename filename of result file
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForFile
(
BoosterHandle
handle
,
int
predict_type
,
int64_t
n_used_trees
,
int
data_has_header
,
const
char
*
data_filename
,
int
data_has_header
,
int
predict_type
,
int64_t
num_iteration
,
const
char
*
result_filename
);
/*!
...
...
@@ -351,7 +348,8 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param num_iteration number of iteration for prediction
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
...
...
@@ -365,8 +363,9 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int64_t
nelem
,
int64_t
num_col
,
int
predict_type
,
int64_t
n_used_trees
,
double
*
out_result
);
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
);
/*!
* \brief make prediction for an new data set
...
...
@@ -380,7 +379,8 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param num_iteration number of iteration for prediction
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
...
...
@@ -391,18 +391,19 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int32_t
ncol
,
int
is_row_major
,
int
predict_type
,
int64_t
n_used_trees
,
double
*
out_result
);
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
);
/*!
* \brief save model into file
* \param handle handle
* \param num_
used_model
* \param num_
iteration
* \param filename file name
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterSaveModel
(
BoosterHandle
handle
,
int
num_
used_model
,
int
num_
iteration
,
const
char
*
filename
);
...
...
include/LightGBM/config.h
View file @
422c0ef7
...
...
@@ -97,7 +97,7 @@ public:
std
::
string
output_result
=
"LightGBM_predict_result.txt"
;
std
::
string
input_model
=
""
;
int
verbosity
=
1
;
int
num_
model
_predict
=
NO_LIMIT
;
int
num_
iteration
_predict
=
NO_LIMIT
;
bool
is_pre_partition
=
false
;
bool
is_enable_sparse
=
true
;
bool
use_two_round_loading
=
false
;
...
...
python-package/lightgbm/basic.py
View file @
422c0ef7
This diff is collapsed.
Click to expand it.
src/application/application.cpp
View file @
422c0ef7
...
...
@@ -108,7 +108,7 @@ void Application::LoadData() {
// prediction is needed if using input initial model(continued train)
PredictFunction
predict_fun
=
nullptr
;
// need to continue training
if
(
boosting_
->
NumberOf
Sub
Model
s
()
>
0
)
{
if
(
boosting_
->
NumberOf
Total
Model
()
>
0
)
{
Predictor
predictor
(
boosting_
.
get
(),
true
,
false
);
predict_fun
=
predictor
.
GetPredictFunction
();
}
...
...
@@ -235,7 +235,7 @@ void Application::Train() {
void
Application
::
Predict
()
{
boosting_
->
SetNum
UsedModel
(
config_
.
io_config
.
num_
model
_predict
);
boosting_
->
SetNum
IterationForPred
(
config_
.
io_config
.
num_
iteration
_predict
);
// create predictor
Predictor
predictor
(
boosting_
.
get
(),
config_
.
io_config
.
is_predict_raw_score
,
config_
.
io_config
.
is_predict_leaf_index
);
...
...
src/boosting/dart.hpp
View file @
422c0ef7
...
...
@@ -43,6 +43,7 @@ public:
* \brief one training iteration
*/
bool
TrainOneIter
(
const
score_t
*
gradient
,
const
score_t
*
hessian
,
bool
is_eval
)
override
{
is_update_score_cur_iter_
=
false
;
GBDT
::
TrainOneIter
(
gradient
,
hessian
,
false
);
// normalize
Normalize
();
...
...
@@ -58,20 +59,24 @@ public:
* \return training score
*/
const
score_t
*
GetTrainingScore
(
data_size_t
*
out_len
)
override
{
DroppingTrees
();
if
(
!
is_update_score_cur_iter_
)
{
// only drop one time in one iteration
DroppingTrees
();
is_update_score_cur_iter_
=
true
;
}
*
out_len
=
train_score_updater_
->
num_data
()
*
num_class_
;
return
train_score_updater_
->
score
();
}
/*!
* \brief save model to file
* \param num_
used_model number of model that want to save,
-1 means save all
* \param num_
iteration
-1 means save all
* \param is_finish is training finished or not
* \param filename filename that want to save to
*/
void
SaveModelToFile
(
int
num_
used_model
,
bool
is_finish
,
const
char
*
filename
)
override
{
void
SaveModelToFile
(
int
num_
iteration
,
bool
is_finish
,
const
char
*
filename
)
override
{
// only save model once when is_finish = true
if
(
is_finish
&&
saved_model_size_
<
0
)
{
GBDT
::
SaveModelToFile
(
num_
used_model
,
is_finish
,
filename
);
GBDT
::
SaveModelToFile
(
num_
iteration
,
is_finish
,
filename
);
}
}
/*!
...
...
@@ -133,6 +138,8 @@ private:
double
drop_rate_
;
/*! \brief Random generator, used to select dropping trees */
Random
random_for_drop_
;
/*! \brief Flag indicating whether the training score has already been updated in the current iteration */
bool
is_update_score_cur_iter_
;
};
}
// namespace LightGBM
...
...
src/boosting/gbdt.cpp
View file @
422c0ef7
...
...
@@ -16,7 +16,7 @@
namespace
LightGBM
{
GBDT
::
GBDT
()
:
saved_model_size_
(
-
1
),
num_
used_model
_
(
0
)
{
GBDT
::
GBDT
()
:
saved_model_size_
(
-
1
),
num_
iteration_for_pred
_
(
0
)
{
}
...
...
@@ -29,7 +29,7 @@ void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const O
gbdt_config_
=
config
;
iter_
=
0
;
saved_model_size_
=
-
1
;
num_
used_model
_
=
0
;
num_
iteration_for_pred
_
=
0
;
max_feature_idx_
=
0
;
early_stopping_round_
=
gbdt_config_
->
early_stopping_round
;
shrinkage_rate_
=
gbdt_config_
->
learning_rate
;
...
...
@@ -296,24 +296,23 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) {
return
train_score_updater_
->
score
();
}
void
GBDT
::
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
{
void
GBDT
::
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_metrics_
.
size
()));
std
::
vector
<
double
>
ret
;
const
score_t
*
raw_scores
=
nullptr
;
data_size_t
num_data
=
0
;
if
(
data_idx
==
0
)
{
raw_scores
=
train_score_updater_
->
score
(
);
raw_scores
=
GetTrainingScore
(
out_len
);
num_data
=
train_score_updater_
->
num_data
();
}
else
{
auto
used_idx
=
data_idx
-
1
;
raw_scores
=
valid_score_updater_
[
used_idx
]
->
score
();
num_data
=
valid_score_updater_
[
used_idx
]
->
num_data
();
*
out_len
=
num_data
*
num_class_
;
}
*
out_len
=
num_data
*
num_class_
;
if
(
num_class_
>
1
)
{
#pragma omp parallel for schedule(
guided
)
#pragma omp parallel for schedule(
static
)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
std
::
vector
<
double
>
tmp_result
;
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
...
...
@@ -325,12 +324,12 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len)
}
}
}
else
if
(
sigmoid_
>
0.0
f
){
#pragma omp parallel for schedule(
guided
)
#pragma omp parallel for schedule(
static
)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
static_cast
<
score_t
>
(
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
raw_scores
[
i
])));
}
}
else
{
#pragma omp parallel for schedule(
guided
)
#pragma omp parallel for schedule(
static
)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
raw_scores
[
i
];
}
...
...
@@ -348,7 +347,7 @@ void GBDT::Boosting() {
GetGradients
(
GetTrainingScore
(
&
num_score
),
gradients_
.
data
(),
hessians_
.
data
());
}
void
GBDT
::
SaveModelToFile
(
int
num_
used_model
,
bool
is_finish
,
const
char
*
filename
)
{
void
GBDT
::
SaveModelToFile
(
int
num_
iteration
,
bool
is_finish
,
const
char
*
filename
)
{
// first time to this function, open file
if
(
saved_model_size_
<
0
)
{
model_output_file_
.
open
(
filename
);
...
...
@@ -373,10 +372,11 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
if
(
!
model_output_file_
.
is_open
())
{
return
;
}
if
(
num_used_model
==
NO_LIMIT
)
{
int
num_used_model
=
0
;
if
(
num_iteration
==
NO_LIMIT
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
}
else
{
num_used_model
=
num_
used_model
*
num_class_
;
num_used_model
=
num_
iteration
*
num_class_
;
}
int
rest
=
num_used_model
-
early_stopping_round_
*
num_class_
;
// output tree models
...
...
@@ -452,7 +452,7 @@ void GBDT::LoadModelFromString(const std::string& model_str) {
}
}
Log
::
Info
(
"Finished loading %d models"
,
models_
.
size
());
num_
used_model
_
=
static_cast
<
int
>
(
models_
.
size
())
/
num_class_
;
num_
iteration_for_pred
_
=
static_cast
<
int
>
(
models_
.
size
())
/
num_class_
;
}
std
::
string
GBDT
::
FeatureImportance
()
const
{
...
...
@@ -486,7 +486,7 @@ std::string GBDT::FeatureImportance() const {
std
::
vector
<
double
>
GBDT
::
PredictRaw
(
const
double
*
value
)
const
{
std
::
vector
<
double
>
ret
(
num_class_
,
0.0
f
);
for
(
int
i
=
0
;
i
<
num_
used_model
_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_
iteration_for_pred
_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
ret
[
j
]
+=
models_
[
i
*
num_class_
+
j
]
->
Predict
(
value
);
}
...
...
@@ -496,7 +496,7 @@ std::vector<double> GBDT::PredictRaw(const double* value) const {
std
::
vector
<
double
>
GBDT
::
Predict
(
const
double
*
value
)
const
{
std
::
vector
<
double
>
ret
(
num_class_
,
0.0
f
);
for
(
int
i
=
0
;
i
<
num_
used_model
_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_
iteration_for_pred
_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
ret
[
j
]
+=
models_
[
i
*
num_class_
+
j
]
->
Predict
(
value
);
}
...
...
@@ -512,7 +512,7 @@ std::vector<double> GBDT::Predict(const double* value) const {
std
::
vector
<
int
>
GBDT
::
PredictLeafIndex
(
const
double
*
value
)
const
{
std
::
vector
<
int
>
ret
;
for
(
int
i
=
0
;
i
<
num_
used_model
_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_
iteration_for_pred
_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
ret
.
push_back
(
models_
[
i
*
num_class_
+
j
]
->
PredictLeafIndex
(
value
));
}
...
...
src/boosting/gbdt.h
View file @
422c0ef7
...
...
@@ -73,7 +73,7 @@ public:
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
*/
void
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
override
;
void
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
override
;
/*!
* \brief Prediction for one record without sigmoid transformation
...
...
@@ -98,11 +98,11 @@ public:
/*!
* \brief save model to file
* \param num_
used_model number of model that want to save,
-1 means save all
* \param num_
iteration
-1 means save all
* \param is_finish is training finished or not
* \param filename filename that want to save to
*/
virtual
void
SaveModelToFile
(
int
num_
used_model
,
bool
is_finish
,
const
char
*
filename
)
override
;
virtual
void
SaveModelToFile
(
int
num_
iteration
,
bool
is_finish
,
const
char
*
filename
)
override
;
/*!
* \brief Restore from a serialized string
*/
...
...
@@ -119,11 +119,12 @@ public:
*/
inline
int
LabelIdx
()
const
override
{
return
label_idx_
;
}
/*!
* \brief Get number of weak sub-models
* \return Number of weak sub-models
*/
inline
int
NumberOf
Sub
Model
s
()
const
override
{
return
static_cast
<
int
>
(
models_
.
size
());
}
inline
int
NumberOf
Total
Model
()
const
override
{
return
static_cast
<
int
>
(
models_
.
size
());
}
/*!
* \brief Get number of classes
...
...
@@ -132,11 +133,13 @@ public:
inline
int
NumberOfClasses
()
const
override
{
return
num_class_
;
}
/*!
* \brief Set number of
used model
for prediction
* \brief Set number of
iterations
for prediction
*/
inline
void
SetNumUsedModel
(
int
num_used_model
)
{
if
(
num_used_model
>=
0
)
{
num_used_model_
=
static_cast
<
int
>
(
num_used_model
/
num_class_
);
inline void SetNumIterationForPred(int num_iteration) override {
  // Each iteration contributes num_class_ trees, so this is the number of
  // complete iterations currently stored in models_.
  const int total_iteration = static_cast<int>(models_.size()) / num_class_;
  if (num_iteration > 0 && num_iteration < total_iteration) {
    num_iteration_for_pred_ = num_iteration;
  } else {
    // A non-positive request means "use every iteration". A request larger
    // than what is stored is clamped, because the prediction loops index
    // models_[i * num_class_ + j] for every i < num_iteration_for_pred_ and
    // would otherwise read past the end of models_.
    num_iteration_for_pred_ = total_iteration;
  }
}
...
...
@@ -236,7 +239,7 @@ protected:
/*! \brief File to write models */
std
::
ofstream
model_output_file_
;
/*! \brief number of iterations used for prediction */
int
num_
used_model
_
;
int
num_
iteration_for_pred
_
;
/*! \brief Shrinkage rate for one iteration */
double
shrinkage_rate_
;
};
...
...
src/c_api.cpp
View file @
422c0ef7
...
...
@@ -95,8 +95,8 @@ public:
return
boosting_
->
TrainOneIter
(
gradients
,
hessians
,
false
);
}
void
PrepareForPrediction
(
int
num_
used_model
,
int
predict_type
)
{
boosting_
->
SetNum
UsedModel
(
num_used_model
);
void
PrepareForPrediction
(
int
num_
iteration
,
int
predict_type
)
{
boosting_
->
SetNum
IterationForPred
(
num_iteration
);
bool
is_predict_leaf
=
false
;
bool
is_raw_score
=
false
;
if
(
predict_type
==
C_API_PREDICT_LEAF_INDEX
)
{
...
...
@@ -109,6 +109,10 @@ public:
predictor_
.
reset
(
new
Predictor
(
boosting_
.
get
(),
is_raw_score
,
is_predict_leaf
));
}
/*!
* \brief Forward a prediction request to the wrapped boosting object
* \param data_idx index of the data set, passed through unchanged
* \param out_result buffer that receives the scores, passed through unchanged
* \param out_len receives the number of values written by the boosting object
*/
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) {
  boosting_->GetPredictAt(data_idx, out_result, out_len);
}
std
::
vector
<
double
>
Predict
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
return
predictor_
->
GetPredictFunction
()(
features
);
}
...
...
@@ -117,8 +121,8 @@ public:
predictor_
->
Predict
(
data_filename
,
result_filename
,
data_has_header
);
}
void
SaveModelToFile
(
int
num_
used_model
,
const
char
*
filename
)
{
boosting_
->
SaveModelToFile
(
num_
used_model
,
true
,
filename
);
void
SaveModelToFile
(
int
num_
iteration
,
const
char
*
filename
)
{
boosting_
->
SaveModelToFile
(
num_
iteration
,
true
,
filename
);
}
int
GetEvalCounts
()
const
{
...
...
@@ -129,22 +133,25 @@ public:
return
ret
;
}
int
GetEvalNames
(
const
char
**
*
out_strs
)
const
{
int
GetEvalNames
(
char
**
out_strs
)
const
{
int
idx
=
0
;
for
(
const
auto
&
metric
:
train_metric_
)
{
for
(
const
auto
&
name
:
metric
->
GetName
())
{
*
(
out_strs
[
idx
++
])
=
name
.
c_str
();
int
j
=
0
;
auto
name_cstr
=
name
.
c_str
();
while
(
name_cstr
[
j
]
!=
'\0'
)
{
out_strs
[
idx
][
j
]
=
name_cstr
[
j
];
++
j
;
}
out_strs
[
idx
][
j
]
=
'\0'
;
++
idx
;
}
}
return
idx
;
}
const
Boosting
*
GetBoosting
()
const
{
return
boosting_
.
get
();
}
const
float
*
GetTrainingScore
(
int
*
out_len
)
const
{
return
boosting_
->
GetTrainingScore
(
out_len
);
}
const
inline
int
NumberOfClasses
()
const
{
return
boosting_
->
NumberOfClasses
();
}
private:
std
::
unique_ptr
<
Boosting
>
boosting_
;
...
...
@@ -449,9 +456,12 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
DllExport
int
LGBM_BoosterCreateFromModelfile
(
const
char
*
filename
,
int64_t
*
num_total_model
,
BoosterHandle
*
out
)
{
API_BEGIN
();
*
out
=
new
Booster
(
filename
);
auto
ret
=
std
::
unique_ptr
<
Booster
>
(
new
Booster
(
filename
));
*
num_total_model
=
static_cast
<
int64_t
>
(
ret
->
GetBoosting
()
->
NumberOfTotalModel
());
*
out
=
ret
.
release
();
API_END
();
}
...
...
@@ -461,6 +471,13 @@ DllExport int LGBM_BoosterFree(BoosterHandle handle) {
API_END
();
}
DllExport int LGBM_BoosterGetNumClasses(BoosterHandle handle, int64_t* out_len) {
  API_BEGIN();
  // The opaque handle is reinterpreted as the Booster wrapper.
  const Booster* booster = reinterpret_cast<Booster*>(handle);
  const Boosting* model = booster->GetBoosting();
  // Report the class count through the output parameter.
  *out_len = model->NumberOfClasses();
  API_END();
}
DllExport
int
LGBM_BoosterUpdateOneIter
(
BoosterHandle
handle
,
int
*
is_finished
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
...
...
@@ -501,7 +518,7 @@ DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len)
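* \brief Get names of eval metrics
* \param handle handle
* \param out_len total number of eval names written
* \param out_strs array of string buffers that receive the names, should allocate memory before call this function
* \return 0 when success, -1 when failure happens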
*/
DllExport
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int64_t
*
out_len
,
const
char
**
*
out_strs
)
{
DllExport
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int64_t
*
out_len
,
char
**
out_strs
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
*
out_len
=
ref_booster
->
GetEvalNames
(
out_strs
);
...
...
@@ -524,39 +541,27 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
API_END
();
}
DllExport
int
LGBM_BoosterGetTrainingScore
(
BoosterHandle
handle
,
int64_t
*
out_len
,
const
float
**
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
int
len
=
0
;
*
out_result
=
ref_booster
->
GetTrainingScore
(
&
len
);
*
out_len
=
static_cast
<
int64_t
>
(
len
);
API_END
();
}
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
int
data
,
int64_t
*
out_len
,
float
*
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
boosting
=
ref_booster
->
GetBoosting
();
int
len
=
0
;
boost
ing
->
GetPredictAt
(
data
,
out_result
,
&
len
);
ref_
boost
er
->
GetPredictAt
(
data
,
out_result
,
&
len
);
*
out_len
=
static_cast
<
int64_t
>
(
len
);
API_END
();
}
DllExport
int
LGBM_BoosterPredictForFile
(
BoosterHandle
handle
,
int
predict_type
,
int64_t
n_used_trees
,
int
data_has_header
,
const
char
*
data_filename
,
int
data_has_header
,
int
predict_type
,
int64_t
num_iteration
,
const
char
*
result_filename
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n
_used_trees
),
predict_type
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n
um_iteration
),
predict_type
);
bool
bool_data_has_header
=
data_has_header
>
0
?
true
:
false
;
ref_booster
->
PredictForFile
(
data_filename
,
result_filename
,
bool_data_has_header
);
API_END
();
...
...
@@ -572,23 +577,32 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int64_t
nelem
,
int64_t
,
int
predict_type
,
int64_t
n_used_trees
,
double
*
out_result
)
{
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n
_used_trees
),
predict_type
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n
um_iteration
),
predict_type
);
auto
get_row_fun
=
RowFunctionFromCSR
(
indptr
,
indptr_type
,
indices
,
data
,
data_type
,
nindptr
,
nelem
);
int
num_class
=
ref_booster
->
NumberOfClasses
();
int
num_preb_in_one_row
=
ref_booster
->
GetBoosting
()
->
NumberOfClasses
();
if
(
predict_type
==
C_API_PREDICT_LEAF_INDEX
)
{
if
(
num_iteration
>
0
)
{
num_preb_in_one_row
*=
static_cast
<
int
>
(
num_iteration
);
}
else
{
num_preb_in_one_row
*=
ref_booster
->
GetBoosting
()
->
NumberOfTotalModel
()
/
num_preb_in_one_row
;
}
}
int
nrow
=
static_cast
<
int
>
(
nindptr
-
1
);
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
ref_booster
->
Predict
(
one_row
);
for
(
int
j
=
0
;
j
<
num
_c
l
as
s
;
++
j
)
{
out_result
[
i
*
num_
class
+
j
]
=
predicton_result
[
j
];
for
(
int
j
=
0
;
j
<
static
_cas
t
<
int
>
(
predicton_result
.
size
())
;
++
j
)
{
out_result
[
i
*
num_
preb_in_one_row
+
j
]
=
static_cast
<
float
>
(
predicton_result
[
j
]
)
;
}
}
*
out_len
=
nrow
*
num_preb_in_one_row
;
API_END
();
}
...
...
@@ -599,31 +613,40 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int32_t
ncol
,
int
is_row_major
,
int
predict_type
,
int64_t
n_used_trees
,
double
*
out_result
)
{
int64_t
num_iteration
,
int64_t
*
out_len
,
float
*
out_result
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n
_used_trees
),
predict_type
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n
um_iteration
),
predict_type
);
auto
get_row_fun
=
RowPairFunctionFromDenseMatric
(
data
,
nrow
,
ncol
,
data_type
,
is_row_major
);
int
num_class
=
ref_booster
->
NumberOfClasses
();
int
num_preb_in_one_row
=
ref_booster
->
GetBoosting
()
->
NumberOfClasses
();
if
(
predict_type
==
C_API_PREDICT_LEAF_INDEX
)
{
if
(
num_iteration
>
0
)
{
num_preb_in_one_row
*=
static_cast
<
int
>
(
num_iteration
);
}
else
{
num_preb_in_one_row
*=
ref_booster
->
GetBoosting
()
->
NumberOfTotalModel
()
/
num_preb_in_one_row
;
}
}
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
ref_booster
->
Predict
(
one_row
);
for
(
int
j
=
0
;
j
<
num
_c
l
as
s
;
++
j
)
{
out_result
[
i
*
num_
class
+
j
]
=
predicton_result
[
j
];
for
(
int
j
=
0
;
j
<
static
_cas
t
<
int
>
(
predicton_result
.
size
())
;
++
j
)
{
out_result
[
i
*
num_
preb_in_one_row
+
j
]
=
static_cast
<
float
>
(
predicton_result
[
j
]
)
;
}
}
*
out_len
=
nrow
*
num_preb_in_one_row
;
API_END
();
}
DllExport
int
LGBM_BoosterSaveModel
(
BoosterHandle
handle
,
int
num_
used_model
,
int
num_
iteration
,
const
char
*
filename
)
{
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
SaveModelToFile
(
num_
used_model
,
filename
);
ref_booster
->
SaveModelToFile
(
num_
iteration
,
filename
);
API_END
();
}
...
...
src/io/config.cpp
View file @
422c0ef7
...
...
@@ -183,7 +183,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt
(
params
,
"data_random_seed"
,
&
data_random_seed
);
GetString
(
params
,
"data"
,
&
data_filename
);
GetInt
(
params
,
"verbose"
,
&
verbosity
);
GetInt
(
params
,
"num_
model
_predict"
,
&
num_
model
_predict
);
GetInt
(
params
,
"num_
iteration
_predict"
,
&
num_
iteration
_predict
);
GetInt
(
params
,
"bin_construct_sample_cnt"
,
&
bin_construct_sample_cnt
);
GetBool
(
params
,
"is_pre_partition"
,
&
is_pre_partition
);
GetBool
(
params
,
"is_enable_sparse"
,
&
is_enable_sparse
);
...
...
tests/c_api_test/test.py
View file @
422c0ef7
...
...
@@ -190,14 +190,16 @@ def test_booster():
test_free_dataset
(
train
)
test_free_dataset
(
test
[
0
])
booster2
=
ctypes
.
c_void_p
()
LIB
.
LGBM_BoosterCreateFromModelfile
(
c_str
(
'model.txt'
),
ctypes
.
byref
(
booster2
))
num_total_model
=
ctypes
.
c_long
()
LIB
.
LGBM_BoosterCreateFromModelfile
(
c_str
(
'model.txt'
),
ctypes
.
byref
(
num_total_model
),
ctypes
.
byref
(
booster2
))
data
=
[]
inp
=
open
(
'../../examples/binary_classification/binary.test'
,
'r'
)
for
line
in
inp
.
readlines
():
data
.
append
(
[
float
(
x
)
for
x
in
line
.
split
(
'
\t
'
)[
1
:]]
)
inp
.
close
()
mat
=
np
.
array
(
data
)
preb
=
np
.
zeros
((
mat
.
shape
[
0
],
1
),
dtype
=
np
.
float64
)
preb
=
np
.
zeros
(
mat
.
shape
[
0
],
dtype
=
np
.
float32
)
num_preb
=
ctypes
.
c_long
()
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
copy
=
False
)
LIB
.
LGBM_BoosterPredictForMat
(
booster2
,
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_void_p
)),
...
...
@@ -207,8 +209,9 @@ def test_booster():
1
,
1
,
50
,
ctypes
.
byref
(
num_preb
),
preb
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
)))
LIB
.
LGBM_BoosterPredictForFile
(
booster2
,
1
,
50
,
0
,
c_str
(
'../../examples/binary_classification/binary.test'
),
c_str
(
'preb.txt'
))
LIB
.
LGBM_BoosterPredictForFile
(
booster2
,
c_str
(
'../../examples/binary_classification/binary.test'
),
0
,
0
,
50
,
c_str
(
'preb.txt'
))
LIB
.
LGBM_BoosterFree
(
booster2
)
test_dataset
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment