Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
e161a746
Commit
e161a746
authored
Nov 07, 2016
by
Guolin Ke
Browse files
Add Interface of Booster
parent
792826e1
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
194 additions
and
160 deletions
+194
-160
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+3
-1
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+4
-32
include/LightGBM/config.h
include/LightGBM/config.h
+5
-3
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+0
-3
include/LightGBM/dataset_loader.h
include/LightGBM/dataset_loader.h
+2
-2
include/LightGBM/meta.h
include/LightGBM/meta.h
+6
-0
src/application/application.cpp
src/application/application.cpp
+4
-7
src/application/predictor.hpp
src/application/predictor.hpp
+28
-51
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+54
-6
src/boosting/gbdt.h
src/boosting/gbdt.h
+3
-1
src/c_api.cpp
src/c_api.cpp
+84
-53
src/io/dataset_loader.cpp
src/io/dataset_loader.cpp
+1
-1
No files found.
include/LightGBM/boosting.h
View file @
e161a746
...
...
@@ -50,6 +50,8 @@ public:
virtual
const
score_t
*
GetTrainingScore
(
data_size_t
*
out_len
)
const
=
0
;
virtual
void
GetPredict
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
=
0
;
/*!
* \brief Prediction for one record, not sigmoid transform
* \param feature_values Feature value on this record
...
...
@@ -75,7 +77,7 @@ public:
/*!
* \brief save model to file
*/
virtual
void
SaveModelToFile
(
bool
is_finish
,
const
char
*
filename
)
=
0
;
virtual
void
SaveModelToFile
(
int
num_used_model
,
bool
is_finish
,
const
char
*
filename
)
=
0
;
/*!
* \brief Restore from a serialized string
...
...
include/LightGBM/c_api.h
View file @
e161a746
...
...
@@ -262,7 +262,7 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
DllExport
int
LGBM_BoosterEval
(
BoosterHandle
handle
,
int
data
,
uint64_t
*
out_len
,
double
*
out_results
);
float
*
out_results
);
/*!
* \brief get raw score for training data, used to calculate gradients outside
...
...
@@ -287,7 +287,7 @@ this can be used to support customized eval function
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
int
data
,
uint64_t
*
out_len
,
const
float
*
*
out_result
);
float
*
out_result
);
/*!
* \brief make prediction for an new data set
...
...
@@ -319,36 +319,6 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
uint64_t
n_used_trees
,
double
*
out_result
);
/*!
* \brief make prediction for an new data set
* \param handle handle
* \param col_ptr pointer to col headers
* \param indices findex
* \param data fvalue
* \param float_type 0:float_32 1:float64
* \param ncol_ptr number of rows in the matix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows; when it's set to 0, then guess from data
* \param predict_type
* 0:raw score
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForCSC
(
BoosterHandle
handle
,
const
int32_t
*
col_ptr
,
const
int32_t
*
indices
,
const
void
*
data
,
int
float_type
,
uint64_t
ncol_ptr
,
uint64_t
nelem
,
uint64_t
num_row
,
int
predict_type
,
uint64_t
n_used_trees
,
double
*
out_result
);
/*!
* \brief make prediction for an new data set
* \param handle handle
...
...
@@ -356,6 +326,7 @@ DllExport int LGBM_BoosterPredictForCSC(BoosterHandle handle,
* \param float_type 0:float_32 1:float64
* \param nrow number of rows
* \param ncol number columns
* \param is_row_major 1 for row major, 0 for column major
* \param predict_type
* 0:raw score
* 1:with transform(if needed)
...
...
@@ -369,6 +340,7 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int
float_type
,
int32_t
nrow
,
int32_t
ncol
,
int
is_row_major
,
int
predict_type
,
uint64_t
n_used_trees
,
double
*
out_result
);
...
...
include/LightGBM/config.h
View file @
e161a746
...
...
@@ -4,6 +4,8 @@
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
#include <LightGBM/meta.h>
#include <vector>
#include <string>
#include <unordered_map>
...
...
@@ -94,7 +96,7 @@ public:
std
::
string
output_result
=
"LightGBM_predict_result.txt"
;
std
::
string
input_model
=
""
;
int
verbosity
=
1
;
int
num_model_predict
=
-
1
;
int
num_model_predict
=
NO_LIMIT
;
bool
is_pre_partition
=
false
;
bool
is_enable_sparse
=
true
;
bool
use_two_round_loading
=
false
;
...
...
@@ -157,12 +159,12 @@ public:
int
feature_fraction_seed
=
2
;
double
feature_fraction
=
1.0
f
;
// max cache size(unit:MB) for historical histogram. < 0 means not limit
double
histogram_pool_size
=
-
1.0
f
;
double
histogram_pool_size
=
NO_LIMIT
;
// max depth of tree model.
// Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting
// And the max leaves will be min(num_leaves, pow(2, max_depth - 1))
// max_depth < 0 means not limit
int
max_depth
=
-
1
;
int
max_depth
=
NO_LIMIT
;
void
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
override
;
};
...
...
include/LightGBM/dataset.h
View file @
e161a746
...
...
@@ -234,9 +234,6 @@ public:
static
Parser
*
CreateParser
(
const
char
*
filename
,
bool
has_header
,
int
num_features
,
int
label_idx
);
};
using
PredictFunction
=
std
::
function
<
std
::
vector
<
double
>
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>
;
/*! \brief The main class of data set,
* which are used to traning or validation
*/
...
...
include/LightGBM/dataset_loader.h
View file @
e161a746
...
...
@@ -59,9 +59,9 @@ private:
/*! \brief index of label column */
int
label_idx_
=
0
;
/*! \brief index of weight column */
int
weight_idx_
=
-
1
;
int
weight_idx_
=
NO_SPECIFIC
;
/*! \brief index of group column */
int
group_idx_
=
-
1
;
int
group_idx_
=
NO_SPECIFIC
;
/*! \brief Mapper from real feature index to used index*/
std
::
unordered_set
<
int
>
ignore_features_
;
/*! \brief store feature names */
...
...
include/LightGBM/meta.h
View file @
e161a746
...
...
@@ -25,6 +25,12 @@ std::vector<const T*> ConstPtrInVectorWarpper(std::vector<T*> input) {
using
ReduceFunction
=
std
::
function
<
void
(
const
char
*
,
char
*
,
int
)
>
;
using
PredictFunction
=
std
::
function
<
std
::
vector
<
double
>
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>
;
#define NO_LIMIT (-1)
#define NO_SPECIFIC (-1)
}
// namespace LightGBM
#endif // LightGBM_META_H_
src/application/application.cpp
View file @
e161a746
...
...
@@ -125,11 +125,8 @@ void Application::LoadData() {
Predictor
*
predictor
=
nullptr
;
// need to continue training
if
(
boosting_
->
NumberOfSubModels
()
>
0
)
{
predictor
=
new
Predictor
(
boosting_
,
config_
.
io_config
.
is_raw_score
,
config_
.
predict_leaf_index
);
predict_fun
=
[
&
predictor
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
return
predictor
->
PredictRawOneLine
(
features
);
};
predictor
=
new
Predictor
(
boosting_
,
true
,
false
);
predict_fun
=
predictor
->
GetPredictFunction
();
}
// sync up random seed for data partition
...
...
@@ -244,11 +241,11 @@ void Application::Train() {
// output used time per iteration
Log
::
Info
(
"%f seconds elapsed, finished iteration %d"
,
std
::
chrono
::
duration
<
double
,
std
::
milli
>
(
end_time
-
start_time
)
*
1e-3
,
iter
+
1
);
boosting_
->
SaveModelToFile
(
is_finished
,
config_
.
io_config
.
output_model
.
c_str
());
boosting_
->
SaveModelToFile
(
NO_LIMIT
,
is_finished
,
config_
.
io_config
.
output_model
.
c_str
());
}
is_finished
=
true
;
// save model to file
boosting_
->
SaveModelToFile
(
is_finished
,
config_
.
io_config
.
output_model
.
c_str
());
boosting_
->
SaveModelToFile
(
NO_LIMIT
,
is_finished
,
config_
.
io_config
.
output_model
.
c_str
());
Log
::
Info
(
"Finished training"
);
}
...
...
src/application/predictor.hpp
View file @
e161a746
...
...
@@ -41,6 +41,28 @@ public:
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
features_
[
i
]
=
new
double
[
num_features_
];
}
if
(
is_predict_leaf_index_
)
{
predict_fun_
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result for leaf index
auto
result
=
boosting_
->
PredictLeafIndex
(
features_
[
tid
]);
return
std
::
vector
<
double
>
(
result
.
begin
(),
result
.
end
());
};
}
else
{
if
(
is_raw_score_
)
{
predict_fun_
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result without sigmoid transformation
return
boosting_
->
PredictRaw
(
features_
[
tid
]);
};
}
else
{
predict_fun_
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
return
boosting_
->
Predict
(
features_
[
tid
]);
};
}
}
}
/*!
* \brief Destructor
...
...
@@ -54,37 +76,8 @@ public:
}
}
/*!
* \brief prediction for one record, only raw result (without sigmoid transformation)
* \param features Feature for this record
* \return Prediction result
*/
std
::
vector
<
double
>
PredictRawOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result without sigmoid transformation
return
boosting_
->
PredictRaw
(
features_
[
tid
]);
}
/*!
* \brief prediction for one record, only raw result (without sigmoid transformation)
* \param features Feature for this record
* \return Predictied leaf index
*/
std
::
vector
<
int
>
PredictLeafIndexOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result for leaf index
return
boosting_
->
PredictLeafIndex
(
features_
[
tid
]);
}
/*!
* \brief prediction for one record, will use sigmoid transformation if needed (only enabled for binary classification noe)
* \param features Feature of this record
* \return Prediction result
*/
std
::
vector
<
double
>
PredictOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result with sigmoid transform if needed
return
boosting_
->
Predict
(
features_
[
tid
]);
inline
const
PredictFunction
&
GetPredictFunction
()
{
return
predict_fun_
;
}
/*!
...
...
@@ -119,26 +112,8 @@ public:
parser
->
ParseOneLine
(
buffer
,
feature
,
&
tmp_label
);
};
std
::
function
<
std
::
string
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>
predict_fun
;
if
(
is_predict_leaf_index_
)
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
return
Common
::
Join
<
int
>
(
PredictLeafIndexOneLine
(
features
),
'\t'
);
};
}
else
{
if
(
is_raw_score_
)
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
return
Common
::
Join
<
double
>
(
PredictRawOneLine
(
features
),
'\t'
);
};
}
else
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
return
Common
::
Join
<
double
>
(
PredictOneLine
(
features
),
'\t'
);
};
}
}
std
::
function
<
void
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
)
>
process_fun
=
[
this
,
&
parser_fun
,
&
predict_fun
,
&
result_file
]
[
this
,
&
parser_fun
,
&
result_file
]
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
lines
)
{
std
::
vector
<
std
::
pair
<
int
,
double
>>
oneline_features
;
std
::
vector
<
std
::
string
>
pred_result
(
lines
.
size
(),
""
);
...
...
@@ -148,7 +123,7 @@ public:
// parser
parser_fun
(
lines
[
i
].
c_str
(),
&
oneline_features
);
// predict
pred_result
[
i
]
=
predict_fun
(
oneline_features
);
pred_result
[
i
]
=
Common
::
Join
<
double
>
(
predict_fun
_
(
oneline_features
)
,
'\t'
)
;
}
for
(
size_t
i
=
0
;
i
<
pred_result
.
size
();
++
i
)
{
...
...
@@ -187,6 +162,8 @@ private:
int
num_threads_
;
/*! \brief True if output leaf index instead of prediction score */
bool
is_predict_leaf_index_
;
/*! \brief function for prediction */
PredictFunction
predict_fun_
;
};
}
// namespace LightGBM
...
...
src/boosting/gbdt.cpp
View file @
e161a746
...
...
@@ -293,25 +293,68 @@ std::vector<double> GBDT::GetEvalAt(int data_idx) const {
return
ret
;
}
/*! \brief Get
prediction
result */
/*! \brief Get
training scores
result */
const
score_t
*
GBDT
::
GetTrainingScore
(
data_size_t
*
out_len
)
const
{
*
out_len
=
train_score_updater_
->
num_data
()
*
num_class_
;
return
train_score_updater_
->
score
();
}
void
GBDT
::
GetPredict
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_metrics_
.
size
()));
std
::
vector
<
double
>
ret
;
const
score_t
*
raw_scores
=
nullptr
;
data_size_t
num_data
=
0
;
if
(
data_idx
==
0
)
{
raw_scores
=
train_score_updater_
->
score
();
num_data
=
train_score_updater_
->
num_data
();
}
else
{
auto
used_idx
=
data_idx
-
1
;
raw_scores
=
valid_score_updater_
[
used_idx
]
->
score
();
num_data
=
valid_score_updater_
[
used_idx
]
->
num_data
();
}
*
out_len
=
num_data
*
num_class_
;
if
(
num_class_
>
1
)
{
#pragma omp parallel for schedule(guided)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
std
::
vector
<
double
>
tmp_result
;
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
tmp_result
.
push_back
(
raw_scores
[
j
*
num_data
+
i
]);
}
Common
::
Softmax
(
&
tmp_result
);
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
)
{
out_result
[
j
*
num_data
+
i
]
=
static_cast
<
score_t
>
(
tmp_result
[
i
]);
}
}
}
else
if
(
sigmoid_
>
0
){
#pragma omp parallel for schedule(guided)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
static_cast
<
score_t
>
(
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
raw_scores
[
i
])));
}
}
else
{
#pragma omp parallel for schedule(guided)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
out_result
[
i
]
=
raw_scores
[
i
];
}
}
}
void
GBDT
::
Boosting
()
{
if
(
object_function_
==
nullptr
)
{
Log
::
Fatal
(
"No object function provided"
);
}
// objective function will calculate gradients and hessians
int
num_score
=
0
;
object_function_
->
GetGradients
(
train_score_updater_
->
score
(
),
gradients_
,
hessians_
);
GetGradients
(
GetTrainingScore
(
&
num_
score
),
gradients_
,
hessians_
);
}
void
GBDT
::
SaveModelToFile
(
bool
is_finish
,
const
char
*
filename
)
{
void
GBDT
::
SaveModelToFile
(
int
num_used_model
,
bool
is_finish
,
const
char
*
filename
)
{
// first time to this function, open file
if
(
saved_model_size_
==
-
1
)
{
if
(
saved_model_size_
<
0
)
{
model_output_file_
.
open
(
filename
);
// output model type
model_output_file_
<<
"gbdt"
<<
std
::
endl
;
...
...
@@ -330,7 +373,12 @@ void GBDT::SaveModelToFile(bool is_finish, const char* filename) {
if
(
!
model_output_file_
.
is_open
())
{
return
;
}
int
rest
=
static_cast
<
int
>
(
models_
.
size
())
-
early_stopping_round_
*
num_class_
;
if
(
num_used_model_
==
NO_LIMIT
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
}
else
{
num_used_model
=
num_used_model
*
num_class_
;
}
int
rest
=
num_used_model
-
early_stopping_round_
*
num_class_
;
// output tree models
for
(
int
i
=
saved_model_size_
;
i
<
rest
;
++
i
)
{
model_output_file_
<<
"Tree="
<<
i
<<
std
::
endl
;
...
...
@@ -342,7 +390,7 @@ void GBDT::SaveModelToFile(bool is_finish, const char* filename) {
model_output_file_
.
flush
();
// training finished, can close file
if
(
is_finish
)
{
for
(
int
i
=
saved_model_size_
;
i
<
static_cast
<
int
>
(
models_
.
size
())
;
++
i
)
{
for
(
int
i
=
saved_model_size_
;
i
<
num_used_model
;
++
i
)
{
model_output_file_
<<
"Tree="
<<
i
<<
std
::
endl
;
model_output_file_
<<
models_
[
i
]
->
ToString
()
<<
std
::
endl
;
}
...
...
src/boosting/gbdt.h
View file @
e161a746
...
...
@@ -52,6 +52,8 @@ public:
/*! \brief Get prediction result */
const
score_t
*
GetTrainingScore
(
data_size_t
*
out_len
)
const
override
;
void
GetPredict
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
override
;
/*!
* \brief Predtion for one record without sigmoid transformation
* \param feature_values Feature value on this record
...
...
@@ -77,7 +79,7 @@ public:
* \brief Serialize models by string
* \return String output of tranined model
*/
void
SaveModelToFile
(
bool
is_finish
,
const
char
*
filename
)
override
;
void
SaveModelToFile
(
int
num_used_model
,
bool
is_finish
,
const
char
*
filename
)
override
;
/*!
* \brief Restore from a serialized string
*/
...
...
src/c_api.cpp
View file @
e161a746
...
...
@@ -16,19 +16,21 @@
#include <cstring>
#include <memory>
#include "./application/predictor.hpp"
namespace
LightGBM
{
class
Booster
{
public:
explicit
Booster
(
const
char
*
filename
)
:
boosting_
(
Boosting
::
CreateBoosting
(
filename
))
{
boosting_
(
Boosting
::
CreateBoosting
(
filename
))
,
predictor_
(
nullptr
)
{
}
Booster
(
const
Dataset
*
train_data
,
std
::
vector
<
const
Dataset
*>
valid_data
,
std
::
vector
<
std
::
string
>
valid_names
,
const
char
*
parameters
)
:
train_data_
(
train_data
),
valid_datas_
(
valid_data
)
{
:
train_data_
(
train_data
),
valid_datas_
(
valid_data
)
,
predictor_
(
nullptr
)
{
config_
.
LoadFromString
(
parameters
);
// create boosting
if
(
config_
.
io_config
.
input_model
.
size
()
>
0
)
{
...
...
@@ -87,6 +89,7 @@ public:
valid_metrics_
.
clear
();
if
(
boosting_
!=
nullptr
)
{
delete
boosting_
;
}
if
(
objective_fun_
!=
nullptr
)
{
delete
objective_fun_
;
}
if
(
predictor_
!=
nullptr
)
{
delete
predictor_
;
}
}
bool
TrainOneIter
()
{
...
...
@@ -99,13 +102,31 @@ public:
void
PrepareForPrediction
(
int
num_used_model
,
int
predict_type
)
{
boosting_
->
SetNumUsedModel
(
num_used_model
);
if
(
predictor_
!=
nullptr
)
{
delete
predictor_
;
}
bool
is_predict_leaf
=
false
;
bool
is_raw_score
=
false
;
if
(
predict_type
==
2
)
{
is_predict_leaf
=
true
;
}
else
if
(
predict_type
==
1
)
{
is_raw_score
=
false
;
}
else
{
is_raw_score
=
true
;
}
predictor_
=
new
Predictor
(
boosting_
,
is_raw_score
,
is_predict_leaf
);
}
std
::
vector
<
double
>
Predict
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
return
predictor_
->
GetPredictFunction
()(
features
);
}
void
SaveModelToFile
(
int
num_used_model
,
const
char
*
filename
)
{
boosting_
->
SaveModelToFile
(
num_used_model
,
true
,
filename
);
}
const
Boosting
*
GetBoosting
()
const
{
return
boosting_
;
}
private:
const
inline
int
NumberOfClass
()
const
{
return
boosting_
->
NumberOfClass
();
}
std
::
function
<
std
::
vector
<
double
>
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>
predict_fun
;
private:
Boosting
*
boosting_
;
/*! \brief All configs */
...
...
@@ -120,6 +141,8 @@ private:
std
::
vector
<
std
::
vector
<
Metric
*>>
valid_metrics_
;
/*! \brief Training objective function */
ObjectiveFunction
*
objective_fun_
;
/*! \brief Using predictor for prediction task */
Predictor
*
predictor_
;
};
...
...
@@ -421,14 +444,14 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
DllExport
int
LGBM_BoosterEval
(
BoosterHandle
handle
,
int
data
,
uint64_t
*
out_len
,
double
*
out_results
)
{
float
*
out_results
)
{
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
boosting
=
ref_booster
->
GetBoosting
();
auto
result_buf
=
boosting
->
GetEvalAt
(
data
);
*
out_len
=
static_cast
<
uint64_t
>
(
result_buf
.
size
());
for
(
size_t
i
=
0
;
i
<
result_buf
.
size
();
++
i
)
{
(
out_results
)[
i
]
=
result_buf
[
i
];
(
out_results
)[
i
]
=
static_cast
<
float
>
(
result_buf
[
i
]
)
;
}
return
0
;
}
...
...
@@ -446,6 +469,19 @@ DllExport int LGBM_BoosterGetScore(BoosterHandle handle,
return
0
;
}
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
int
data
,
uint64_t
*
out_len
,
float
*
out_result
)
{
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
boosting
=
ref_booster
->
GetBoosting
();
int
len
=
0
;
boosting
->
GetPredict
(
data
,
out_result
,
&
len
);
*
out_len
=
static_cast
<
uint64_t
>
(
len
);
return
0
;
}
DllExport
int
LGBM_BoosterPredictForCSR
(
BoosterHandle
handle
,
const
int32_t
*
indptr
,
const
int32_t
*
indices
,
...
...
@@ -453,63 +489,53 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int
float_type
,
uint64_t
nindptr
,
uint64_t
nelem
,
uint64_t
num_col
,
uint64_t
,
int
predict_type
,
uint64_t
n_used_trees
,
double
*
out_result
)
;
double
*
out_result
)
{
/*!
* \brief make prediction for an new data set
* \param handle handle
* \param col_ptr pointer to col headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows; when it's set to 0, then guess from data
* \param predict_type
* 0:raw score
* 1:with sigmoid transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForCSC
(
BoosterHandle
handle
,
const
int32_t
*
col_ptr
,
const
int32_t
*
indices
,
const
void
*
data
,
int
float_type
,
uint64_t
nindptr
,
uint64_t
nelem
,
uint64_t
num_row
,
int
predict_type
,
uint64_t
n_used_trees
,
double
*
out_result
);
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n_used_trees
),
predict_type
);
auto
get_row_fun
=
Common
::
RowFunctionFromCSR
(
indptr
,
indices
,
data
,
float_type
,
nindptr
,
nelem
);
int
num_class
=
ref_booster
->
NumberOfClass
();
int
nrow
=
static_cast
<
int
>
(
nindptr
-
1
);
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
ref_booster
->
Predict
(
one_row
);
for
(
int
j
=
0
;
j
<
num_class
;
j
++
)
{
out_result
[
i
*
num_class
+
j
]
=
predicton_result
[
j
];
}
}
return
0
;
}
/*!
* \brief make prediction for an new data set
* \param handle handle
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
* \param predict_type
* 0:raw score
* 1:with sigmoid transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForMat
(
BoosterHandle
handle
,
const
void
*
data
,
int
float_type
,
int32_t
nrow
,
int32_t
ncol
,
int
is_row_major
,
int
predict_type
,
uint64_t
n_used_trees
,
double
*
out_result
);
double
*
out_result
)
{
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n_used_trees
),
predict_type
);
auto
get_row_fun
=
Common
::
RowPairFunctionFromDenseMatric
(
data
,
nrow
,
ncol
,
float_type
,
is_row_major
);
int
num_class
=
ref_booster
->
NumberOfClass
();
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
ref_booster
->
Predict
(
one_row
);
for
(
int
j
=
0
;
j
<
num_class
;
j
++
)
{
out_result
[
i
*
num_class
+
j
]
=
predicton_result
[
j
];
}
}
return
0
;
}
/*!
* \brief save model into file
...
...
@@ -520,4 +546,9 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
*/
DllExport
int
LGBM_BoosterSaveModel
(
BoosterHandle
handle
,
int
num_used_model
,
const
char
*
filename
);
const
char
*
filename
)
{
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
ref_booster
->
SaveModelToFile
(
num_used_model
,
filename
);
return
0
;
}
src/io/dataset_loader.cpp
View file @
e161a746
...
...
@@ -437,7 +437,7 @@ Dataset* DatasetLoader::CostructFromSampleData(std::vector<std::vector<double>>&
delete
bin_mappers
[
i
];
}
}
dataset
->
metadata_
.
Init
(
dataset
->
num_data_
,
dataset
->
num_class_
,
-
1
,
-
1
);
dataset
->
metadata_
.
Init
(
dataset
->
num_data_
,
dataset
->
num_class_
,
NO_SPECIFIC
,
NO_SPECIFIC
);
return
dataset
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment