Commit 823fc03c authored by mjmckp's avatar mjmckp Committed by Guolin Ke
Browse files

Add API method LGBM_BoosterPredictForMats (#2008)

* Fix index out-of-range exception generated by BaggingHelper on small datasets.

Prior to this change, the line "score_t threshold = tmp_gradients[top_k - 1];" would generate an exception, since tmp_gradients would be empty when the cnt input value to the function is zero.

* Update goss.hpp

* Update goss.hpp

* Add API method LGBM_BoosterPredictForMats which runs prediction on a data set given as of array of pointers to rows (as opposed to existing method LGBM_BoosterPredictForMat which requires data given as contiguous array)

* Fix incorrect upstream merge

* Add link to LightGBM.NET

* Fix indenting to 2 spaces

* Dummy edit to trigger CI

* Dummy edit to trigger CI
parent 011cc90a
...@@ -70,6 +70,8 @@ MMLSpark (Spark-package): https://github.com/Azure/mmlspark ...@@ -70,6 +70,8 @@ MMLSpark (Spark-package): https://github.com/Azure/mmlspark
ML.NET (.NET/C#-package): https://github.com/dotnet/machinelearning ML.NET (.NET/C#-package): https://github.com/dotnet/machinelearning
LightGBM.NET (.NET/C#-package): https://github.com/rca22/LightGBM.Net
Dask-LightGBM (distributed and parallel Python-package): https://github.com/dask/dask-lightgbm Dask-LightGBM (distributed and parallel Python-package): https://github.com/dask/dask-lightgbm
Get Started and Documentation Get Started and Documentation
......
...@@ -822,6 +822,37 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle, ...@@ -822,6 +822,37 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
int64_t* out_len, int64_t* out_len,
double* out_result); double* out_result);
/*!
* \brief make prediction for an new data set
* Note: should pre-allocate memory for out_result,
* for noraml and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param data pointer to the data space
* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows
* \param ncol number columns
* \param predict_type
* C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* C_API_PREDICT_RAW_SCORE: raw score
* C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param parameter Other parameters for the parameters, e.g. early stopping for prediction.
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle,
const void** data,
int data_type,
int32_t nrow,
int32_t ncol,
int predict_type,
int num_iteration,
const char* parameter,
int64_t* out_len,
double* out_result);
/*! /*!
* \brief save model into file * \brief save model into file
* \param handle handle * \param handle handle
......
...@@ -389,6 +389,9 @@ RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_ ...@@ -389,6 +389,9 @@ RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_
std::function<std::vector<std::pair<int, double>>(int row_idx)> std::function<std::vector<std::pair<int, double>>(int row_idx)>
RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major); RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);
std::function<std::vector<std::pair<int, double>>(int row_idx)>
RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type);
std::function<std::vector<std::pair<int, double>>(int idx)> std::function<std::vector<std::pair<int, double>>(int idx)>
RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices, RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
const void* data, int data_type, int64_t nindptr, int64_t nelem); const void* data, int data_type, int64_t nindptr, int64_t nelem);
...@@ -1416,6 +1419,30 @@ int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle, ...@@ -1416,6 +1419,30 @@ int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle,
} }
int LGBM_BoosterPredictForMats(BoosterHandle handle,
const void** data,
int data_type,
int32_t nrow,
int32_t ncol,
int predict_type,
int num_iteration,
const char* parameter,
int64_t* out_len,
double* out_result) {
API_BEGIN();
auto param = Config::Str2Map(parameter);
Config config;
config.Set(param);
if (config.num_threads > 0) {
omp_set_num_threads(config.num_threads);
}
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto get_row_fun = RowPairFunctionFromDenseRows(data, ncol, data_type);
ref_booster->Predict(num_iteration, predict_type, nrow, get_row_fun,
config, out_result, out_len);
API_END();
}
int LGBM_BoosterSaveModel(BoosterHandle handle, int LGBM_BoosterSaveModel(BoosterHandle handle,
int start_iteration, int start_iteration,
int num_iteration, int num_iteration,
...@@ -1589,6 +1616,22 @@ RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d ...@@ -1589,6 +1616,22 @@ RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d
return nullptr; return nullptr;
} }
// data is array of pointers to individual rows
std::function<std::vector<std::pair<int, double>>(int row_idx)>
RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type) {
return [=](int row_idx) {
auto inner_function = RowFunctionFromDenseMatric(data[row_idx], 1, num_col, data_type, /* is_row_major */ true);
auto raw_values = inner_function(0);
std::vector<std::pair<int, double>> ret;
for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) {
if (std::fabs(raw_values[i]) > kZeroThreshold || std::isnan(raw_values[i])) {
ret.emplace_back(i, raw_values[i]);
}
}
return ret;
};
}
std::function<std::vector<std::pair<int, double>>(int idx)> std::function<std::vector<std::pair<int, double>>(int idx)>
RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices, const void* data, int data_type, int64_t , int64_t ) { RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices, const void* data, int data_type, int64_t , int64_t ) {
if (data_type == C_API_DTYPE_FLOAT32) { if (data_type == C_API_DTYPE_FLOAT32) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment