Unverified Commit 3654ecaa authored by AndreyOrb's avatar AndreyOrb Committed by GitHub
Browse files

[c++] Fixed Predictor lifecycle and trees initialization in Contrib mode (#6778)



* 1) Fixed Predictor lifecycle
2) Fixed Boosting trees initialization

#5482

* Added tests for LGBM_BoosterPredictForMat in Contrib mode

* #6778 Reverted indentation to 4 spaces

---------
Co-authored-by: James Lamb <jaylamb20@gmail.com>
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 226e7f7d
...@@ -433,11 +433,18 @@ class GBDT : public GBDTBase { ...@@ -433,11 +433,18 @@ class GBDT : public GBDTBase {
num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration; num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
} }
start_iteration_for_pred_ = start_iteration; start_iteration_for_pred_ = start_iteration;
if (is_pred_contrib) {
if (is_pred_contrib && !models_initialized_) {
std::lock_guard<std::mutex> lock(instance_mutex_);
if (models_initialized_)
return;
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int i = 0; i < static_cast<int>(models_.size()); ++i) { for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
models_[i]->RecomputeMaxDepth(); models_[i]->RecomputeMaxDepth();
} }
models_initialized_ = true;
} }
} }
...@@ -548,6 +555,10 @@ class GBDT : public GBDTBase { ...@@ -548,6 +555,10 @@ class GBDT : public GBDTBase {
int max_feature_idx_; int max_feature_idx_;
/*! \brief Parser config file content */ /*! \brief Parser config file content */
std::string parser_config_str_ = ""; std::string parser_config_str_ = "";
/*! \brief Are the models initialized (passed RecomputeMaxDepth phase) */
bool models_initialized_ = false;
/*! \brief Mutex for exclusive models initialization */
std::mutex instance_mutex_;
#ifdef USE_CUDA #ifdef USE_CUDA
/*! \brief First order derivative of training data */ /*! \brief First order derivative of training data */
......
...@@ -460,7 +460,7 @@ class Booster { ...@@ -460,7 +460,7 @@ class Booster {
*out_len = single_row_predictor->num_pred_in_one_row; *out_len = single_row_predictor->num_pred_in_one_row;
} }
Predictor CreatePredictor(int start_iteration, int num_iteration, int predict_type, int ncol, const Config& config) const { std::shared_ptr<Predictor> CreatePredictor(int start_iteration, int num_iteration, int predict_type, int ncol, const Config& config) const {
if (!config.predict_disable_shape_check && ncol != boosting_->MaxFeatureIdx() + 1) { if (!config.predict_disable_shape_check && ncol != boosting_->MaxFeatureIdx() + 1) {
Log::Fatal("The number of features in data (%d) is not the same as it was in training data (%d).\n" \ Log::Fatal("The number of features in data (%d) is not the same as it was in training data (%d).\n" \
"You can set ``predict_disable_shape_check=true`` to discard this error, but please be aware what you are doing.", ncol, boosting_->MaxFeatureIdx() + 1); "You can set ``predict_disable_shape_check=true`` to discard this error, but please be aware what you are doing.", ncol, boosting_->MaxFeatureIdx() + 1);
...@@ -478,7 +478,7 @@ class Booster { ...@@ -478,7 +478,7 @@ class Booster {
is_raw_score = false; is_raw_score = false;
} }
return Predictor(boosting_.get(), start_iteration, num_iteration, is_raw_score, is_predict_leaf, predict_contrib, return std::make_shared<Predictor>(boosting_.get(), start_iteration, num_iteration, is_raw_score, is_predict_leaf, predict_contrib,
config.pred_early_stop, config.pred_early_stop_freq, config.pred_early_stop_margin); config.pred_early_stop, config.pred_early_stop_freq, config.pred_early_stop_margin);
} }
...@@ -496,7 +496,7 @@ class Booster { ...@@ -496,7 +496,7 @@ class Booster {
predict_contrib = true; predict_contrib = true;
} }
int64_t num_pred_in_one_row = boosting_->NumPredictOneRow(start_iteration, num_iteration, is_predict_leaf, predict_contrib); int64_t num_pred_in_one_row = boosting_->NumPredictOneRow(start_iteration, num_iteration, is_predict_leaf, predict_contrib);
auto pred_fun = predictor.GetPredictFunction(); auto pred_fun = predictor->GetPredictFunction();
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
for (int i = 0; i < nrow; ++i) { for (int i = 0; i < nrow; ++i) {
...@@ -517,7 +517,7 @@ class Booster { ...@@ -517,7 +517,7 @@ class Booster {
int32_t** out_indices, void** out_data, int data_type, int32_t** out_indices, void** out_data, int data_type,
bool* is_data_float32_ptr, int num_matrices) const { bool* is_data_float32_ptr, int num_matrices) const {
auto predictor = CreatePredictor(start_iteration, num_iteration, predict_type, ncol, config); auto predictor = CreatePredictor(start_iteration, num_iteration, predict_type, ncol, config);
auto pred_sparse_fun = predictor.GetPredictSparseFunction(); auto pred_sparse_fun = predictor->GetPredictSparseFunction();
std::vector<std::vector<std::unordered_map<int, double>>>& agg = *agg_ptr; std::vector<std::vector<std::unordered_map<int, double>>>& agg = *agg_ptr;
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
...@@ -652,7 +652,7 @@ class Booster { ...@@ -652,7 +652,7 @@ class Booster {
// Get the number of trees per iteration (for multiclass scenario we output multiple sparse matrices) // Get the number of trees per iteration (for multiclass scenario we output multiple sparse matrices)
int num_matrices = boosting_->NumModelPerIteration(); int num_matrices = boosting_->NumModelPerIteration();
auto predictor = CreatePredictor(start_iteration, num_iteration, predict_type, ncol, config); auto predictor = CreatePredictor(start_iteration, num_iteration, predict_type, ncol, config);
auto pred_sparse_fun = predictor.GetPredictSparseFunction(); auto pred_sparse_fun = predictor->GetPredictSparseFunction();
bool is_col_ptr_int32 = false; bool is_col_ptr_int32 = false;
bool is_data_float32 = false; bool is_data_float32 = false;
int num_output_cols = ncol + 1; int num_output_cols = ncol + 1;
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
using LightGBM::TestUtils; using LightGBM::TestUtils;
TEST(SingleRow, JustWorks) { void test_predict_type(int predict_type, int num_predicts) {
// Load some test data // Load some test data
int result; int result;
...@@ -37,17 +37,19 @@ TEST(SingleRow, JustWorks) { ...@@ -37,17 +37,19 @@ TEST(SingleRow, JustWorks) {
booster_handle, booster_handle,
&n_features); &n_features);
EXPECT_EQ(0, result) << "LGBM_BoosterGetNumFeature result code: " << result; EXPECT_EQ(0, result) << "LGBM_BoosterGetNumFeature result code: " << result;
EXPECT_EQ(28, n_features) << "LGBM_BoosterGetNumFeature number of features: " << n_features;
// Run a single row prediction and compare with regular Mat prediction: // Run a single row prediction and compare with regular Mat prediction:
int64_t output_size; int64_t output_size;
result = LGBM_BoosterCalcNumPredict( result = LGBM_BoosterCalcNumPredict(
booster_handle, booster_handle,
1, 1,
C_API_PREDICT_NORMAL, // predict_type predict_type, // predict_type
0, // start_iteration 0, // start_iteration
-1, // num_iteration -1, // num_iteration
&output_size); &output_size);
EXPECT_EQ(0, result) << "LGBM_BoosterCalcNumPredict result code: " << result; EXPECT_EQ(0, result) << "LGBM_BoosterCalcNumPredict result code: " << result;
EXPECT_EQ(num_predicts, output_size) << "LGBM_BoosterCalcNumPredict output size: " << output_size;
std::ifstream test_file("examples/binary_classification/binary.test"); std::ifstream test_file("examples/binary_classification/binary.test");
std::vector<double> test; std::vector<double> test;
...@@ -77,7 +79,7 @@ TEST(SingleRow, JustWorks) { ...@@ -77,7 +79,7 @@ TEST(SingleRow, JustWorks) {
test_set_size, // nrow test_set_size, // nrow
n_features, // ncol n_features, // ncol
1, // is_row_major 1, // is_row_major
C_API_PREDICT_NORMAL, // predict_type predict_type, // predict_type
0, // start_iteration 0, // start_iteration
-1, // num_iteration -1, // num_iteration
"", "",
...@@ -85,13 +87,47 @@ TEST(SingleRow, JustWorks) { ...@@ -85,13 +87,47 @@ TEST(SingleRow, JustWorks) {
&mat_output[0]); &mat_output[0]);
EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result; EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;
// Now let's run with the single row fast prediction API: // Test LGBM_BoosterPredictForMat in multi-threaded mode
const int kNThreads = 10; const int kNThreads = 10;
const int numIterations = 5;
std::vector<std::thread> predict_for_mat_threads(kNThreads);
for (int i = 0; i < kNThreads; i++) {
predict_for_mat_threads[i] = std::thread(
[
i, test_set_size, output_size, n_features,
test = &test[0], booster_handle, predict_type, numIterations
]() {
for (int j = 0; j < numIterations; j++) {
int result;
std::vector<double> mat_output(output_size * test_set_size, -1);
int64_t written;
result = LGBM_BoosterPredictForMat(
booster_handle,
&test[0],
C_API_DTYPE_FLOAT64,
test_set_size, // nrow
n_features, // ncol
1, // is_row_major
predict_type, // predict_type
0, // start_iteration
-1, // num_iteration
"",
&written,
&mat_output[0]);
EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;
}
});
}
for (std::thread& t : predict_for_mat_threads) {
t.join();
}
// Now let's run with the single row fast prediction API:
FastConfigHandle fast_configs[kNThreads]; FastConfigHandle fast_configs[kNThreads];
for (int i = 0; i < kNThreads; i++) { for (int i = 0; i < kNThreads; i++) {
result = LGBM_BoosterPredictForMatSingleRowFastInit( result = LGBM_BoosterPredictForMatSingleRowFastInit(
booster_handle, booster_handle,
C_API_PREDICT_NORMAL, // predict_type predict_type, // predict_type
0, // start_iteration 0, // start_iteration
-1, // num_iteration -1, // num_iteration
C_API_DTYPE_FLOAT64, C_API_DTYPE_FLOAT64,
...@@ -102,14 +138,14 @@ TEST(SingleRow, JustWorks) { ...@@ -102,14 +138,14 @@ TEST(SingleRow, JustWorks) {
} }
std::vector<double> single_row_output(output_size * test_set_size, -1); std::vector<double> single_row_output(output_size * test_set_size, -1);
std::vector<std::thread> threads(kNThreads); std::vector<std::thread> single_row_threads(kNThreads);
int batch_size = (test_set_size + kNThreads - 1) / kNThreads; // round up int batch_size = (test_set_size + kNThreads - 1) / kNThreads; // round up
for (int i = 0; i < kNThreads; i++) { for (int i = 0; i < kNThreads; i++) {
threads[i] = std::thread( single_row_threads[i] = std::thread(
[ [
i, batch_size, test_set_size, output_size, n_features, i, batch_size, test_set_size, output_size, n_features,
test = &test[0], fast_configs = &fast_configs[0], single_row_output = &single_row_output[0] test = &test[0], fast_configs = &fast_configs[0], single_row_output = &single_row_output[0]
](){ ]() {
int result; int result;
int64_t written; int64_t written;
for (int j = i * batch_size; j < std::min((i + 1) * batch_size, test_set_size); j++) { for (int j = i * batch_size; j < std::min((i + 1) * batch_size, test_set_size); j++) {
...@@ -122,8 +158,8 @@ TEST(SingleRow, JustWorks) { ...@@ -122,8 +158,8 @@ TEST(SingleRow, JustWorks) {
EXPECT_EQ(written, output_size) << "LGBM_BoosterPredictForMatSingleRowFast unexpected written output size"; EXPECT_EQ(written, output_size) << "LGBM_BoosterPredictForMatSingleRowFast unexpected written output size";
} }
}); });
} }
for (std::thread &t : threads) { for (std::thread& t : single_row_threads) {
t.join(); t.join();
} }
...@@ -141,3 +177,11 @@ TEST(SingleRow, JustWorks) { ...@@ -141,3 +177,11 @@ TEST(SingleRow, JustWorks) {
result = LGBM_DatasetFree(train_dataset); result = LGBM_DatasetFree(train_dataset);
EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result;
} }
// Regular (non-contrib) prediction path: C_API_PREDICT_NORMAL yields one
// prediction value per row for this binary-classification model.
TEST(SingleRow, Normal) {
  test_predict_type(C_API_PREDICT_NORMAL, 1);
}
// SHAP/contribution prediction path: C_API_PREDICT_CONTRIB yields one value
// per feature plus a bias term per row (28 features + 1 = 29 — see the
// n_features expectation in test_predict_type). Exercises the lazily
// initialized RecomputeMaxDepth path guarded by models_initialized_.
TEST(SingleRow, Contrib) {
  test_predict_type(C_API_PREDICT_CONTRIB, 29);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment