#ifndef LIGHTGBM_PREDICTOR_HPP_ #define LIGHTGBM_PREDICTOR_HPP_ #include #include #include #include #include #include #include #include #include #include #include namespace LightGBM { /*! * \brief Used to prediction data with input model */ class Predictor { public: /*! * \brief Constructor * \param boosting Input boosting model * \param is_raw_score True if need to predict result with raw score * \param predict_leaf_index True if output leaf index instead of prediction score */ Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index) : is_raw_score_(is_raw_score), is_predict_leaf_index_(is_predict_leaf_index) { boosting_ = boosting; num_features_ = boosting_->MaxFeatureIdx() + 1; #pragma omp parallel #pragma omp master { num_threads_ = omp_get_num_threads(); } features_ = new double*[num_threads_]; for (int i = 0; i < num_threads_; ++i) { features_[i] = new double[num_features_]; } } /*! * \brief Destructor */ ~Predictor() { if (features_ != nullptr) { for (int i = 0; i < num_threads_; ++i) { delete[] features_[i]; } delete[] features_; } } /*! * \brief prediction for one record, only raw result (without sigmoid transformation) * \param features Feature for this record * \return Prediction result */ std::vector PredictRawOneLine(const std::vector>& features) { const int tid = PutFeatureValuesToBuffer(features); // get result without sigmoid transformation return boosting_->PredictRaw(features_[tid]); } /*! * \brief prediction for one record, only raw result (without sigmoid transformation) * \param features Feature for this record * \return Predictied leaf index */ std::vector PredictLeafIndexOneLine(const std::vector>& features) { const int tid = PutFeatureValuesToBuffer(features); // get result for leaf index return boosting_->PredictLeafIndex(features_[tid]); } /*! * \brief prediction for one record, will use sigmoid transformation if needed (only enabled for binary classification noe) * \param features Feature of this record * \return Prediction result */ std::vector PredictOneLine(const std::vector>& features) { const int tid = PutFeatureValuesToBuffer(features); // get result with sigmoid transform if needed return boosting_->Predict(features_[tid]); } /*! * \brief predicting on data, then saving result to disk * \param data_filename Filename of data * \param has_label True if this data contains label * \param result_filename Filename of output result */ void Predict(const char* data_filename, const char* result_filename, bool has_header) { FILE* result_file; #ifdef _MSC_VER fopen_s(&result_file, result_filename, "w"); #else result_file = fopen(result_filename, "w"); #endif if (result_file == NULL) { Log::Fatal("Prediction results file %s doesn't exist", data_filename); } Parser* parser = Parser::CreateParser(data_filename, has_header, num_features_, boosting_->LabelIdx()); if (parser == nullptr) { Log::Fatal("Could not recognize the data format of data file %s", data_filename); } // function for parse data std::function>*)> parser_fun; double tmp_label; parser_fun = [this, &parser, &tmp_label] (const char* buffer, std::vector>* feature) { parser->ParseOneLine(buffer, feature, &tmp_label); }; std::function>&)> predict_fun; if (is_predict_leaf_index_) { predict_fun = [this](const std::vector>& features){ return Common::Join(PredictLeafIndexOneLine(features), '\t'); }; } else { if (is_raw_score_) { predict_fun = [this](const std::vector>& features){ return Common::Join(PredictRawOneLine(features), '\t'); }; } else { predict_fun = [this](const std::vector>& features){ return Common::Join(PredictOneLine(features), '\t'); }; } } std::function&)> process_fun = [this, &parser_fun, &predict_fun, &result_file] (data_size_t, const std::vector& lines) { std::vector> oneline_features; std::vector pred_result(lines.size(), ""); #pragma omp parallel for schedule(static) private(oneline_features) for (data_size_t i = 0; i < static_cast(lines.size()); ++i) { oneline_features.clear(); // parser parser_fun(lines[i].c_str(), &oneline_features); // predict pred_result[i] = predict_fun(oneline_features); } for (size_t i = 0; i < pred_result.size(); ++i) { fprintf(result_file, "%s\n", pred_result[i].c_str()); } }; TextReader predict_data_reader(data_filename, has_header); predict_data_reader.ReadAllAndProcessParallel(process_fun); fclose(result_file); delete parser; } private: int PutFeatureValuesToBuffer(const std::vector>& features) { int tid = omp_get_thread_num(); // init feature value std::memset(features_[tid], 0, sizeof(double)*num_features_); // put feature value for (const auto& p : features) { if (p.first < num_features_) { features_[tid][p.first] = p.second; } } return tid; } /*! \brief Boosting model */ const Boosting* boosting_; /*! \brief Buffer for feature values */ double** features_; /*! \brief Number of features */ int num_features_; /*! \brief True if need to predict result with sigmoid transform */ bool is_raw_score_; /*! \brief Number of threads */ int num_threads_; /*! \brief True if output leaf index instead of prediction score */ bool is_predict_leaf_index_; }; } // namespace LightGBM #endif // LightGBM_PREDICTOR_HPP_