"...tests/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "9713ff40accf2d878b034a8b862a39e9932a10d3"
Commit 01e10529 authored by Guolin Ke's avatar Guolin Ke
Browse files

clean code

parent e161a746
...@@ -43,14 +43,36 @@ public: ...@@ -43,14 +43,36 @@ public:
virtual void AddDataset(const Dataset* valid_data, virtual void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) = 0; const std::vector<const Metric*>& valid_metrics) = 0;
/*! \brief Training logic */ /*!
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
* \param is_eval true if evaluation or early stopping is needed
* \return True if early stopping is triggered or boosting cannot continue
*/
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0; virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
virtual std::vector<double> GetEvalAt(int data_idx) const = 0; virtual std::vector<double> GetEvalAt(int data_idx) const = 0;
/*!
* \brief Get current training score
* \param out_len length of returned score
* \return training score
*/
virtual const score_t* GetTrainingScore(data_size_t* out_len) const = 0; virtual const score_t* GetTrainingScore(data_size_t* out_len) const = 0;
virtual void GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const = 0; /*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param result used to store prediction result; memory must be allocated before calling this function
* \param out_len length of returned score
*/
virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) const = 0;
/*! /*!
* \brief Prediction for one record, not sigmoid transform * \brief Prediction for one record, not sigmoid transform
...@@ -83,7 +105,7 @@ public: ...@@ -83,7 +105,7 @@ public:
* \brief Restore from a serialized string * \brief Restore from a serialized string
* \param model_str The string of model * \param model_str The string of model
*/ */
virtual void ModelsFromString(const std::string& model_str) = 0; virtual void LoadModelFromString(const std::string& model_str) = 0;
/*! /*!
* \brief Get max feature index of this model * \brief Get max feature index of this model
...@@ -107,7 +129,7 @@ public: ...@@ -107,7 +129,7 @@ public:
* \brief Get number of classes * \brief Get number of classes
* \return Number of classes * \return Number of classes
*/ */
virtual int NumberOfClass() const = 0; virtual int NumberOfClasses() const = 0;
/*! /*!
* \brief Set number of used model for prediction * \brief Set number of used model for prediction
......
...@@ -256,7 +256,7 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle, ...@@ -256,7 +256,7 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
* \param handle handle * \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ... * \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result * \param out_len len of output result
* \param out_result the string containing evaluation statistics * \param out_result the string containing evaluation statistics, should allocate memory before call this function
* \return 0 when success, -1 when failure happens * \return 0 when success, -1 when failure happens
*/ */
DllExport int LGBM_BoosterEval(BoosterHandle handle, DllExport int LGBM_BoosterEval(BoosterHandle handle,
...@@ -281,7 +281,7 @@ this can be used to support customized eval function ...@@ -281,7 +281,7 @@ this can be used to support customized eval function
* \param handle handle * \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ... * \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result * \param out_len len of output result
* \param out_result used to set a pointer to array * \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens * \return 0 when success, -1 when failure happens
*/ */
DllExport int LGBM_BoosterGetPredict(BoosterHandle handle, DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
...@@ -304,7 +304,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle, ...@@ -304,7 +304,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
* 1:with transform(if needed) * 1:with transform(if needed)
* 2:leaf index * 2:leaf index
* \param n_used_trees number of used tree * \param n_used_trees number of used tree
* \param out_result used to set a pointer to array * \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens * \return 0 when success, -1 when failure happens
*/ */
DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle, DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
...@@ -332,7 +332,7 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle, ...@@ -332,7 +332,7 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* 1:with transform(if needed) * 1:with transform(if needed)
* 2:leaf index * 2:leaf index
* \param n_used_trees number of used tree * \param n_used_trees number of used tree
* \param out_result used to set a pointer to array * \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when success, -1 when failure happens * \return 0 when success, -1 when failure happens
*/ */
DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle, DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
......
...@@ -28,8 +28,7 @@ public: ...@@ -28,8 +28,7 @@ public:
* \param is_raw_score True if need to predict result with raw score * \param is_raw_score True if need to predict result with raw score
* \param predict_leaf_index True if output leaf index instead of prediction score * \param predict_leaf_index True if output leaf index instead of prediction score
*/ */
Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index) Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index) {
: is_raw_score_(is_raw_score), is_predict_leaf_index_(is_predict_leaf_index) {
boosting_ = boosting; boosting_ = boosting;
num_features_ = boosting_->MaxFeatureIdx() + 1; num_features_ = boosting_->MaxFeatureIdx() + 1;
#pragma omp parallel #pragma omp parallel
...@@ -42,7 +41,7 @@ public: ...@@ -42,7 +41,7 @@ public:
features_[i] = new double[num_features_]; features_[i] = new double[num_features_];
} }
if (is_predict_leaf_index_) { if (is_predict_leaf_index) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) { predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features); const int tid = PutFeatureValuesToBuffer(features);
// get result for leaf index // get result for leaf index
...@@ -50,7 +49,7 @@ public: ...@@ -50,7 +49,7 @@ public:
return std::vector<double>(result.begin(), result.end()); return std::vector<double>(result.begin(), result.end());
}; };
} else { } else {
if (is_raw_score_) { if (is_raw_score) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) { predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features); const int tid = PutFeatureValuesToBuffer(features);
// get result without sigmoid transformation // get result without sigmoid transformation
...@@ -156,12 +155,8 @@ private: ...@@ -156,12 +155,8 @@ private:
double** features_; double** features_;
/*! \brief Number of features */ /*! \brief Number of features */
int num_features_; int num_features_;
/*! \brief True if need to predict result with sigmoid transform */
bool is_raw_score_;
/*! \brief Number of threads */ /*! \brief Number of threads */
int num_threads_; int num_threads_;
/*! \brief True if output leaf index instead of prediction score */
bool is_predict_leaf_index_;
/*! \brief function for prediction */ /*! \brief function for prediction */
PredictFunction predict_fun_; PredictFunction predict_fun_;
}; };
......
...@@ -20,7 +20,7 @@ void LoadFileToBoosting(Boosting* boosting, const char* filename) { ...@@ -20,7 +20,7 @@ void LoadFileToBoosting(Boosting* boosting, const char* filename) {
for (auto& line : model_reader.Lines()) { for (auto& line : model_reader.Lines()) {
str_buf << line << '\n'; str_buf << line << '\n';
} }
boosting->ModelsFromString(str_buf.str()); boosting->LoadModelFromString(str_buf.str());
} }
} }
......
...@@ -238,7 +238,7 @@ bool GBDT::OutputMetric(int iter) { ...@@ -238,7 +238,7 @@ bool GBDT::OutputMetric(int iter) {
for (auto& sub_metric : training_metrics_) { for (auto& sub_metric : training_metrics_) {
auto name = sub_metric->GetName(); auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score()); auto scores = sub_metric->Eval(train_score_updater_->score());
for (size_t k = 0; k < name.size(); k++) { for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), scores[k]); Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), scores[k]);
} }
} }
...@@ -250,7 +250,7 @@ bool GBDT::OutputMetric(int iter) { ...@@ -250,7 +250,7 @@ bool GBDT::OutputMetric(int iter) {
auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score()); auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
if ((iter % gbdt_config_->output_freq) == 0) { if ((iter % gbdt_config_->output_freq) == 0) {
auto name = valid_metrics_[i][j]->GetName(); auto name = valid_metrics_[i][j]->GetName();
for (size_t k = 0; k < name.size(); k++) { for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), test_scores[k]); Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), test_scores[k]);
} }
} }
...@@ -299,7 +299,7 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) const { ...@@ -299,7 +299,7 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) const {
return train_score_updater_->score(); return train_score_updater_->score();
} }
void GBDT::GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const { void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const {
CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size())); CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
std::vector<double> ret; std::vector<double> ret;
...@@ -373,7 +373,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen ...@@ -373,7 +373,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
if (!model_output_file_.is_open()) { if (!model_output_file_.is_open()) {
return; return;
} }
if (num_used_model_ == NO_LIMIT) { if (num_used_model == NO_LIMIT) {
num_used_model = static_cast<int>(models_.size()); num_used_model = static_cast<int>(models_.size());
} else { } else {
num_used_model = num_used_model * num_class_; num_used_model = num_used_model * num_class_;
...@@ -399,7 +399,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen ...@@ -399,7 +399,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
} }
} }
void GBDT::ModelsFromString(const std::string& model_str) { void GBDT::LoadModelFromString(const std::string& model_str) {
// use serialized string to restore this object // use serialized string to restore this object
models_.clear(); models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n'); std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
......
...@@ -42,17 +42,35 @@ public: ...@@ -42,17 +42,35 @@ public:
void AddDataset(const Dataset* valid_data, void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) override; const std::vector<const Metric*>& valid_metrics) override;
/*! /*!
* \brief one training iteration * \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
* \param is_eval true if evaluation or early stopping is needed
* \return True if early stopping is triggered or boosting cannot continue
*/ */
bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override; bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
std::vector<double> GetEvalAt(int data_idx) const override; std::vector<double> GetEvalAt(int data_idx) const override;
/*! \brief Get prediction result */ /*!
* \brief Get current training score
* \param out_len length of returned score
* \return training score
*/
const score_t* GetTrainingScore(data_size_t* out_len) const override; const score_t* GetTrainingScore(data_size_t* out_len) const override;
void GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const override; /*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param out_result used to store prediction result; memory must be allocated before calling this function
* \param out_len length of returned score
*/
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const override;
/*! /*!
* \brief Predtion for one record without sigmoid transformation * \brief Predtion for one record without sigmoid transformation
...@@ -83,7 +101,7 @@ public: ...@@ -83,7 +101,7 @@ public:
/*! /*!
* \brief Restore from a serialized string * \brief Restore from a serialized string
*/ */
void ModelsFromString(const std::string& model_str) override; void LoadModelFromString(const std::string& model_str) override;
/*! /*!
* \brief Get max feature index of this model * \brief Get max feature index of this model
* \return Max feature index of this model * \return Max feature index of this model
...@@ -106,7 +124,7 @@ public: ...@@ -106,7 +124,7 @@ public:
* \brief Get number of classes * \brief Get number of classes
* \return Number of classes * \return Number of classes
*/ */
inline int NumberOfClass() const override { return num_class_; } inline int NumberOfClasses() const override { return num_class_; }
/*! /*!
* \brief Set number of used model for prediction * \brief Set number of used model for prediction
......
...@@ -124,7 +124,7 @@ public: ...@@ -124,7 +124,7 @@ public:
} }
const Boosting* GetBoosting() const { return boosting_; } const Boosting* GetBoosting() const { return boosting_; }
const inline int NumberOfClass() const { return boosting_->NumberOfClass(); } const inline int NumberOfClasses() const { return boosting_->NumberOfClasses(); }
private: private:
...@@ -203,10 +203,10 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data, ...@@ -203,10 +203,10 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
const size_t sample_cnt = static_cast<size_t>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt); const size_t sample_cnt = static_cast<size_t>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt);
auto sample_indices = rand.Sample(nrow, sample_cnt); auto sample_indices = rand.Sample(nrow, sample_cnt);
std::vector<std::vector<double>> sample_values(ncol); std::vector<std::vector<double>> sample_values(ncol);
for (size_t i = 0; i < sample_indices.size(); i++) { for (size_t i = 0; i < sample_indices.size(); ++i) {
auto idx = sample_indices[i]; auto idx = sample_indices[i];
auto row = get_row_fun(static_cast<int>(idx)); auto row = get_row_fun(static_cast<int>(idx));
for (size_t j = 0; j < row.size(); j++) { for (size_t j = 0; j < row.size(); ++j) {
sample_values[j].push_back(row[j]); sample_values[j].push_back(row[j]);
} }
} }
...@@ -477,7 +477,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle, ...@@ -477,7 +477,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting(); auto boosting = ref_booster->GetBoosting();
int len = 0; int len = 0;
boosting->GetPredict(data, out_result, &len); boosting->GetPredictAt(data, out_result, &len);
*out_len = static_cast<uint64_t>(len); *out_len = static_cast<uint64_t>(len);
return 0; return 0;
} }
...@@ -498,13 +498,13 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle, ...@@ -498,13 +498,13 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type); ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
auto get_row_fun = Common::RowFunctionFromCSR(indptr, indices, data, float_type, nindptr, nelem); auto get_row_fun = Common::RowFunctionFromCSR(indptr, indices, data, float_type, nindptr, nelem);
int num_class = ref_booster->NumberOfClass(); int num_class = ref_booster->NumberOfClasses();
int nrow = static_cast<int>(nindptr - 1); int nrow = static_cast<int>(nindptr - 1);
#pragma omp parallel for schedule(guided) #pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) { for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i); auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row); auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; j++) { for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j]; out_result[i * num_class + j] = predicton_result[j];
} }
} }
...@@ -525,25 +525,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle, ...@@ -525,25 +525,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type); ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
auto get_row_fun = Common::RowPairFunctionFromDenseMatric(data, nrow, ncol, float_type, is_row_major); auto get_row_fun = Common::RowPairFunctionFromDenseMatric(data, nrow, ncol, float_type, is_row_major);
int num_class = ref_booster->NumberOfClass(); int num_class = ref_booster->NumberOfClasses();
#pragma omp parallel for schedule(guided) #pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) { for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i); auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row); auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; j++) { for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j]; out_result[i * num_class + j] = predicton_result[j];
} }
} }
return 0; return 0;
} }
/*!
* \brief save model into file
* \param handle handle
* \param num_used_model
* \param filename file name
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterSaveModel(BoosterHandle handle, DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_used_model, int num_used_model,
const char* filename) { const char* filename) {
......
...@@ -27,7 +27,7 @@ BruckMap BruckMap::Construct(int rank, int num_machines) { ...@@ -27,7 +27,7 @@ BruckMap BruckMap::Construct(int rank, int num_machines) {
// distance at k-th communication, distance[k] = 2^k // distance at k-th communication, distance[k] = 2^k
std::vector<int> distance; std::vector<int> distance;
int k = 0; int k = 0;
for (k = 0; (1 << k) < num_machines; k++) { for (k = 0; (1 << k) < num_machines; ++k) {
distance.push_back(1 << k); distance.push_back(1 << k);
} }
BruckMap bruckMap(k); BruckMap bruckMap(k);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment