Commit 01e10529 authored by Guolin Ke's avatar Guolin Ke
Browse files

clean code

parent e161a746
......@@ -43,14 +43,36 @@ public:
virtual void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) = 0;
/*! \brief Training logic */
/*!
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
 * \param is_eval true if evaluation or early stopping is needed
 * \return True if early stopping was triggered or boosting cannot continue
*/
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
virtual std::vector<double> GetEvalAt(int data_idx) const = 0;
/*!
* \brief Get current training score
 * \param out_len length of returned score
* \return training score
*/
virtual const score_t* GetTrainingScore(data_size_t* out_len) const = 0;
virtual void GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const = 0;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
 * \param result used to store prediction result; memory should be allocated before calling this function
 * \param out_len length of returned score
*/
virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) const = 0;
/*!
* \brief Prediction for one record, not sigmoid transform
......@@ -83,7 +105,7 @@ public:
* \brief Restore from a serialized string
* \param model_str The string of model
*/
virtual void ModelsFromString(const std::string& model_str) = 0;
virtual void LoadModelFromString(const std::string& model_str) = 0;
/*!
* \brief Get max feature index of this model
......@@ -107,7 +129,7 @@ public:
* \brief Get number of classes
* \return Number of classes
*/
virtual int NumberOfClass() const = 0;
virtual int NumberOfClasses() const = 0;
/*!
* \brief Set number of used model for prediction
......
......@@ -256,7 +256,7 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
* \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result the string containing evaluation statistics
 * \param out_result the string containing evaluation statistics; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterEval(BoosterHandle handle,
......@@ -281,7 +281,7 @@ this can be used to support customized eval function
* \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result used to set a pointer to array
 * \param out_result used to set a pointer to array; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
......@@ -304,7 +304,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
 * \param out_result used to set a pointer to array; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
......@@ -332,7 +332,7 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
 * \param out_result used to set a pointer to array; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
......
......@@ -28,8 +28,7 @@ public:
* \param is_raw_score True if need to predict result with raw score
* \param predict_leaf_index True if output leaf index instead of prediction score
*/
Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index)
: is_raw_score_(is_raw_score), is_predict_leaf_index_(is_predict_leaf_index) {
Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index) {
boosting_ = boosting;
num_features_ = boosting_->MaxFeatureIdx() + 1;
#pragma omp parallel
......@@ -42,7 +41,7 @@ public:
features_[i] = new double[num_features_];
}
if (is_predict_leaf_index_) {
if (is_predict_leaf_index) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result for leaf index
......@@ -50,7 +49,7 @@ public:
return std::vector<double>(result.begin(), result.end());
};
} else {
if (is_raw_score_) {
if (is_raw_score) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result without sigmoid transformation
......@@ -156,12 +155,8 @@ private:
double** features_;
/*! \brief Number of features */
int num_features_;
/*! \brief True if need to predict result with sigmoid transform */
bool is_raw_score_;
/*! \brief Number of threads */
int num_threads_;
/*! \brief True if output leaf index instead of prediction score */
bool is_predict_leaf_index_;
/*! \brief function for prediction */
PredictFunction predict_fun_;
};
......
......@@ -20,7 +20,7 @@ void LoadFileToBoosting(Boosting* boosting, const char* filename) {
for (auto& line : model_reader.Lines()) {
str_buf << line << '\n';
}
boosting->ModelsFromString(str_buf.str());
boosting->LoadModelFromString(str_buf.str());
}
}
......
......@@ -238,7 +238,7 @@ bool GBDT::OutputMetric(int iter) {
for (auto& sub_metric : training_metrics_) {
auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score());
for (size_t k = 0; k < name.size(); k++) {
for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), scores[k]);
}
}
......@@ -250,7 +250,7 @@ bool GBDT::OutputMetric(int iter) {
auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
if ((iter % gbdt_config_->output_freq) == 0) {
auto name = valid_metrics_[i][j]->GetName();
for (size_t k = 0; k < name.size(); k++) {
for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), test_scores[k]);
}
}
......@@ -299,7 +299,7 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) const {
return train_score_updater_->score();
}
void GBDT::GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const {
void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const {
CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
std::vector<double> ret;
......@@ -373,7 +373,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
if (!model_output_file_.is_open()) {
return;
}
if (num_used_model_ == NO_LIMIT) {
if (num_used_model == NO_LIMIT) {
num_used_model = static_cast<int>(models_.size());
} else {
num_used_model = num_used_model * num_class_;
......@@ -399,7 +399,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
}
}
void GBDT::ModelsFromString(const std::string& model_str) {
void GBDT::LoadModelFromString(const std::string& model_str) {
// use serialized string to restore this object
models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
......
......@@ -42,17 +42,35 @@ public:
void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) override;
/*!
* \brief one training iteration
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
 * \param is_eval true if evaluation or early stopping is needed
 * \return True if early stopping was triggered or boosting cannot continue
*/
bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
std::vector<double> GetEvalAt(int data_idx) const override;
/*! \brief Get prediction result */
/*!
* \brief Get current training score
 * \param out_len length of returned score
* \return training score
*/
const score_t* GetTrainingScore(data_size_t* out_len) const override;
void GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const override;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
 * \param out_result used to store prediction result; memory should be allocated before calling this function
 * \param out_len length of returned score
*/
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const override;
/*!
 * \brief Prediction for one record without sigmoid transformation
......@@ -83,7 +101,7 @@ public:
/*!
* \brief Restore from a serialized string
*/
void ModelsFromString(const std::string& model_str) override;
void LoadModelFromString(const std::string& model_str) override;
/*!
* \brief Get max feature index of this model
* \return Max feature index of this model
......@@ -106,7 +124,7 @@ public:
* \brief Get number of classes
* \return Number of classes
*/
inline int NumberOfClass() const override { return num_class_; }
inline int NumberOfClasses() const override { return num_class_; }
/*!
* \brief Set number of used model for prediction
......
......@@ -124,7 +124,7 @@ public:
}
const Boosting* GetBoosting() const { return boosting_; }
const inline int NumberOfClass() const { return boosting_->NumberOfClass(); }
const inline int NumberOfClasses() const { return boosting_->NumberOfClasses(); }
private:
......@@ -203,10 +203,10 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
const size_t sample_cnt = static_cast<size_t>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt);
auto sample_indices = rand.Sample(nrow, sample_cnt);
std::vector<std::vector<double>> sample_values(ncol);
for (size_t i = 0; i < sample_indices.size(); i++) {
for (size_t i = 0; i < sample_indices.size(); ++i) {
auto idx = sample_indices[i];
auto row = get_row_fun(static_cast<int>(idx));
for (size_t j = 0; j < row.size(); j++) {
for (size_t j = 0; j < row.size(); ++j) {
sample_values[j].push_back(row[j]);
}
}
......@@ -477,7 +477,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting();
int len = 0;
boosting->GetPredict(data, out_result, &len);
boosting->GetPredictAt(data, out_result, &len);
*out_len = static_cast<uint64_t>(len);
return 0;
}
......@@ -498,13 +498,13 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
auto get_row_fun = Common::RowFunctionFromCSR(indptr, indices, data, float_type, nindptr, nelem);
int num_class = ref_booster->NumberOfClass();
int num_class = ref_booster->NumberOfClasses();
int nrow = static_cast<int>(nindptr - 1);
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; j++) {
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
}
}
......@@ -525,25 +525,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
auto get_row_fun = Common::RowPairFunctionFromDenseMatric(data, nrow, ncol, float_type, is_row_major);
int num_class = ref_booster->NumberOfClass();
int num_class = ref_booster->NumberOfClasses();
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; j++) {
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
}
}
return 0;
}
/*!
* \brief save model into file
* \param handle handle
* \param num_used_model
* \param filename file name
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_used_model,
const char* filename) {
......
......@@ -27,7 +27,7 @@ BruckMap BruckMap::Construct(int rank, int num_machines) {
// distance at k-th communication, distance[k] = 2^k
std::vector<int> distance;
int k = 0;
for (k = 0; (1 << k) < num_machines; k++) {
for (k = 0; (1 << k) < num_machines; ++k) {
distance.push_back(1 << k);
}
BruckMap bruckMap(k);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment