Commit 01e10529 authored by Guolin Ke's avatar Guolin Ke
Browse files

clean code

parent e161a746
......@@ -43,14 +43,36 @@ public:
virtual void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) = 0;
/*! \brief Training logic */
/*!
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
 * \param is_eval true if evaluation or early stopping is needed
 * \return True if early stopping was triggered or boosting cannot continue
*/
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
virtual std::vector<double> GetEvalAt(int data_idx) const = 0;
/*!
* \brief Get current training score
 * \param out_len length of returned score
* \return training score
*/
virtual const score_t* GetTrainingScore(data_size_t* out_len) const = 0;
virtual void GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const = 0;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
 * \param result used to store prediction result; memory should be allocated before calling this function
 * \param out_len length of returned score
*/
virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) const = 0;
/*!
* \brief Prediction for one record, not sigmoid transform
......@@ -83,7 +105,7 @@ public:
* \brief Restore from a serialized string
* \param model_str The string of model
*/
virtual void ModelsFromString(const std::string& model_str) = 0;
virtual void LoadModelFromString(const std::string& model_str) = 0;
/*!
* \brief Get max feature index of this model
......@@ -107,7 +129,7 @@ public:
* \brief Get number of classes
* \return Number of classes
*/
virtual int NumberOfClass() const = 0;
virtual int NumberOfClasses() const = 0;
/*!
* \brief Set number of used model for prediction
......
......@@ -256,7 +256,7 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
* \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result the string containing evaluation statistics
 * \param out_result the string containing evaluation statistics; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterEval(BoosterHandle handle,
......@@ -281,7 +281,7 @@ this can be used to support customized eval function
* \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result used to set a pointer to array
 * \param out_result used to set a pointer to array; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
......@@ -304,7 +304,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
 * \param out_result used to set a pointer to array; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
......@@ -332,7 +332,7 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
 * \param out_result used to set a pointer to array; memory should be allocated before calling this function
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
......
......@@ -28,8 +28,7 @@ public:
* \param is_raw_score True if need to predict result with raw score
* \param predict_leaf_index True if output leaf index instead of prediction score
*/
Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index)
: is_raw_score_(is_raw_score), is_predict_leaf_index_(is_predict_leaf_index) {
Predictor(const Boosting* boosting, bool is_raw_score, bool is_predict_leaf_index) {
boosting_ = boosting;
num_features_ = boosting_->MaxFeatureIdx() + 1;
#pragma omp parallel
......@@ -42,7 +41,7 @@ public:
features_[i] = new double[num_features_];
}
if (is_predict_leaf_index_) {
if (is_predict_leaf_index) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result for leaf index
......@@ -50,7 +49,7 @@ public:
return std::vector<double>(result.begin(), result.end());
};
} else {
if (is_raw_score_) {
if (is_raw_score) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result without sigmoid transformation
......@@ -156,12 +155,8 @@ private:
double** features_;
/*! \brief Number of features */
int num_features_;
/*! \brief True if need to predict result with sigmoid transform */
bool is_raw_score_;
/*! \brief Number of threads */
int num_threads_;
/*! \brief True if output leaf index instead of prediction score */
bool is_predict_leaf_index_;
/*! \brief function for prediction */
PredictFunction predict_fun_;
};
......
......@@ -20,7 +20,7 @@ void LoadFileToBoosting(Boosting* boosting, const char* filename) {
for (auto& line : model_reader.Lines()) {
str_buf << line << '\n';
}
boosting->ModelsFromString(str_buf.str());
boosting->LoadModelFromString(str_buf.str());
}
}
......
......@@ -238,7 +238,7 @@ bool GBDT::OutputMetric(int iter) {
for (auto& sub_metric : training_metrics_) {
auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score());
for (size_t k = 0; k < name.size(); k++) {
for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), scores[k]);
}
}
......@@ -250,7 +250,7 @@ bool GBDT::OutputMetric(int iter) {
auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
if ((iter % gbdt_config_->output_freq) == 0) {
auto name = valid_metrics_[i][j]->GetName();
for (size_t k = 0; k < name.size(); k++) {
for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), test_scores[k]);
}
}
......@@ -299,7 +299,7 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) const {
return train_score_updater_->score();
}
void GBDT::GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const {
void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const {
CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
std::vector<double> ret;
......@@ -373,7 +373,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
if (!model_output_file_.is_open()) {
return;
}
if (num_used_model_ == NO_LIMIT) {
if (num_used_model == NO_LIMIT) {
num_used_model = static_cast<int>(models_.size());
} else {
num_used_model = num_used_model * num_class_;
......@@ -399,7 +399,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
}
}
void GBDT::ModelsFromString(const std::string& model_str) {
void GBDT::LoadModelFromString(const std::string& model_str) {
// use serialized string to restore this object
models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
......
......@@ -42,17 +42,35 @@ public:
void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) override;
/*!
* \brief one training iteration
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
 * \param is_eval true if evaluation or early stopping is needed
 * \return True if early stopping was triggered or boosting cannot continue
*/
bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
std::vector<double> GetEvalAt(int data_idx) const override;
/*! \brief Get prediction result */
/*!
* \brief Get current training score
 * \param out_len length of returned score
* \return training score
*/
const score_t* GetTrainingScore(data_size_t* out_len) const override;
void GetPredict(int data_idx, score_t* out_result, data_size_t* out_len) const override;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
 * \param out_result used to store prediction result; memory should be allocated before calling this function
 * \param out_len length of returned score
*/
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const override;
/*!
 * \brief Prediction for one record without sigmoid transformation
......@@ -83,7 +101,7 @@ public:
/*!
* \brief Restore from a serialized string
*/
void ModelsFromString(const std::string& model_str) override;
void LoadModelFromString(const std::string& model_str) override;
/*!
* \brief Get max feature index of this model
* \return Max feature index of this model
......@@ -106,7 +124,7 @@ public:
* \brief Get number of classes
* \return Number of classes
*/
inline int NumberOfClass() const override { return num_class_; }
inline int NumberOfClasses() const override { return num_class_; }
/*!
* \brief Set number of used model for prediction
......
......@@ -124,7 +124,7 @@ public:
}
const Boosting* GetBoosting() const { return boosting_; }
const inline int NumberOfClass() const { return boosting_->NumberOfClass(); }
const inline int NumberOfClasses() const { return boosting_->NumberOfClasses(); }
private:
......@@ -203,10 +203,10 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
const size_t sample_cnt = static_cast<size_t>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt);
auto sample_indices = rand.Sample(nrow, sample_cnt);
std::vector<std::vector<double>> sample_values(ncol);
for (size_t i = 0; i < sample_indices.size(); i++) {
for (size_t i = 0; i < sample_indices.size(); ++i) {
auto idx = sample_indices[i];
auto row = get_row_fun(static_cast<int>(idx));
for (size_t j = 0; j < row.size(); j++) {
for (size_t j = 0; j < row.size(); ++j) {
sample_values[j].push_back(row[j]);
}
}
......@@ -477,7 +477,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting();
int len = 0;
boosting->GetPredict(data, out_result, &len);
boosting->GetPredictAt(data, out_result, &len);
*out_len = static_cast<uint64_t>(len);
return 0;
}
......@@ -498,13 +498,13 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
auto get_row_fun = Common::RowFunctionFromCSR(indptr, indices, data, float_type, nindptr, nelem);
int num_class = ref_booster->NumberOfClass();
int num_class = ref_booster->NumberOfClasses();
int nrow = static_cast<int>(nindptr - 1);
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; j++) {
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
}
}
......@@ -525,25 +525,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
auto get_row_fun = Common::RowPairFunctionFromDenseMatric(data, nrow, ncol, float_type, is_row_major);
int num_class = ref_booster->NumberOfClass();
int num_class = ref_booster->NumberOfClasses();
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; j++) {
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
}
}
return 0;
}
/*!
* \brief save model into file
* \param handle handle
* \param num_used_model
* \param filename file name
* \return 0 when success, -1 when failure happens
*/
DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_used_model,
const char* filename) {
......
......@@ -27,7 +27,7 @@ BruckMap BruckMap::Construct(int rank, int num_machines) {
// distance at k-th communication, distance[k] = 2^k
std::vector<int> distance;
int k = 0;
for (k = 0; (1 << k) < num_machines; k++) {
for (k = 0; (1 << k) < num_machines; ++k) {
distance.push_back(1 << k);
}
BruckMap bruckMap(k);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment