tianlh / LightGBM-DCU / Commits / 0b9fe27a

Commit 0b9fe27a authored Oct 25, 2016 by Hui Xue

t push origin masterMerge branch 'xuehui1991-update_for_dcg'

merge to master.

Parents: 0dcd422a, bb05a06f
Showing 20 changed files with 43 additions and 44 deletions (+43 -44)
include/LightGBM/bin.h                      +3 -3
include/LightGBM/boosting.h                 +2 -2
include/LightGBM/config.h                   +5 -5
include/LightGBM/dataset.h                  +7 -7
include/LightGBM/network.h                  +1 -1
src/application/application.cpp             +1 -1
src/boosting/gbdt.cpp                       +0 -1
src/boosting/gbdt.h                         +3 -3
src/boosting/score_updater.hpp              +2 -2
src/io/dataset.cpp                          +4 -4
src/io/dense_bin.hpp                        +1 -1
src/io/ordered_sparse_bin.hpp               +1 -1
src/metric/binary_metric.hpp                +1 -1
src/metric/dcg_calculator.cpp               +1 -1
src/metric/regression_metric.hpp            +1 -1
src/network/network.cpp                     +2 -2
src/objective/rank_objective.hpp            +1 -1
src/treelearner/feature_histogram.hpp       +3 -3
src/treelearner/leaf_splits.hpp             +2 -2
src/treelearner/parallel_tree_learner.h     +2 -2
include/LightGBM/bin.h

@@ -119,7 +119,7 @@ private:
 };
 /*!
-* \brief Interface for ordered bin data. It very efficient for construct histogram, especially for sparse bin
+* \brief Interface for ordered bin data. It's very efficient for constructing histogram, especially for sparse bin
 * There are 2 advantages for using ordered bin.
 * 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram consturction for sparse feature.

@@ -253,7 +253,7 @@ public:
   virtual OrderedBin* CreateOrderedBin() const = 0;
   /*!
-  * \brief After pushed all feature data, should call this to have better refactor for bin data
+  * \brief After pushed all feature data, call this could have better refactor for bin data
   */
   virtual void FinishLoad() = 0;

@@ -261,7 +261,7 @@ public:
   * \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
   * \param num_data Total number of data
   * \param num_bin Number of bin
-  * \param is_sparse True if this feature is saprese
+  * \param is_sparse True if this feature is sparse
   * \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
   * \param is_enable_sparse True if enable sparse feature
   * \param is_sparse Will set to true if this bin is sparse
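The two advantages listed in this comment, grouping the data by leaf and storing only non-zero bins, are what make ordered bins cheap to turn into histograms. As a rough illustration only (this is not the actual OrderedBin interface; HistogramEntry, LeafEntries and ConstructHistogramForLeaf are made-up names), a per-leaf pass over only the non-zero entries might look like this:

// Simplified sketch: entries for one leaf are contiguous and only non-zero
// bins are stored, so building the histogram touches a small, cache-friendly
// slice instead of scanning every row for every feature.
#include <cstdint>
#include <utility>
#include <vector>

struct HistogramEntry { double sum_gradient = 0.0; double sum_hessian = 0.0; };

// (data index, bin value) pairs for the non-zero bins of one leaf.
using LeafEntries = std::vector<std::pair<int, uint8_t>>;

void ConstructHistogramForLeaf(const LeafEntries& entries_of_leaf,
                               const std::vector<double>& gradients,
                               const std::vector<double>& hessians,
                               std::vector<HistogramEntry>* hist) {
  for (const auto& e : entries_of_leaf) {   // visit non-zero bins only
    HistogramEntry& slot = (*hist)[e.second];
    slot.sum_gradient += gradients[e.first];
    slot.sum_hessian += hessians[e.first];
  }
}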
include/LightGBM/boosting.h

@@ -47,14 +47,14 @@ public:
   virtual void Train() = 0;
   /*!
-  * \brief Predtion for one record, not sigmoid transform
+  * \brief Prediction for one record, not sigmoid transform
   * \param feature_values Feature value on this record
   * \return Prediction result for this record
   */
   virtual double PredictRaw(const double* feature_values) const = 0;
   /*!
-  * \brief Predtion for one record, will use sigmoid transform if needed
+  * \brief Prediction for one record, will use sigmoid transform if needed
   * \param feature_values Feature value on this record
   * \return Prediction result for this record
   */
include/LightGBM/config.h

@@ -20,7 +20,7 @@ public:
   virtual ~ConfigBase() {}
   /*!
-  * \brief Set LabelAt current config object by params
+  * \brief Set current config object by params
   * \param params Store the key and value for params
   */
   virtual void Set(

@@ -30,7 +30,7 @@ public:
   * \brief Get string value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetString(

@@ -41,7 +41,7 @@ public:
   * \brief Get int value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetInt(

@@ -52,7 +52,7 @@ public:
   * \brief Get double value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetDouble(

@@ -63,7 +63,7 @@ public:
   * \brief Get bool value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetBool(
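All four getters documented above share one contract: look the key up in the params map, assign to *out only if it exists, and return whether it did. A minimal sketch of that contract (GetIntSketch is a hypothetical helper, not the LightGBM implementation, which also has its own parsing and error handling):

#include <string>
#include <unordered_map>

// Sketch of the documented behaviour: *out is assigned only when the key
// exists, and the return value reports whether it existed.
inline bool GetIntSketch(const std::unordered_map<std::string, std::string>& params,
                         const std::string& name, int* out) {
  auto it = params.find(name);
  if (it == params.end()) return false;  // key missing: leave *out untouched
  *out = std::stoi(it->second);          // key present: assign and report success
  return true;
}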
include/LightGBM/dataset.h

@@ -17,7 +17,7 @@ namespace LightGBM {
 class Feature;
 /*!
-* \brief This class is used to store some meta(non-feature) data for tranining data,
+* \brief This class is used to store some meta(non-feature) data for training data,
 * e.g. labels, weights, initial scores, qurey level informations.
 *
 * Some details:

@@ -110,14 +110,14 @@ public:
   }
   /*!
-  * \brief Get weights, if not exists, will return nullput
+  * \brief Get weights, if not exists, will return nullptr
   * \return Pointer of weights
   */
   inline const float* weights() const { return weights_; }
   /*!
-  * \brief Get data boundaries on queries, if not exists, will return nullput
+  * \brief Get data boundaries on queries, if not exists, will return nullptr
   * we assume data will order by query,
   * the interval of [query_boundaris[i], query_boundaris[i+1])
   * is the data indices for query i.

@@ -133,13 +133,13 @@ public:
   inline const data_size_t num_queries() const { return num_queries_; }
   /*!
-  * \brief Get weights for queries, if not exists, will return nullput
+  * \brief Get weights for queries, if not exists, will return nullptr
   * \return Pointer of weights for queries
   */
   inline const float* query_weights() const { return query_weights_; }
   /*!
-  * \brief Get initial scores, if not exists, will return nullput
+  * \brief Get initial scores, if not exists, will return nullptr
   * \return Pointer of initial scores
   */
   inline const score_t* init_score() const { return init_score_; }

@@ -231,7 +231,7 @@ public:
   * \param max_bin The maximal number of bin that feature values will bucket in
   * \param random_seed The seed for random generator
   * \param is_enable_sparse True for sparse feature
-  * \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
+  * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
   */
   Dataset(const char* data_filename, const char* init_score_filename,
     int max_bin, int random_seed, bool is_enable_sparse,
     const PredictFunction& predict_fun);

@@ -243,7 +243,7 @@ public:
   * \param max_bin The maximal number of bin that feature values will bucket in
   * \param random_seed The seed for random generator
   * \param is_enable_sparse True for sparse feature
-  * \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
+  * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
   */
   Dataset(const char* data_filename,
     int max_bin, int random_seed, bool is_enable_sparse,
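The query_boundaries layout mentioned in this file's comments is easiest to see with a small example (the numbers below are made up, not taken from the repository): data is ordered by query, and query i owns the half-open row range [query_boundaries[i], query_boundaries[i+1]).

#include <cstdio>
#include <vector>

int main() {
  // 3 queries over 10 rows: the boundary array has num_queries + 1 entries.
  std::vector<int> query_boundaries = {0, 3, 7, 10};
  int num_queries = static_cast<int>(query_boundaries.size()) - 1;
  for (int q = 0; q < num_queries; ++q) {
    std::printf("query %d owns rows [%d, %d)\n",
                q, query_boundaries[q], query_boundaries[q + 1]);
  }
  return 0;
}
// prints: query 0 owns rows [0, 3), query 1 owns rows [3, 7), query 2 owns rows [7, 10)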
include/LightGBM/network.h

@@ -37,7 +37,7 @@ public:
 /*!
 * \brief node type on recursive halving algorithm
-* When number of machines is not power of 2, need group maiches into power of 2 group.
+* When number of machines is not power of 2, need group machines into power of 2 group.
 * And we can let each group has at most 2 machines.
 * if the group only has 1 machine. this machine is the normal node
 * if the grou has 2 machines, this group will have two type of nodes, one is the leader.
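One reading of this comment, sketched loosely (this is my interpretation of the grouping rule, not the actual Network code; GroupSizes is a made-up helper): keep g = 2^floor(log2(n)) groups and pair each of the n - g surplus machines with one of the first groups, so every group ends up with either 1 or 2 machines.

#include <vector>

// Hypothetical helper: size (1 or 2) of each of the 2^floor(log2(n)) groups.
std::vector<int> GroupSizes(int num_machines) {
  int num_groups = 1;
  while (num_groups * 2 <= num_machines) num_groups *= 2;  // largest power of two <= n
  std::vector<int> sizes(num_groups, 1);
  int surplus = num_machines - num_groups;
  for (int i = 0; i < surplus; ++i) sizes[i] = 2;  // these groups hold a leader plus one neighbor
  return sizes;
}
// Example: 6 machines -> 4 groups with sizes {2, 2, 1, 1}.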
src/application/application.cpp

@@ -80,7 +80,7 @@ void Application::LoadParameters(int argc, char** argv) {
     config_reader.ReadAllLines();
     if (config_reader.Lines().size() > 0) {
       for (auto& line : config_reader.Lines()) {
-        // remove str after #
+        // remove str after "#"
         if (line.size() > 0 && std::string::npos != line.find_first_of("#")) {
           line.erase(line.find_first_of("#"));
         }
src/boosting/gbdt.cpp

@@ -248,7 +248,6 @@ std::string GBDT::ModelsToString() const {
 void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
   // use serialized string to restore this object
-  // deseialize string to object????
   models_.clear();
   std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
   size_t i = 0;
src/boosting/gbdt.h

@@ -107,7 +107,7 @@ private:
   */
   void UpdateScore(const Tree* tree);
   /*!
-  * \brief Print Metric result of current iteration
+  * \brief Print metric result of current iteration
   * \param iter Current interation
   */
   void OutputMetric(int iter);

@@ -116,11 +116,11 @@ private:
   const Dataset* train_data_;
   /*! \brief Config of gbdt */
   const GBDTConfig* gbdt_config_;
-  /*! \brief Tree learner, will use tihs class to learn trees */
+  /*! \brief Tree learner, will use this class to learn trees */
   TreeLearner* tree_learner_;
   /*! \brief Objective function */
   const ObjectiveFunction* object_function_;
-  /*! \brief Store and update traning data's score */
+  /*! \brief Store and update training data's score */
   ScoreUpdater* train_score_updater_;
   /*! \brief Metrics for training data */
   std::vector<const Metric*> training_metrics_;
src/boosting/score_updater.hpp

@@ -57,8 +57,8 @@ public:
   * \brief Like AddScore(const Tree* tree), but only for part of data
   * Used for prediction of training out-of-bad data
   * \param tree Trained tree model
-  * \param data_indices Indices of data that want proccess to
-  * \param data_cnt Number of data that want proccess to
+  * \param data_indices Indices of data that will be proccessed
+  * \param data_cnt Number of data that will be proccessed
   */
   inline void AddScore(const Tree* tree, const data_size_t* data_indices, data_size_t data_cnt) {
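The idea behind this overload, sketched with a stand-in predictor (predict_one and AddScoreForSubset are hypothetical names, not the Tree/ScoreUpdater API): the new tree's output is added only to the scores of the listed rows, for example the out-of-bag rows, and every other score is left untouched.

#include <functional>
#include <vector>

void AddScoreForSubset(const std::function<double(int)>& predict_one,
                       const std::vector<int>& data_indices,
                       std::vector<double>* scores) {
  for (int idx : data_indices) {
    (*scores)[idx] += predict_one(idx);  // accumulate this tree's output for the chosen rows only
  }
}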
src/io/dataset.cpp

@@ -31,12 +31,12 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
     // create text parser
     parser_ = Parser::CreateParser(data_filename_, 0, nullptr);
     if (parser_ == nullptr) {
-      Log::Stderr("cannot recognise input data format, filename: %s", data_filename_);
+      Log::Stderr("cannot recognize input data format, filename: %s", data_filename_);
     }
     // create text reader
     text_reader_ = new TextReader<data_size_t>(data_filename);
   } else {
-    // only need to load initilize score, other meta data will load from bin flie
+    // only need to load initilize score, other meta data will be loaded from bin flie
     metadata_.Init(init_score_filename);
     Log::Stdout("will load data set from binary file");
     parser_ = nullptr;

@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
   size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
-  // re-allocmate space if not enough
+  // re-allocate space if not enough
   if (size_of_metadata > buffer_size) {
     delete[] buffer;
     buffer_size = size_of_metadata;

@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
     Log::Stderr("binary file format error at feature %d's size", i);
   }
   size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
-  // re-allocmate space if not enough
+  // re-allocate space if not enough
   if (size_of_feature > buffer_size) {
     delete[] buffer;
     buffer_size = size_of_feature;
src/io/dense_bin.hpp

@@ -10,7 +10,7 @@
 namespace LightGBM {
 /*!
-* \brief Used to Store bins for dense feature
+* \brief Used to store bins for dense feature
 * Use template to reduce memory cost
 */
 template <typename VAL_T>
src/io/ordered_sparse_bin.hpp

@@ -13,7 +13,7 @@
 namespace LightGBM {
 /*!
-* \brief Ordered bin for sparse feature . efficient for construct histogram, especally for sparse bin
+* \brief Ordered bin for sparse feature . Efficient for construct histogram, especally for sparse bin
 * There are 2 advantages for using ordered bin.
 * 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature.
src/metric/binary_metric.hpp

@@ -225,7 +225,7 @@ public:
   }
 private:
-  /*! \brief Output frequently */
+  /*! \brief Output frequency */
   int output_freq_;
   /*! \brief Number of data */
   data_size_t num_data_;
src/metric/dcg_calculator.cpp

@@ -21,7 +21,7 @@ void DCGCalculator::Init(std::vector<double> input_label_gain) {
   label_gain_ = input_label_gain;
   discount_.clear();
   for (data_size_t i = 0; i < kMaxPosition; ++i) {
-    discount_.emplace_back(1.0 / std::log(2.0 + i));
+    discount_.emplace_back(1.0 / std::log2(2.0 + i));
   }
   is_inited_ = true;
 }
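This is the one behavioral change in the commit: the position discount switches from 1/ln(i+2) to 1/log2(i+2), the usual DCG definition for zero-based position i. Since 1/ln(x) = (1/ln 2) * 1/log2(x), the old code scaled every discount by the same constant, so reported DCG values change while NDCG ratios (DCG divided by the ideal DCG) are unaffected. A simplified sketch of how such a cached discount table is used (illustration only, not the actual DCGCalculator code):

#include <cmath>
#include <vector>

// DCG of a list whose gains are already in ranked order, using a
// precomputed discount table discount[i] = 1.0 / std::log2(2.0 + i).
double ComputeDCG(const std::vector<double>& gains_in_ranked_order,
                  const std::vector<double>& discount) {
  double dcg = 0.0;
  for (size_t i = 0; i < gains_in_ranked_order.size() && i < discount.size(); ++i) {
    dcg += gains_in_ranked_order[i] * discount[i];  // gain weighted by position discount
  }
  return dcg;
}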
src/metric/regression_metric.hpp

@@ -65,7 +65,7 @@ public:
   }
 private:
-  /*! \brief Output frequently */
+  /*! \brief Output frequency */
   int output_freq_;
   /*! \brief Number of data */
   data_size_t num_data_;
src/network/network.cpp

@@ -9,7 +9,7 @@
 namespace LightGBM {
-// static member defination
+// static member definition
 int Network::num_machines_;
 int Network::rank_;
 Linkers* Network::linkers_;

@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
     // send local data to neighbor first
     linkers_->Send(recursive_halving_map_.neighbor, input, input_size);
   } else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) {
-    // recieve neighbor data first
+    // receive neighbor data first
     int need_recv_cnt = input_size;
     linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt);
     // reduce
src/objective/rank_objective.hpp

@@ -50,7 +50,7 @@ public:
       Log::Stderr("For NDCG metric, should have query information");
     }
     num_queries_ = metadata.num_queries();
-    // cache inverse max DCG, avoid compution many times
+    // cache inverse max DCG, avoid computation many times
     inverse_max_dcgs_ = new score_t[num_queries_];
     for (data_size_t i = 0; i < num_queries_; ++i) {
       inverse_max_dcgs_[i] = static_cast<score_t>(
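Why the inverse of the maximum DCG is cached per query: a query's ideal DCG never changes during training, so computing its reciprocal once turns every later NDCG evaluation into a single multiplication (ndcg = dcg * inverse_max_dcg). A simplified sketch under that assumption (InverseMaxDCG is a made-up helper, not the repository's code):

#include <algorithm>
#include <functional>
#include <vector>

double InverseMaxDCG(std::vector<double> gains, const std::vector<double>& discount) {
  std::sort(gains.begin(), gains.end(), std::greater<double>());  // ideal (descending) order
  double max_dcg = 0.0;
  for (size_t i = 0; i < gains.size() && i < discount.size(); ++i) {
    max_dcg += gains[i] * discount[i];
  }
  return max_dcg > 0.0 ? 1.0 / max_dcg : 0.0;  // guard queries with no positive gain
}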
src/treelearner/feature_histogram.hpp

@@ -40,7 +40,7 @@ public:
   * \brief Construct a histogram
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   * \param ordered_gradients Orederd gradients
   * \param ordered_hessians Ordered hessians
   * \param data_indices data indices of current leaf

@@ -59,7 +59,7 @@ public:
   * \param leaf current leaf
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   * \param gradients
   * \param hessian
   */

@@ -76,7 +76,7 @@ public:
   * \brief Set sumup information for current histogram
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   */
   void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
     num_data_ = num_data;
src/treelearner/leaf_splits.hpp

@@ -26,7 +26,7 @@ public:
   }
   /*!
-  * \brief Init splits on current leaf, don't need to travesal all data
+  * \brief Init splits on current leaf, don't need to traverse all data
   * \param leaf Index of current leaf
   * \param data_partition current data partition
   * \param sum_gradients

@@ -43,7 +43,7 @@ public:
   }
   /*!
-  * \brief Init splits on current leaf, need to travesal all data to sum up
+  * \brief Init splits on current leaf, need to traverse all data to sum up
   * \param gradients
   * \param hessians
   */
src/treelearner/parallel_tree_learner.h

@@ -77,9 +77,9 @@ private:
   int* block_start_;
   /*! \brief Block size for reduce scatter */
   int* block_len_;
-  /*! \brief Write positions for feature histgrams */
+  /*! \brief Write positions for feature histograms */
   int* buffer_write_start_pos_;
-  /*! \brief Read positions for local feature histgrams */
+  /*! \brief Read positions for local feature histograms */
   int* buffer_read_start_pos_;
   /*! \brief Size for reduce scatter */
   int reduce_scatter_size_;