Commit 6f7eac7e authored by Qiwei Ye

updates comments for easier reading

parent aee30126
@@ -8,17 +8,17 @@
namespace LightGBM {
/*! \brief forward declaration */
class Dataset;
class Boosting;
class ObjectiveFunction;
class Metric;
/*!
* \brief The entrance of LightGBM. this application has two tasks:
* \brief The main entrance of LightGBM. This application has two tasks:
* Train and Predict.
* Train task will train a new model
* Predict task will predicting the scores of test data then saving the score to local disk
* Predict task will predict the scores of test data using an existing model,
* and save the scores to disk.
*/
class Application {
public:
@@ -32,9 +32,9 @@ public:
private:
/*!
* \brief Global Sync by minimal, will return minimal of global
* \brief Global sync by minimum; returns the minimal T across nodes
* \param local Local data
* \return Global minimal data
* \return Minimal value across nodes
*/
template<typename T>
T GlobalSyncUpByMin(T& local);
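A min-sync like this is typically an allreduce with a min operator. A minimal sketch using raw MPI as an illustrative stand-in (LightGBM's own network layer is not shown in this diff):

```cpp
#include <mpi.h>

// Illustrative only: reduce a local value to the global minimum across
// all nodes. MPI_Allreduce is a stand-in for LightGBM's network layer.
int GlobalSyncUpByMinSketch(int local) {
  int global = local;
  MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
  return global;
}
```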
@@ -45,19 +45,19 @@ private:
/*! \brief Load data, including training data and validation data*/
void LoadData();
/*! \brief Some initial works before training*/
/*! \brief Initialization before training*/
void InitTrain();
/*! \brief The training logic */
/*! \brief Main training logic */
void Train();
/*! \brief Initialize the enviroment needed by prediction */
/*! \brief Initialization before prediction */
void InitPredict();
/*! \brief Load model */
/*! \brief Load model from local disk */
void LoadModel();
/*! \brief The prediction logic */
/*! \brief Main prediction logic */
void Predict();
/*! \brief All configs */
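Read together, the methods above imply a two-branch control flow. A minimal sketch, assuming a hypothetical Run() dispatcher and task flag (neither appears in this diff):

```cpp
// Hypothetical dispatcher; TaskType and config_.task_ are assumed names.
// Only the six called methods are declared in this header.
void Application::Run() {
  if (config_.task_ == TaskType::kTrain) {
    LoadData();     // load training and validation data
    InitTrain();    // initialization before training
    Train();        // main training logic
  } else {
    InitPredict();  // initialization before prediction
    LoadModel();    // load model from local disk
    Predict();      // main prediction logic
  }
}
```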
......
@@ -20,7 +20,7 @@ public:
data_size_t cnt = 0;
/*!
* \brief Sum up reduce function for histogram bin
* \brief Sum-up (reducer) function for histogram bins
*/
inline static void SumReducer(const char *src, char *dst, int len) {
const int type_size = sizeof(HistogramBinEntry);
@@ -42,8 +42,8 @@ public:
}
};
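The hunk elides the reducer body. A sketch of what a sum reducer over packed HistogramBinEntry buffers usually looks like; cnt is shown above, while sum_gradients and sum_hessians are assumed field names:

```cpp
// Sketch: element-wise sum of two buffers of packed HistogramBinEntry.
// sum_gradients / sum_hessians are assumed names; cnt appears above.
inline static void SumReducer(const char* src, char* dst, int len) {
  const int type_size = sizeof(HistogramBinEntry);
  int used_size = 0;
  while (used_size < len) {
    const auto* p1 = reinterpret_cast<const HistogramBinEntry*>(src);
    auto* p2 = reinterpret_cast<HistogramBinEntry*>(dst);
    p2->cnt += p1->cnt;                      // merge data counts
    p2->sum_gradients += p1->sum_gradients;  // merge gradient sums
    p2->sum_hessians += p1->sum_hessians;    // merge hessian sums
    src += type_size;
    dst += type_size;
    used_size += type_size;
  }
}
```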
/*! \brief This class used to convert featrue value to bin,
* and store some meta infomartion for bin*/
/*! \brief This class is used to convert feature values into bins,
* and to store some meta information for the bins */
class BinMapper {
public:
BinMapper();
@@ -53,9 +53,9 @@ public:
/*! \brief Get number of bins */
inline int num_bin() const { return num_bin_; }
/*! \brief True if bin is trival(only contain one bin) */
/*! \brief True if the bin is trivial (contains only one bin) */
inline bool is_trival() const { return is_trival_; }
/*! \brief Sparse rate of this bins( num_zero_bins / num_data ) */
/*! \brief Sparsity of this bin (num_zero_bins / num_data) */
inline double sparse_rate() const { return sparse_rate_; }
/*!
* \brief Save binary data to file
@@ -63,9 +63,9 @@ public:
*/
void SaveBinaryToFile(FILE* file) const;
/*!
* \brief Map bin to feature value
* \brief Maps a bin back to its feature value
* \param bin
* \return Feature value for this bin
* \return Feature value of this bin
*/
inline double BinToValue(unsigned int bin) const {
return bin_upper_bound_[bin];
@@ -75,7 +75,7 @@ public:
*/
size_t SizesInByte() const;
/*!
* \brief Map feature value to bin
* \brief Maps a feature value to its bin
* \param value
* \return bin for this feature value
*/
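Since BinToValue above is a direct lookup into bin_upper_bound_, the inverse mapping is typically a binary search over the same sorted array. A sketch under that assumption (the real ValueToBin body is elided by the hunk):

```cpp
#include <algorithm>
#include <vector>

// Sketch: the first bin whose upper bound covers `value` is its bin.
// Assumes bin_upper_bound is sorted ascending, as BinToValue implies.
unsigned int ValueToBinSketch(const std::vector<double>& bin_upper_bound,
                              double value) {
  auto it = std::lower_bound(bin_upper_bound.begin(),
                             bin_upper_bound.end(), value);
  return static_cast<unsigned int>(it - bin_upper_bound.begin());
}
```

Note that the round trip BinToValue(ValueToBin(v)) yields the upper bound of v's bin rather than v itself; binning is lossy by design.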
@@ -96,13 +96,13 @@ public:
static int SizeForSpecificBin(int bin);
/*!
* \brief Copy this object to buffer
* \brief Serializes this object to a buffer
* \param buffer The destination
*/
void CopyTo(char* buffer);
/*!
* \brief Restore this object from buffer
* \brief Deserializes this object from a buffer
* \param buffer The source
*/
void CopyFrom(const char* buffer);
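A sketch of the flat memcpy-style copy such CopyTo/CopyFrom pairs usually perform; the exact member list sits in the elided private section, so only the members with accessors shown above appear here:

```cpp
#include <cstring>

// Sketch: flat serialization of POD members into a caller's buffer.
// The real member layout is hidden in the elided private section.
void BinMapper::CopyTo(char* buffer) {
  std::memcpy(buffer, &num_bin_, sizeof(num_bin_));
  buffer += sizeof(num_bin_);
  std::memcpy(buffer, &is_trival_, sizeof(is_trival_));
  buffer += sizeof(is_trival_);
  std::memcpy(buffer, &sparse_rate_, sizeof(sparse_rate_));
  // ... bin_upper_bound_ and any remaining members would follow
}
```

CopyFrom would mirror this, memcpy-ing from the buffer back into the members in the same order.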
@@ -119,12 +119,12 @@ private:
};
/*!
* \brief Interface for ordered bin data. efficient for construct histogram, especally for sparse bin
* There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature.
* But it has a additional cost, it need re-order the bins after leaf split, which will cost much for dense feature.
* So we only use ordered bin for sparse features now.
* \brief Interface for ordered bin data. Efficient for constructing histograms, especially for sparse bins.
* There are two advantages to using ordered bins:
* 1. The data are grouped by leaf, which improves the cache hit rate.
* 2. Only non-zero bins are stored, which speeds up histogram construction for sparse features.
* However, it brings an additional cost: the bins need to be re-ordered after every split, which is expensive for dense features.
* So ordered bins are used only for sparse features.
*/
class OrderedBin {
public:
@@ -132,16 +132,17 @@ public:
virtual ~OrderedBin() {}
/*!
* \brief Initial logic, call before train one tree.
* \param used_indices If used_indices==nullptr means using all data, otherwise, used_indices[i] != 0 means i-th data is used(for bagging logic)
* \param num_leavas Number of leveas on this iteration
* \brief Initialization logic; called before training each tree.
* \param used_indices If used_indices == nullptr, all data are used; otherwise used_indices[i] != 0 means the i-th data point is used
* (this supports the bagging logic)
* \param num_leaves Number of leaves on this iteration
*/
virtual void Init(const char* used_indices, data_size_t num_leavas) = 0;
virtual void Init(const char* used_indices, data_size_t num_leaves) = 0;
/*!
* \brief Construct histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians.
* Because it is hard to know the relative index in one leaf for sparse bin, since we skipped zero bins.
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians.
* Because it is hard to know the relative index in one leaf for sparse bin, since we skipped zero bins.
* \param leaf Using which leaf's data to construct
* \param gradients Gradients, note: not ordered by leaf
* \param hessians Hessians, note: not ordered by leaf
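A sketch of why the leaf-grouped layout helps: all entries of one leaf are contiguous, so the histogram pass is a single linear scan. SparseEntry, leaf_start, and entries are assumed names; gradients and hessians are indexed in raw data order, matching the note above:

```cpp
#include <vector>

// Assumed helper type for illustration only: (data index, bin) pair.
struct SparseEntry { data_size_t data_idx; unsigned int bin; };

void OrderedHistogramSketch(int leaf,
                            const std::vector<data_size_t>& leaf_start,
                            const std::vector<SparseEntry>& entries,
                            const float* gradients, const float* hessians,
                            HistogramBinEntry* out) {
  // Entries of `leaf` are contiguous: sequential, cache-friendly scan.
  for (data_size_t i = leaf_start[leaf]; i < leaf_start[leaf + 1]; ++i) {
    const SparseEntry& e = entries[i];
    out[e.bin].sum_gradients += gradients[e.data_idx];  // raw data order
    out[e.bin].sum_hessians += hessians[e.data_idx];
    ++out[e.bin].cnt;
  }
}
```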
@@ -172,9 +173,9 @@ public:
/*!
* \brief Interface for bin data. This class will store bin data for one feature.
* unlike OrderedBin, this class will store data by original order.
* Though it may have many cache miss when construct histogram,
* but it doesn't need to re-order operation, So it is still faster than OrderedBin for dense feature
* Unlike OrderedBin, this class stores data in the original order.
* Note that this may cause cache misses when constructing the histogram,
* but it avoids the re-order operation, so it is still faster than OrderedBin for dense features.
*/
class Bin {
public:
@@ -218,10 +219,11 @@ public:
/*!
* \brief Construct histogram of this feature,
* Note: here use ordered_gradients and ordered_hessians to improve cache hit chance
* The navie solution is use gradients[data_indices[i]] for data_indices[i] to get gradients, which is not cache friendly, since the access of memory is not continuous.
* ordered_gradients and ordered_hessians are preprocessed, they are re-ordered by data_indices.
* It uses ordered_gradients[i] for data_indices[i]'s gradients (same for ordered_hessians).
* Note: we use ordered_gradients and ordered_hessians to improve the cache hit rate.
* The naive solution is to use gradients[data_indices[i]] to get the gradient of data_indices[i],
* which is not cache friendly, since the memory accesses are not contiguous.
* ordered_gradients and ordered_hessians are preprocessed: they are re-ordered by data_indices, so
* ordered_gradients[i] holds data_indices[i]'s gradient (same for ordered_hessians).
* \param data_indices Used data indices in current leaf
* \param num_data Number of used data
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
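A sketch of the gather-then-stream pattern the comment describes; bin_of stands in for this Bin's per-row bin lookup (assumed name), and the gather normally happens once per leaf before this call:

```cpp
// Sketch: pre-gather gradients into data_indices order, then stream.
// `bin_of` is an assumed name for this feature's per-row bin data.
void DenseHistogramSketch(const data_size_t* data_indices,
                          data_size_t num_data,
                          const float* gradients, const float* hessians,
                          const unsigned char* bin_of,
                          float* ordered_gradients, float* ordered_hessians,
                          HistogramBinEntry* out) {
  for (data_size_t i = 0; i < num_data; ++i) {
    ordered_gradients[i] = gradients[data_indices[i]];  // gather once
    ordered_hessians[i] = hessians[data_indices[i]];
  }
  for (data_size_t i = 0; i < num_data; ++i) {
    const unsigned int bin = bin_of[data_indices[i]];
    out[bin].sum_gradients += ordered_gradients[i];  // contiguous reads
    out[bin].sum_hessians += ordered_hessians[i];
    ++out[bin].cnt;
  }
}
```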
@@ -282,7 +284,6 @@ public:
* \brief Create object for bin data of one feature, used for sparse feature
* \param num_data Total number of data
* \param num_bin Number of bin
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \return The bin data object
*/
static Bin* CreateSparseBin(data_size_t num_data,