Commit 6f7eac7e authored by Qiwei Ye's avatar Qiwei Ye
Browse files

updates comments for easy-reading

parent aee30126
...@@ -8,17 +8,17 @@ ...@@ -8,17 +8,17 @@
namespace LightGBM { namespace LightGBM {
/*! \brief forward declaration */
class Dataset; class Dataset;
class Boosting; class Boosting;
class ObjectiveFunction; class ObjectiveFunction;
class Metric; class Metric;
/*! /*!
* \brief The entrance of LightGBM. this application has two tasks: * \brief The main entrance of LightGBM. this application has two tasks:
* Train and Predict. * Train and Predict.
* Train task will train a new model * Train task will train a new model
* Predict task will predicting the scores of test data then saving the score to local disk * Predict task will predict the scores of test data using an existing model,
* and save the scores to disk.
*/ */
class Application { class Application {
public: public:
...@@ -32,9 +32,9 @@ public: ...@@ -32,9 +32,9 @@ public:
private: private:
/*! /*!
* \brief Global Sync by minimal, will return minimal of global * \brief Global Sync by minimal, will return minimal T across nodes
* \param local Local data * \param local Local data
* \return Global minimal data * \return minimal values across nodes
*/ */
template<typename T> template<typename T>
T GlobalSyncUpByMin(T& local); T GlobalSyncUpByMin(T& local);
...@@ -45,19 +45,19 @@ private: ...@@ -45,19 +45,19 @@ private:
/*! \brief Load data, including training data and validation data*/ /*! \brief Load data, including training data and validation data*/
void LoadData(); void LoadData();
/*! \brief Some initial works before training*/ /*! \brief Initialization before training*/
void InitTrain(); void InitTrain();
/*! \brief The training logic */ /*! \brief Main Training logic */
void Train(); void Train();
/*! \brief Initialize the enviroment needed by prediction */ /*! \brief Initializations before prediction */
void InitPredict(); void InitPredict();
/*! \brief Load model */ /*! \brief Load model from local disk */
void LoadModel(); void LoadModel();
/*! \brief The prediction logic */ /*! \brief Main predicting logic */
void Predict(); void Predict();
/*! \brief All configs */ /*! \brief All configs */
......
...@@ -20,7 +20,7 @@ public: ...@@ -20,7 +20,7 @@ public:
data_size_t cnt = 0; data_size_t cnt = 0;
/*! /*!
* \brief Sum up reduce function for histogram bin * \brief Sum up (reducers) functions for histogram bin
*/ */
inline static void SumReducer(const char *src, char *dst, int len) { inline static void SumReducer(const char *src, char *dst, int len) {
const int type_size = sizeof(HistogramBinEntry); const int type_size = sizeof(HistogramBinEntry);
...@@ -42,8 +42,8 @@ public: ...@@ -42,8 +42,8 @@ public:
} }
}; };
/*! \brief This class used to convert featrue value to bin, /*! \brief This class used to convert feature values into bin,
* and store some meta infomartion for bin*/ * and store some meta information for bin*/
class BinMapper { class BinMapper {
public: public:
BinMapper(); BinMapper();
...@@ -53,9 +53,9 @@ public: ...@@ -53,9 +53,9 @@ public:
/*! \brief Get number of bins */ /*! \brief Get number of bins */
inline int num_bin() const { return num_bin_; } inline int num_bin() const { return num_bin_; }
/*! \brief True if bin is trival(only contain one bin) */ /*! \brief True if bin is trival (contains only one bin) */
inline bool is_trival() const { return is_trival_; } inline bool is_trival() const { return is_trival_; }
/*! \brief Sparse rate of this bins( num_zero_bins / num_data ) */ /*! \brief Sparsity of this bin ( num_zero_bins / num_data ) */
inline double sparse_rate() const { return sparse_rate_; } inline double sparse_rate() const { return sparse_rate_; }
/*! /*!
* \brief Save binary data to file * \brief Save binary data to file
...@@ -63,9 +63,9 @@ public: ...@@ -63,9 +63,9 @@ public:
*/ */
void SaveBinaryToFile(FILE* file) const; void SaveBinaryToFile(FILE* file) const;
/*! /*!
* \brief Map bin to feature value * \brief Mapping bin into feature value
* \param bin * \param bin
* \return Feature value for this bin * \return Feature value of this bin
*/ */
inline double BinToValue(unsigned int bin) const { inline double BinToValue(unsigned int bin) const {
return bin_upper_bound_[bin]; return bin_upper_bound_[bin];
...@@ -75,7 +75,7 @@ public: ...@@ -75,7 +75,7 @@ public:
*/ */
size_t SizesInByte() const; size_t SizesInByte() const;
/*! /*!
* \brief Map feature value to bin * \brief Mapping feature value into bin
* \param value * \param value
* \return bin for this feature value * \return bin for this feature value
*/ */
...@@ -96,13 +96,13 @@ public: ...@@ -96,13 +96,13 @@ public:
static int SizeForSpecificBin(int bin); static int SizeForSpecificBin(int bin);
/*! /*!
* \brief Copy this object to buffer * \brief Serializing this object to buffer
* \param buffer The destination * \param buffer The destination
*/ */
void CopyTo(char* buffer); void CopyTo(char* buffer);
/*! /*!
* \brief Restore this object from buffer * \brief Deserializing this object from buffer
* \param buffer The source * \param buffer The source
*/ */
void CopyFrom(const char* buffer); void CopyFrom(const char* buffer);
...@@ -119,12 +119,12 @@ private: ...@@ -119,12 +119,12 @@ private:
}; };
/*! /*!
* \brief Interface for ordered bin data. efficient for construct histogram, especally for sparse bin * \brief Interface for ordered bin data. efficient for construct histogram, especially for sparse bin
* There are 2 advantages for using ordered bin. * There are 2 advantages of using ordered bin.
* 1. group the data by leaf, improve the cache hit. * 1. group the data by leaves to improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature. * 2. only store the non-zero bin, which can speed up the histogram construction for sparse features.
* But it has a additional cost, it need re-order the bins after leaf split, which will cost much for dense feature. * However, it brings additional cost: it needs to re-order the bins after every split, which is costly for dense features.
* So we only use ordered bin for sparse features now. * So we only use ordered bin for sparse situations.
*/ */
class OrderedBin { class OrderedBin {
public: public:
...@@ -132,16 +132,17 @@ public: ...@@ -132,16 +132,17 @@ public:
virtual ~OrderedBin() {} virtual ~OrderedBin() {}
/*! /*!
* \brief Initial logic, call before train one tree. * \brief Initialization logic.
* \param used_indices If used_indices==nullptr means using all data, otherwise, used_indices[i] != 0 means i-th data is used(for bagging logic) * \param used_indices If used_indices==nullptr means using all data, otherwise, used_indices[i] != 0 means i-th data is used
* \param num_leavas Number of leveas on this iteration (this logic was built for bagging)
* \param num_leaves Number of leaves on this iteration
*/ */
virtual void Init(const char* used_indices, data_size_t num_leavas) = 0; virtual void Init(const char* used_indices, data_size_t num_leaves) = 0;
/*! /*!
* \brief Construct histogram by using this bin * \brief Construct histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians. * Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians.
* Because it is hard to know the relative index in one leaf for sparse bin, since we skipped zero bins. * Because it is hard to know the relative index in one leaf for sparse bin, since we skipped zero bins.
* \param leaf Using which leaf's data to construct * \param leaf Using which leaf's data to construct
* \param gradients Gradients, Note: non-ordered by leaf * \param gradients Gradients, Note: non-ordered by leaf
* \param hessians Hessians, Note: non-ordered by leaf * \param hessians Hessians, Note: non-ordered by leaf
...@@ -172,9 +173,9 @@ public: ...@@ -172,9 +173,9 @@ public:
/*! /*!
* \brief Interface for bin data. This class will store bin data for one feature. * \brief Interface for bin data. This class will store bin data for one feature.
* unlike OrderedBin, this class will store data by original order. * unlike OrderedBin, this class will store data by original order.
* Though it may have many cache miss when construct histogram, * Note that it may cause cache misses when constructing histogram,
* but it doesn't need to re-order operation, So it is still faster than OrderedBin for dense feature * but it doesn't need the re-order operation, so it will be faster than OrderedBin for dense features
*/ */
class Bin { class Bin {
public: public:
...@@ -218,10 +219,11 @@ public: ...@@ -218,10 +219,11 @@ public:
/*! /*!
* \brief Construct histogram of this feature, * \brief Construct histogram of this feature,
* Note: here use ordered_gradients and ordered_hessians to improve cache hit chance * Note: We use ordered_gradients and ordered_hessians to improve cache hit chance
* The navie solution is use gradients[data_indices[i]] for data_indices[i] to get gradients, which is not cache friendly, since the access of memory is not continuous. * The naive solution is to use gradients[data_indices[i]] for data_indices[i] to get gradients,
* ordered_gradients and ordered_hessians are preprocessed, they are re-ordered by data_indices. which is not cache friendly, since the access of memory is not continuous.
* It uses ordered_gradients[i] for data_indices[i]'s gradients (same for ordered_hessians). * ordered_gradients and ordered_hessians are preprocessed, and they are re-ordered by data_indices.
* Ordered_gradients[i] is aligned with data_indices[i]'s gradients (same for ordered_hessians).
* \param data_indices Used data indices in current leaf * \param data_indices Used data indices in current leaf
* \param num_data Number of used data * \param num_data Number of used data
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i] * \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
...@@ -282,7 +284,6 @@ public: ...@@ -282,7 +284,6 @@ public:
* \brief Create object for bin data of one feature, used for sparse feature * \brief Create object for bin data of one feature, used for sparse feature
* \param num_data Total number of data * \param num_data Total number of data
* \param num_bin Number of bin * \param num_bin Number of bin
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \return The bin data object * \return The bin data object
*/ */
static Bin* CreateSparseBin(data_size_t num_data, static Bin* CreateSparseBin(data_size_t num_data,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment