"include/vscode:/vscode.git/clone" did not exist on "55cd60968de2751ae954ef903582f0c7537fbf01"
Commit 39e47323 authored by Qiwei Ye's avatar Qiwei Ye
Browse files

updating comments for easy read

parent 70873a98
...@@ -291,7 +291,7 @@ public: ...@@ -291,7 +291,7 @@ public:
}; };
inline unsigned int BinMapper::ValueToBin(double value) const { inline unsigned int BinMapper::ValueToBin(double value) const {
// use binary search to find bin // binary search to find bin
int l = 0; int l = 0;
int r = num_bin_ - 1; int r = num_bin_ - 1;
while (l < r) { while (l < r) {
......
...@@ -23,8 +23,8 @@ public: ...@@ -23,8 +23,8 @@ public:
virtual ~Boosting() {} virtual ~Boosting() {}
/*! /*!
* \brief Initial logic * \brief Initialization logic
* \param config Config for boosting * \param config Configs for boosting
* \param train_data Training data * \param train_data Training data
* \param object_function Training objective function * \param object_function Training objective function
* \param training_metrics Training metric * \param training_metrics Training metric
...@@ -54,7 +54,7 @@ public: ...@@ -54,7 +54,7 @@ public:
virtual double PredictRaw(const double * feature_values) const = 0; virtual double PredictRaw(const double * feature_values) const = 0;
/*! /*!
* \brief Prediction for one record, will use sigmoid transform if needed * \brief Prediction for one record, sigmoid transformation will be used if needed
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
......
...@@ -22,7 +22,7 @@ class Feature; ...@@ -22,7 +22,7 @@ class Feature;
* *
* Some details: * Some details:
* 1. Label, used for traning. * 1. Label, used for traning.
* 2. Weights, weighs of record, optional * 2. Weights, weighs of records, optional
* 3. Query Boundaries, necessary for lambdarank. * 3. Query Boundaries, necessary for lambdarank.
* The documents of i-th query is in [ query_boundarise[i], query_boundarise[i+1] ) * The documents of i-th query is in [ query_boundarise[i], query_boundarise[i+1] )
* 4. Query Weights, auto calculate by weights and query_boundarise(if both of them are existed) * 4. Query Weights, auto calculate by weights and query_boundarise(if both of them are existed)
...@@ -36,7 +36,7 @@ public: ...@@ -36,7 +36,7 @@ public:
*/ */
Metadata(); Metadata();
/*! /*!
* \brief Initialize, will load qurey level informations, since it is need for sampling data * \brief Initialization will load query-level information, since it is needed for sampling data
* \param data_filename Filename of data * \param data_filename Filename of data
* \param init_score_filename Filename of initial score * \param init_score_filename Filename of initial score
* \param is_int_label True if label is int type * \param is_int_label True if label is int type
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
namespace LightGBM { namespace LightGBM {
/*! \brief Used to store data and provide some operations on one feature*/ /*! \brief Stores data and provides some operations on one feature*/
class Feature { class Feature {
public: public:
/*! /*!
......
...@@ -28,7 +28,7 @@ public: ...@@ -28,7 +28,7 @@ public:
const Metadata& metadata, data_size_t num_data) = 0; const Metadata& metadata, data_size_t num_data) = 0;
/*! /*!
* \brief Calcalute and print metric result * \brief Calculating and printing metric result
* \param iter Current iteration * \param iter Current iteration
* \param score Current prediction score * \param score Current prediction score
*/ */
...@@ -55,7 +55,7 @@ public: ...@@ -55,7 +55,7 @@ public:
/*! /*!
* \brief Calculate the DCG score at position k * \brief Calculate the DCG score at position k
* \param k The position want to eval at * \param k The position to evaluate
* \param label Pointer of label * \param label Pointer of label
* \param score Pointer of score * \param score Pointer of score
* \param num_data Number of data * \param num_data Number of data
...@@ -66,7 +66,7 @@ public: ...@@ -66,7 +66,7 @@ public:
/*! /*!
* \brief Calculate the DCG score at multi position * \brief Calculate the DCG score at multi position
* \param ks The positions want to eval at * \param ks The positions to evaluate
* \param label Pointer of label * \param label Pointer of label
* \param score Pointer of score * \param score Pointer of score
* \param num_data Number of data * \param num_data Number of data
......
...@@ -14,7 +14,7 @@ namespace LightGBM { ...@@ -14,7 +14,7 @@ namespace LightGBM {
/*! \brief forward declaration */ /*! \brief forward declaration */
class Linkers; class Linkers;
/*! \brief The network structure for all gather */ /*! \brief The network structure for all_gather */
class BruckMap { class BruckMap {
public: public:
/*! \brief The communication times for one all gather operation */ /*! \brief The communication times for one all gather operation */
...@@ -98,7 +98,7 @@ public: ...@@ -98,7 +98,7 @@ public:
static inline int num_machines(); static inline int num_machines();
/*! /*!
* \brief Perform all reduce. if data size is small, * \brief Perform all_reduce. if data size is small,
will perform AllreduceByAllGather, else with call ReduceScatter followed allgather will perform AllreduceByAllGather, else with call ReduceScatter followed allgather
* \param input Input data * \param input Input data
* \param input_size The size of input data * \param input_size The size of input data
...@@ -110,7 +110,7 @@ public: ...@@ -110,7 +110,7 @@ public:
char* output, const ReduceFunction& reducer); char* output, const ReduceFunction& reducer);
/*! /*!
* \brief Perform all reduce, use all gather. When data is small, can use this to reduce communication times * \brief Perform all_reduce by using all_gather. It can be used to reduce communication time when data is small
* \param input Input data * \param input Input data
* \param input_size The size of input data * \param input_size The size of input data
* \param output Output result * \param output Output result
...@@ -120,8 +120,9 @@ public: ...@@ -120,8 +120,9 @@ public:
const ReduceFunction& reducer); const ReduceFunction& reducer);
/*! /*!
* \brief Perform all gather, use bruck algorithm. Communication times is O(log(n)), and communication cost is O(send_size * number_machine) * \brief Performing all_gather by using bruck algorithm.
* if all machine have same input size, can call this function Communication times is O(log(n)), and communication cost is O(send_size * number_machine)
* It can be used when all nodes have same input size.
* \param input Input data * \param input Input data
* \param send_size The size of input data * \param send_size The size of input data
* \param output Output result * \param output Output result
...@@ -129,8 +130,9 @@ public: ...@@ -129,8 +130,9 @@ public:
static void Allgather(char* input, int send_size, char* output); static void Allgather(char* input, int send_size, char* output);
/*! /*!
* \brief Perform all gather, use bruck algorithm. Communication times is O(log(n)), and communication cost is O(all_size) * \brief Performing all_gather by using bruck algorithm.
* if all machine have different input size, can call this function Communication times is O(log(n)), and communication cost is O(all_size)
* It can be used when nodes have different input size.
* \param input Input data * \param input Input data
* \param all_size The size of input data * \param all_size The size of input data
* \param block_start The block start for different machines * \param block_start The block start for different machines
...@@ -141,7 +143,8 @@ public: ...@@ -141,7 +143,8 @@ public:
int* block_len, char* output); int* block_len, char* output);
/*! /*!
* \brief Perform reduce scatter, use recursive halving algorithm. Communication times is O(log(n)), and communication cost is O(input_size) * \brief Perform reduce scatter by using recursive halving algorithm.
Communication times is O(log(n)), and communication cost is O(input_size)
* \param input Input data * \param input Input data
* \param input_size The size of input data * \param input_size The size of input data
* \param block_start The block start for different machines * \param block_start The block start for different machines
......
...@@ -9,7 +9,6 @@ namespace LightGBM { ...@@ -9,7 +9,6 @@ namespace LightGBM {
/*! /*!
* \brief The interface of Objective Function. * \brief The interface of Objective Function.
* Objective function is used to get gradients
*/ */
class ObjectiveFunction { class ObjectiveFunction {
public: public:
...@@ -24,8 +23,8 @@ public: ...@@ -24,8 +23,8 @@ public:
virtual void Init(const Metadata& metadata, data_size_t num_data) = 0; virtual void Init(const Metadata& metadata, data_size_t num_data) = 0;
/*! /*!
* \brief calculate first order derivative of loss function * \brief calculating first order derivative of loss function
* \param score Current prediction score * \param score prediction score in this round
* \gradients Output gradients * \gradients Output gradients
* \hessians Output hessians * \hessians Output hessians
*/ */
......
...@@ -31,9 +31,9 @@ public: ...@@ -31,9 +31,9 @@ public:
~Tree(); ~Tree();
/*! /*!
* \brief Split a tree leave, * \brief Performing a split on tree leaves.
* \param leaf Index of leaf that want to split * \param leaf Index of leaf to be split
* \param feature Index of feature, the converted index after remove useless features * \param feature Index of feature; the converted index after removing useless features
* \param threshold Threshold(bin) of split * \param threshold Threshold(bin) of split
* \param real_feature Index of feature, the original index on data * \param real_feature Index of feature, the original index on data
* \param threshold_double Threshold on feature value * \param threshold_double Threshold on feature value
...@@ -50,7 +50,7 @@ public: ...@@ -50,7 +50,7 @@ public:
inline score_t LeafOutput(int leaf) const { return leaf_value_[leaf]; } inline score_t LeafOutput(int leaf) const { return leaf_value_[leaf]; }
/*! /*!
* \brief Add prediction of this tree model to score * \brief Adding prediction value of this tree model to scores
* \param data The dataset * \param data The dataset
* \param num_data Number of total data * \param num_data Number of total data
* \param score Will add prediction to score * \param score Will add prediction to score
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
score_t* score) const; score_t* score) const;
/*! /*!
* \brief Add prediction of this tree model to score * \brief Adding prediction value of this tree model to scores
* \param data The dataset * \param data The dataset
* \param used_data_indices Indices of used data * \param used_data_indices Indices of used data
* \param num_data Number of total data * \param num_data Number of total data
...@@ -70,7 +70,7 @@ public: ...@@ -70,7 +70,7 @@ public:
data_size_t num_data, score_t* score) const; data_size_t num_data, score_t* score) const;
/*! /*!
* \brief Prediction for one record * \brief Prediction on one record
* \param feature_values Feature value of this record * \param feature_values Feature value of this record
* \return Prediction result * \return Prediction result
*/ */
...@@ -81,6 +81,7 @@ public: ...@@ -81,6 +81,7 @@ public:
/*! /*!
* \brief Shrinkage for the tree's output * \brief Shrinkage for the tree's output
* Shrinkage rate (a.k.a. learning rate) is used to tune the training process
* \param rate The factor of shrinkage * \param rate The factor of shrinkage
*/ */
inline void Shrinkage(double rate) { inline void Shrinkage(double rate) {
...@@ -98,7 +99,7 @@ public: ...@@ -98,7 +99,7 @@ public:
Tree(const Tree&) = delete; Tree(const Tree&) = delete;
private: private:
/*! /*!
* \brief Find leaf index that this record belongs * \brief Find the index of the leaf that a record belongs to, by data
* \param data The dataset * \param data The dataset
* \param data_idx Index of record * \param data_idx Index of record
* \return Leaf index * \return Leaf index
...@@ -107,7 +108,7 @@ private: ...@@ -107,7 +108,7 @@ private:
data_size_t data_idx) const; data_size_t data_idx) const;
/*! /*!
* \brief Find leaf index that this record belongs * \brief Find the index of the leaf that a record belongs to, by features
* \param feature_values Feature value of this record * \param feature_values Feature value of this record
* \return Leaf index * \return Leaf index
*/ */
......
...@@ -22,14 +22,13 @@ public: ...@@ -22,14 +22,13 @@ public:
virtual ~TreeLearner() {} virtual ~TreeLearner() {}
/*! /*!
* \brief Init tree learner with training data set and tree config * \brief Initialize tree learner with training dataset and configs
* \param train_data The used training data * \param train_data The used training data
* \param tree_config The tree setting
*/ */
virtual void Init(const Dataset* train_data) = 0; virtual void Init(const Dataset* train_data) = 0;
/*! /*!
* \brief fit train data set and return a trained tree * \brief training tree model on dataset
* \param gradients The first order gradients * \param gradients The first order gradients
* \param hessians The second order gradients * \param hessians The second order gradients
* \return A trained tree * \return A trained tree
...@@ -45,7 +44,7 @@ public: ...@@ -45,7 +44,7 @@ public:
data_size_t num_data) = 0; data_size_t num_data) = 0;
/*! /*!
* \brief Use last trained tree to predition training score, and add to out_score; * \brief Using last trained tree to predict score then adding to out_score;
* \param out_score output score * \param out_score output score
*/ */
virtual void AddPredictionToScore(score_t *out_score) const = 0; virtual void AddPredictionToScore(score_t *out_score) const = 0;
......
...@@ -54,7 +54,7 @@ public: ...@@ -54,7 +54,7 @@ public:
} }
/*! /*!
* \brief prediction for one record, only raw result(not sigmoid transform) * \brief prediction for one record, only raw result(without sigmoid transformation)
* \param features Feature for this record * \param features Feature for this record
* \return Prediction result * \return Prediction result
*/ */
...@@ -68,13 +68,13 @@ public: ...@@ -68,13 +68,13 @@ public:
features_[tid][p.first] = p.second; features_[tid][p.first] = p.second;
} }
} }
// get result without sigmoid transform // get result without sigmoid transformation
return boosting_->PredictRaw(features_[tid]); return boosting_->PredictRaw(features_[tid]);
} }
/*! /*!
* \brief prediction for one record, will use sigmoid transform if needed(only needs in binary classification now) * \brief prediction for one record, will use sigmoid transformation if needed (only enabled for binary classification now)
* \param features Feature for this record * \param features Feature of this record
* \return Prediction result * \return Prediction result
*/ */
double PredictOneLine(const std::vector<std::pair<int, double>>& features) { double PredictOneLine(const std::vector<std::pair<int, double>>& features) {
...@@ -91,7 +91,7 @@ public: ...@@ -91,7 +91,7 @@ public:
return boosting_->Predict(features_[tid]); return boosting_->Predict(features_[tid]);
} }
/*! /*!
* \brief prediction for a data, and save result * \brief predicting on data, then saving result to disk
* \param data_filename Filename of data * \param data_filename Filename of data
* \param has_label True if this data contains label * \param has_label True if this data contains label
* \param result_filename Filename of output result * \param result_filename Filename of output result
...@@ -112,7 +112,7 @@ public: ...@@ -112,7 +112,7 @@ public:
Parser* parser = Parser::CreateParser(data_filename, num_features_, &has_label); Parser* parser = Parser::CreateParser(data_filename, num_features_, &has_label);
if (parser == nullptr) { if (parser == nullptr) {
Log::Stderr("can regonise input data format, filename %s", data_filename); Log::Stderr("recongnizing input data format failed, filename %s", data_filename);
} }
// function for parse data // function for parse data
......
...@@ -24,7 +24,7 @@ public: ...@@ -24,7 +24,7 @@ public:
*/ */
~GBDT(); ~GBDT();
/*! /*!
* \brief Initial logic * \brief Initialization logic
* \param config Config for boosting * \param config Config for boosting
* \param train_data Training data * \param train_data Training data
* \param object_function Training objective function * \param object_function Training objective function
...@@ -36,9 +36,9 @@ public: ...@@ -36,9 +36,9 @@ public:
const char* output_model_filename) const char* output_model_filename)
override; override;
/*! /*!
* \brief Add a validation data * \brief Adding a validation dataset
* \param valid_data Validation data * \param valid_data Validation dataset
* \param valid_metrics Metrics for validation data * \param valid_metrics Metrics for validation dataset
*/ */
void AddDataset(const Dataset* valid_data, void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) override; const std::vector<const Metric*>& valid_metrics) override;
...@@ -47,14 +47,14 @@ public: ...@@ -47,14 +47,14 @@ public:
*/ */
void Train() override; void Train() override;
/*! /*!
* \brief Predtion for one record, not use sigmoid * \brief Prediction for one record without sigmoid transformation
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
double PredictRaw(const double * feature_values) const override; double PredictRaw(const double * feature_values) const override;
/*! /*!
* \brief Predtion for one record, will use sigmoid transform if needed * \brief Prediction for one record with sigmoid transformation if enabled
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
...@@ -87,8 +87,8 @@ private: ...@@ -87,8 +87,8 @@ private:
*/ */
void Bagging(int iter); void Bagging(int iter);
/*! /*!
* \brief update score for out-of-bag data. * \brief updating score for out-of-bag data.
* It is necessary for this update, since we may re-bagging data on training * Data should be updated since we may re-bag data during training
* \param tree Trained tree of this iteration * \param tree Trained tree of this iteration
*/ */
void UpdateScoreOutOfBag(const Tree* tree); void UpdateScoreOutOfBag(const Tree* tree);
...@@ -97,12 +97,12 @@ private: ...@@ -97,12 +97,12 @@ private:
*/ */
void Boosting(); void Boosting();
/*! /*!
* \brief train one tree * \brief training one tree
* \return Trained tree of this iteration * \return Trained tree of this iteration
*/ */
Tree* TrainOneTree(); Tree* TrainOneTree();
/*! /*!
* \brief update score after tree trained * \brief updating score after tree was trained
* \param tree Trained tree of this iteration * \param tree Trained tree of this iteration
*/ */
void UpdateScore(const Tree* tree); void UpdateScore(const Tree* tree);
......
...@@ -37,25 +37,25 @@ public: ...@@ -37,25 +37,25 @@ public:
delete[] score_; delete[] score_;
} }
/*! /*!
* \brief Use tree model to get prediction, then add to score for all data * \brief Using tree model to get prediction number, then adding to scores for all data
* Note: this function generally will be used for validation data. * Note: this function generally will be used on validation data too.
* \param tree Trained tree model * \param tree Trained tree model
*/ */
inline void AddScore(const Tree* tree) { inline void AddScore(const Tree* tree) {
tree->AddPredictionToScore(data_, num_data_, score_); tree->AddPredictionToScore(data_, num_data_, score_);
} }
/*! /*!
* \brief Add prediction score, only used for training data. * \brief Adding prediction score, only used for training data.
* After trained a tree, the training data is partitioned into tree leaves. * The training data is partitioned into tree leaves after training,
* We can get prediction by faster speed based on this. * based on which we can get predictions quickly.
* \param tree_learner * \param tree_learner
*/ */
inline void AddScore(const TreeLearner* tree_learner) { inline void AddScore(const TreeLearner* tree_learner) {
tree_learner->AddPredictionToScore(score_); tree_learner->AddPredictionToScore(score_);
} }
/*! /*!
* \brief Like AddScore(const Tree* tree), but only for part of data * \brief Using tree model to get prediction number, then adding to scores for parts of data
* Used for prediction of training out-of-bad data * Used for prediction of training out-of-bag data
* \param tree Trained tree model * \param tree Trained tree model
* \param data_indices Indices of data that want proccess to * \param data_indices Indices of data that want proccess to
* \param data_cnt Number of data that want proccess to * \param data_cnt Number of data that want proccess to
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
namespace LightGBM { namespace LightGBM {
/*! /*!
* \brief Ordered bin for sparse feature . efficient for construct histogram, especally for sparse bin * \brief Interface for ordered bin data. Efficient for constructing histograms, especially for sparse bins
* There are 2 advantages for using ordered bin. * There are 2 advantages to using ordered bin.
* 1. group the data by leaf, improve the cache hit. * 1. group the data by leaves to improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature. * 2. only store the non-zero bin, which can speed up the histogram construction for sparse features.
* But it has a additional cost, it need re-order the bins after leaf split, which will cost much for dense feature. * However, it brings additional cost: it needs to re-order the bins after every split, which will cost much for dense features.
* So we only use ordered bin for sparse features now. * So we only use ordered bin for sparse situations.
*/ */
template <typename VAL_T> template <typename VAL_T>
class OrderedSparseBin:public OrderedBin { class OrderedSparseBin:public OrderedBin {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment