Commit 70873a98 authored by Qiwei Ye
Browse files

Merge branch 'master' of https://github.com/Microsoft/LightGBM

Conflicts:
	include/LightGBM/application.h
	include/LightGBM/bin.h
parents 6f7eac7e 3a6c0946
...@@ -15,10 +15,10 @@ class Metric; ...@@ -15,10 +15,10 @@ class Metric;
/*! /*!
* \brief The main entrance of LightGBM. this application has two tasks: * \brief The main entrance of LightGBM. this application has two tasks:
* Train and Predict. * Train and Predict.
* Train task will train a new model * Train task will train a new model
* Predict task will predicting the scores of test data using exsiting model, * Predict task will predicting the scores of test data using exsiting model,
* and saving the score to disk. * and saving the score to disk.
*/ */
class Application { class Application {
public: public:
......
...@@ -255,7 +255,7 @@ public: ...@@ -255,7 +255,7 @@ public:
virtual OrderedBin* CreateOrderedBin() const = 0; virtual OrderedBin* CreateOrderedBin() const = 0;
/*! /*!
* \brief After pushed all feature data, should call this to have better refactor for bin data * \brief After pushed all feature data, call this could have better refactor for bin data
*/ */
virtual void FinishLoad() = 0; virtual void FinishLoad() = 0;
...@@ -263,7 +263,7 @@ public: ...@@ -263,7 +263,7 @@ public:
* \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse" * \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
* \param num_data Total number of data * \param num_data Total number of data
* \param num_bin Number of bin * \param num_bin Number of bin
* \param is_sparse True if this feature is saprese * \param is_sparse True if this feature is sparse
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data ) * \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \param is_enable_sparse True if enable sparse feature * \param is_enable_sparse True if enable sparse feature
* \param is_sparse Will set to true if this bin is sparse * \param is_sparse Will set to true if this bin is sparse
......
...@@ -47,14 +47,14 @@ public: ...@@ -47,14 +47,14 @@ public:
virtual void Train() = 0; virtual void Train() = 0;
/*! /*!
* \brief Predtion for one record, not sigmoid transform * \brief Prediction for one record, not sigmoid transform
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
virtual double PredictRaw(const double * feature_values) const = 0; virtual double PredictRaw(const double * feature_values) const = 0;
/*! /*!
* \brief Predtion for one record, will use sigmoid transform if needed * \brief Prediction for one record, will use sigmoid transform if needed
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
......
...@@ -20,7 +20,7 @@ public: ...@@ -20,7 +20,7 @@ public:
virtual ~ConfigBase() {} virtual ~ConfigBase() {}
/*! /*!
* \brief SetLabelAt current config object by params * \brief Set current config object by params
* \param params Store the key and value for params * \param params Store the key and value for params
*/ */
virtual void Set( virtual void Set(
...@@ -30,7 +30,7 @@ public: ...@@ -30,7 +30,7 @@ public:
* \brief Get string value by specific name of key * \brief Get string value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetString( inline bool GetString(
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
* \brief Get int value by specific name of key * \brief Get int value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetInt( inline bool GetInt(
...@@ -52,7 +52,7 @@ public: ...@@ -52,7 +52,7 @@ public:
* \brief Get double value by specific name of key * \brief Get double value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetDouble( inline bool GetDouble(
...@@ -63,7 +63,7 @@ public: ...@@ -63,7 +63,7 @@ public:
* \brief Get bool value by specific name of key * \brief Get bool value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetBool( inline bool GetBool(
......
...@@ -17,7 +17,7 @@ namespace LightGBM { ...@@ -17,7 +17,7 @@ namespace LightGBM {
class Feature; class Feature;
/*! /*!
* \brief This class is used to store some meta(non-feature) data for tranining data, * \brief This class is used to store some meta(non-feature) data for training data,
* e.g. labels, weights, initial scores, qurey level informations. * e.g. labels, weights, initial scores, qurey level informations.
* *
* Some details: * Some details:
...@@ -110,14 +110,14 @@ public: ...@@ -110,14 +110,14 @@ public:
} }
/*! /*!
* \brief Get weights, if not exists, will return nullput * \brief Get weights, if not exists, will return nullptr
* \return Pointer of weights * \return Pointer of weights
*/ */
inline const float* weights() inline const float* weights()
const { return weights_; } const { return weights_; }
/*! /*!
* \brief Get data boundaries on queries, if not exists, will return nullput * \brief Get data boundaries on queries, if not exists, will return nullptr
* we assume data will order by query, * we assume data will order by query,
* the interval of [query_boundaris[i], query_boundaris[i+1]) * the interval of [query_boundaris[i], query_boundaris[i+1])
* is the data indices for query i. * is the data indices for query i.
...@@ -133,13 +133,13 @@ public: ...@@ -133,13 +133,13 @@ public:
inline const data_size_t num_queries() const { return num_queries_; } inline const data_size_t num_queries() const { return num_queries_; }
/*! /*!
* \brief Get weights for queries, if not exists, will return nullput * \brief Get weights for queries, if not exists, will return nullptr
* \return Pointer of weights for queries * \return Pointer of weights for queries
*/ */
inline const float* query_weights() const { return query_weights_; } inline const float* query_weights() const { return query_weights_; }
/*! /*!
* \brief Get initial scores, if not exists, will return nullput * \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores * \return Pointer of initial scores
*/ */
inline const score_t* init_score() const { return init_score_; } inline const score_t* init_score() const { return init_score_; }
...@@ -231,7 +231,7 @@ public: ...@@ -231,7 +231,7 @@ public:
* \param max_bin The maximal number of bin that feature values will bucket in * \param max_bin The maximal number of bin that feature values will bucket in
* \param random_seed The seed for random generator * \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature * \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/ */
Dataset(const char* data_filename, const char* init_score_filename, Dataset(const char* data_filename, const char* init_score_filename,
int max_bin, int random_seed, bool is_enable_sparse, const PredictFunction& predict_fun); int max_bin, int random_seed, bool is_enable_sparse, const PredictFunction& predict_fun);
...@@ -243,7 +243,7 @@ public: ...@@ -243,7 +243,7 @@ public:
* \param max_bin The maximal number of bin that feature values will bucket in * \param max_bin The maximal number of bin that feature values will bucket in
* \param random_seed The seed for random generator * \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature * \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/ */
Dataset(const char* data_filename, Dataset(const char* data_filename,
int max_bin, int random_seed, bool is_enable_sparse, int max_bin, int random_seed, bool is_enable_sparse,
......
...@@ -37,7 +37,7 @@ public: ...@@ -37,7 +37,7 @@ public:
/*! /*!
* \brief node type on recursive halving algorithm * \brief node type on recursive halving algorithm
* When number of machines is not power of 2, need group maiches into power of 2 group. * When number of machines is not power of 2, need group machines into power of 2 group.
* And we can let each group has at most 2 machines. * And we can let each group has at most 2 machines.
* if the group only has 1 machine. this machine is the normal node * if the group only has 1 machine. this machine is the normal node
* if the grou has 2 machines, this group will have two type of nodes, one is the leader. * if the grou has 2 machines, this group will have two type of nodes, one is the leader.
......
...@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) { ...@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
} }
void GBDT::UpdateScoreOutOfBag(const Tree* tree) { void GBDT::UpdateScoreOutOfBag(const Tree* tree) {
// we need to predict out-of-bag data's socres for boosing // we need to predict out-of-bag socres of data for boosting
if (out_of_bag_data_indices_ != nullptr) { if (out_of_bag_data_indices_ != nullptr) {
train_score_updater_-> train_score_updater_->
AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_); AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_);
...@@ -169,12 +169,12 @@ void GBDT::Train() { ...@@ -169,12 +169,12 @@ void GBDT::Train() {
Bagging(iter); Bagging(iter);
// train a new tree // train a new tree
Tree * new_tree = TrainOneTree(); Tree * new_tree = TrainOneTree();
// if cannon learn a new tree, stop // if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) { if (new_tree->num_leaves() <= 1) {
Log::Stdout("Cannot do any boosting for tree cannot split"); Log::Stdout("Cannot do any boosting for tree cannot split");
break; break;
} }
// Shrinkage by learning rate // shrinkage by learning rate
new_tree->Shrinkage(gbdt_config_->learning_rate); new_tree->Shrinkage(gbdt_config_->learning_rate);
// update score // update score
UpdateScore(new_tree); UpdateScore(new_tree);
...@@ -183,12 +183,12 @@ void GBDT::Train() { ...@@ -183,12 +183,12 @@ void GBDT::Train() {
OutputMetric(iter + 1); OutputMetric(iter + 1);
// add model // add model
models_.push_back(new_tree); models_.push_back(new_tree);
// write model to file on every iteration // save model to file per iteration
fprintf(output_model_file, "Tree=%d\n", iter); fprintf(output_model_file, "Tree=%d\n", iter);
fprintf(output_model_file, "%s\n", new_tree->ToString().c_str()); fprintf(output_model_file, "%s\n", new_tree->ToString().c_str());
fflush(output_model_file); fflush(output_model_file);
auto end_time = std::chrono::high_resolution_clock::now(); auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration // output used time per iteration
Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double, Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1); std::milli>(end_time - start_time) * 1e-3, iter + 1);
} }
...@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) { ...@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
} }
void GBDT::Boosting() { void GBDT::Boosting() {
// objective function will calculation gradients and hessians // objective function will calculate gradients and hessians
object_function_-> object_function_->
GetGradients(train_score_updater_->score(), gradients_, hessians_); GetGradients(train_score_updater_->score(), gradients_, hessians_);
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment