Commit 888e2b18 authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

Merge pull request #13 from xuehui1991/update_for_typo

update for typo
parents aee30126 0dcd422a
...@@ -18,7 +18,7 @@ class Metric; ...@@ -18,7 +18,7 @@ class Metric;
 * \brief The entrance of LightGBM. This application has two tasks: * \brief The entrance of LightGBM. This application has two tasks:
* Train and Predict. * Train and Predict.
* Train task will train a new model * Train task will train a new model
* Predict task will predicting the scores of test data then saving the score to local disk * Predict task will predict the scores of test data and save the score to local disk
*/ */
class Application { class Application {
public: public:
......
...@@ -119,10 +119,10 @@ private: ...@@ -119,10 +119,10 @@ private:
}; };
/*! /*!
 * \brief Interface for ordered bin data. efficient for construct histogram, especally for sparse bin * \brief Interface for ordered bin data. It is very efficient for constructing histograms, especially for sparse bins
* There are 2 advantages for using ordered bin. * There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit. * 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature. * 2. only store the non-zero bins, which can speed up the histogram construction for sparse features.
 * But it has an additional cost: it needs to re-order the bins after a leaf split, which is expensive for dense features. * But it has an additional cost: it needs to re-order the bins after a leaf split, which is expensive for dense features.
* So we only use ordered bin for sparse features now. * So we only use ordered bin for sparse features now.
*/ */
......
...@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) { ...@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
} }
void GBDT::UpdateScoreOutOfBag(const Tree* tree) { void GBDT::UpdateScoreOutOfBag(const Tree* tree) {
 // we need to predict out-of-bag data's socres for boosing // we need to predict out-of-bag scores of the data for boosting
if (out_of_bag_data_indices_ != nullptr) { if (out_of_bag_data_indices_ != nullptr) {
train_score_updater_-> train_score_updater_->
AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_); AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_);
...@@ -169,12 +169,12 @@ void GBDT::Train() { ...@@ -169,12 +169,12 @@ void GBDT::Train() {
Bagging(iter); Bagging(iter);
// train a new tree // train a new tree
Tree * new_tree = TrainOneTree(); Tree * new_tree = TrainOneTree();
 // if cannon learn a new tree, stop // if we cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) { if (new_tree->num_leaves() <= 1) {
Log::Stdout("Cannot do any boosting for tree cannot split"); Log::Stdout("Cannot do any boosting for tree cannot split");
break; break;
} }
// Shrinkage by learning rate // shrinkage by learning rate
new_tree->Shrinkage(gbdt_config_->learning_rate); new_tree->Shrinkage(gbdt_config_->learning_rate);
// update score // update score
UpdateScore(new_tree); UpdateScore(new_tree);
...@@ -183,12 +183,12 @@ void GBDT::Train() { ...@@ -183,12 +183,12 @@ void GBDT::Train() {
OutputMetric(iter + 1); OutputMetric(iter + 1);
// add model // add model
models_.push_back(new_tree); models_.push_back(new_tree);
// write model to file on every iteration // save model to file per iteration
fprintf(output_model_file, "Tree=%d\n", iter); fprintf(output_model_file, "Tree=%d\n", iter);
fprintf(output_model_file, "%s\n", new_tree->ToString().c_str()); fprintf(output_model_file, "%s\n", new_tree->ToString().c_str());
fflush(output_model_file); fflush(output_model_file);
auto end_time = std::chrono::high_resolution_clock::now(); auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration // output used time per iteration
Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double, Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1); std::milli>(end_time - start_time) * 1e-3, iter + 1);
} }
...@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) { ...@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
} }
void GBDT::Boosting() { void GBDT::Boosting() {
// objective function will calculation gradients and hessians // objective function will calculate gradients and hessians
object_function_-> object_function_->
GetGradients(train_score_updater_->score(), gradients_, hessians_); GetGradients(train_score_updater_->score(), gradients_, hessians_);
} }
...@@ -248,6 +248,7 @@ std::string GBDT::ModelsToString() const { ...@@ -248,6 +248,7 @@ std::string GBDT::ModelsToString() const {
void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) { void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
// use serialized string to restore this object // use serialized string to restore this object
 // deserialize the string into tree objects
models_.clear(); models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n'); std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
size_t i = 0; size_t i = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment