Commit 888e2b18 authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

Merge pull request #13 from xuehui1991/update_for_typo

update for typo
parents aee30126 0dcd422a
......@@ -18,7 +18,7 @@ class Metric;
* \brief The entrance of LightGBM. This application has two tasks:
* Train and Predict.
* Train task will train a new model
* Predict task will predicting the scores of test data then saving the score to local disk
* Predict task will predict the scores of test data and save the score to local disk
*/
class Application {
public:
......
......@@ -119,10 +119,10 @@ private:
};
/*!
* \brief Interface for ordered bin data. efficient for construct histogram, especally for sparse bin
* \brief Interface for ordered bin data. It is very efficient for constructing histograms, especially for sparse bins
* There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature.
* 2. only store the non-zero bin, which can speed up the histogram consturction for sparse feature.
* But it has an additional cost: it needs to re-order the bins after a leaf split, which is expensive for dense features.
* So we only use ordered bin for sparse features now.
*/
......
......@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
}
void GBDT::UpdateScoreOutOfBag(const Tree* tree) {
// we need to predict out-of-bag data's socres for boosing
// we need to predict out-of-bag scores of data for boosting
if (out_of_bag_data_indices_ != nullptr) {
train_score_updater_->
AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_);
......@@ -169,12 +169,12 @@ void GBDT::Train() {
Bagging(iter);
// train a new tree
Tree * new_tree = TrainOneTree();
// if cannon learn a new tree, stop
// if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) {
Log::Stdout("Cannot do any boosting for tree cannot split");
break;
}
// Shrinkage by learning rate
// shrinkage by learning rate
new_tree->Shrinkage(gbdt_config_->learning_rate);
// update score
UpdateScore(new_tree);
......@@ -183,12 +183,12 @@ void GBDT::Train() {
OutputMetric(iter + 1);
// add model
models_.push_back(new_tree);
// write model to file on every iteration
// save model to file per iteration
fprintf(output_model_file, "Tree=%d\n", iter);
fprintf(output_model_file, "%s\n", new_tree->ToString().c_str());
fflush(output_model_file);
auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration
// output used time per iteration
Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1);
}
......@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
}
/*!
* \brief Run one boosting step: ask the objective function to compute
*        first- and second-order derivatives of the loss w.r.t. the
*        current model scores. Results are written into the gradients_
*        and hessians_ member buffers, which the tree learner consumes.
*/
void GBDT::Boosting() {
// objective function will calculate gradients and hessians
// based on the current train scores
object_function_->
GetGradients(train_score_updater_->score(), gradients_, hessians_);
}
......@@ -248,6 +248,7 @@ std::string GBDT::ModelsToString() const {
void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
// use serialized string to restore this object
// deserialize the string back into model objects
models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
size_t i = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment