Commit 70873a98 authored by Qiwei Ye's avatar Qiwei Ye
Browse files

Merge branch 'master' of https://github.com/Microsoft/LightGBM

Conflicts:
	include/LightGBM/application.h
	include/LightGBM/bin.h
parents 6f7eac7e 3a6c0946
......@@ -15,10 +15,10 @@ class Metric;
/*!
* \brief The main entrance of LightGBM. This application has two tasks:
* Train and Predict.
* Train task will train a new model
* Predict task will predicting the scores of test data using exsiting model,
* and saving the score to disk.
* Train and Predict.
* Train task will train a new model.
* Predict task will predict the scores of test data using an existing model,
* and save the scores to disk.
*/
class Application {
public:
......
......@@ -255,7 +255,7 @@ public:
virtual OrderedBin* CreateOrderedBin() const = 0;
/*!
* \brief After pushed all feature data, should call this to have better refactor for bin data
* \brief After all feature data has been pushed, call this to refactor the bin data into a better layout
*/
virtual void FinishLoad() = 0;
......@@ -263,7 +263,7 @@ public:
* \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
* \param num_data Total number of data
* \param num_bin Number of bin
* \param is_sparse True if this feature is saprese
* \param is_sparse True if this feature is sparse
* \param sparse_rate Sparse rate of these bins (num_bin0/num_data)
* \param is_enable_sparse True if enable sparse feature
* \param is_sparse Will be set to true if this bin is sparse
......
......@@ -47,14 +47,14 @@ public:
virtual void Train() = 0;
/*!
* \brief Predtion for one record, not sigmoid transform
* \brief Prediction for one record, not sigmoid transform
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
virtual double PredictRaw(const double * feature_values) const = 0;
/*!
* \brief Predtion for one record, will use sigmoid transform if needed
* \brief Prediction for one record, will use sigmoid transform if needed
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
......
......@@ -20,7 +20,7 @@ public:
virtual ~ConfigBase() {}
/*!
* \brief SetLabelAt current config object by params
* \brief Set current config object by params
* \param params Store the key and value for params
*/
virtual void Set(
......@@ -30,7 +30,7 @@ public:
* \brief Get string value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetString(
......@@ -41,7 +41,7 @@ public:
* \brief Get int value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetInt(
......@@ -52,7 +52,7 @@ public:
* \brief Get double value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetDouble(
......@@ -63,7 +63,7 @@ public:
* \brief Get bool value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetBool(
......
......@@ -17,7 +17,7 @@ namespace LightGBM {
class Feature;
/*!
* \brief This class is used to store some meta(non-feature) data for tranining data,
* \brief This class is used to store some meta(non-feature) data for training data,
* e.g. labels, weights, initial scores, query-level information.
*
* Some details:
......@@ -110,14 +110,14 @@ public:
}
/*!
* \brief Get weights, if not exists, will return nullput
* \brief Get weights, if not exists, will return nullptr
* \return Pointer of weights
*/
inline const float* weights()
const { return weights_; }
/*!
* \brief Get data boundaries on queries, if not exists, will return nullput
* \brief Get data boundaries on queries, if not exists, will return nullptr
* we assume data is ordered by query;
* the interval [query_boundaries[i], query_boundaries[i+1])
* contains the data indices for query i.
......@@ -133,13 +133,13 @@ public:
inline const data_size_t num_queries() const { return num_queries_; }
/*!
* \brief Get weights for queries, if not exists, will return nullput
* \brief Get weights for queries, if not exists, will return nullptr
* \return Pointer of weights for queries
*/
inline const float* query_weights() const { return query_weights_; }
/*!
* \brief Get initial scores, if not exists, will return nullput
* \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores
*/
inline const score_t* init_score() const { return init_score_; }
......@@ -231,7 +231,7 @@ public:
* \param max_bin The maximal number of bins that feature values will be bucketed into
* \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
* \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/
Dataset(const char* data_filename, const char* init_score_filename,
int max_bin, int random_seed, bool is_enable_sparse, const PredictFunction& predict_fun);
......@@ -243,7 +243,7 @@ public:
* \param max_bin The maximal number of bins that feature values will be bucketed into
* \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
* \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/
Dataset(const char* data_filename,
int max_bin, int random_seed, bool is_enable_sparse,
......
......@@ -37,7 +37,7 @@ public:
/*!
* \brief node type on recursive halving algorithm
* When number of machines is not power of 2, need group maiches into power of 2 group.
* When number of machines is not power of 2, need group machines into power of 2 group.
* And we can let each group has at most 2 machines.
* if the group only has 1 machine. this machine is the normal node
* if the group has 2 machines, this group will have two types of nodes, one is the leader.
......
......@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
}
void GBDT::UpdateScoreOutOfBag(const Tree* tree) {
// we need to predict out-of-bag data's socres for boosing
// we need to predict out-of-bag scores of data for boosting
if (out_of_bag_data_indices_ != nullptr) {
train_score_updater_->
AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_);
......@@ -169,12 +169,12 @@ void GBDT::Train() {
Bagging(iter);
// train a new tree
Tree * new_tree = TrainOneTree();
// if cannon learn a new tree, stop
// if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) {
Log::Stdout("Cannot do any boosting for tree cannot split");
break;
}
// Shrinkage by learning rate
// shrinkage by learning rate
new_tree->Shrinkage(gbdt_config_->learning_rate);
// update score
UpdateScore(new_tree);
......@@ -183,12 +183,12 @@ void GBDT::Train() {
OutputMetric(iter + 1);
// add model
models_.push_back(new_tree);
// write model to file on every iteration
// save model to file per iteration
fprintf(output_model_file, "Tree=%d\n", iter);
fprintf(output_model_file, "%s\n", new_tree->ToString().c_str());
fflush(output_model_file);
auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration
// output used time per iteration
Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1);
}
......@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
}
void GBDT::Boosting() {
// objective function will calculation gradients and hessians
// objective function will calculate gradients and hessians
object_function_->
GetGradients(train_score_updater_->score(), gradients_, hessians_);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment