Commit d3657628 authored by Qiwei Ye, committed by GitHub

Merge pull request #36 from xuehui1991/update_for_dcg

update for typo.
parents a6a75fe9 bb6971b4
@@ -115,7 +115,7 @@ private:
   */
   void UpdateScore(const Tree* tree);
   /*!
-  * \brief Print Metric result of current iteration
+  * \brief Print metric result of current iteration
   * \param iter Current iteration
   */
   bool OutputMetric(int iter);
@@ -126,11 +126,11 @@ private:
   const Dataset* train_data_;
   /*! \brief Config of gbdt */
   const GBDTConfig* gbdt_config_;
-  /*! \brief Tree learner, will use tihs class to learn trees */
+  /*! \brief Tree learner, will use this class to learn trees */
   TreeLearner* tree_learner_;
   /*! \brief Objective function */
   const ObjectiveFunction* object_function_;
-  /*! \brief Store and update traning data's score */
+  /*! \brief Store and update training data's score */
   ScoreUpdater* train_score_updater_;
   /*! \brief Metrics for training data */
   std::vector<const Metric*> training_metrics_;
...
@@ -57,8 +57,8 @@ public:
   * \brief Using tree model to get prediction number, then adding to scores for parts of data
   *        Used for prediction of training out-of-bag data
   * \param tree Trained tree model
-  * \param data_indices Indices of data that want proccess to
-  * \param data_cnt Number of data that want proccess to
+  * \param data_indices Indices of data that will be processed
+  * \param data_cnt Number of data that will be processed
   */
   inline void AddScore(const Tree* tree, const data_size_t* data_indices,
                        data_size_t data_cnt) {
...
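For context, the hunk above documents the bagging path of `ScoreUpdater::AddScore`: only the rows named in `data_indices` get the new tree's output added to their score. A minimal standalone sketch of that idea (the `PredictStub` function and the plain score vector are illustrative stand-ins, not the library's actual types):

```cpp
#include <cstdint>
#include <vector>

using data_size_t = int32_t;
using score_t = double;

// Stand-in for Tree::Predict on one row.
static score_t PredictStub(data_size_t /*row*/) { return 0.1; }

// Sketch: add a tree's prediction only to the scores of the subset of
// data identified by data_indices -- the out-of-bag update described
// in the comment fixed above.
void AddScore(std::vector<score_t>* scores,
              const data_size_t* data_indices, data_size_t data_cnt) {
  for (data_size_t i = 0; i < data_cnt; ++i) {
    const data_size_t idx = data_indices[i];
    (*scores)[idx] += PredictStub(idx);
  }
}

int main() {
  std::vector<score_t> scores(8, 0.0);
  const data_size_t oob[] = {1, 3, 5};  // out-of-bag rows for this iteration
  AddScore(&scores, oob, 3);
  return 0;
}
```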
@@ -36,7 +36,7 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
     // create text reader
     text_reader_ = new TextReader<data_size_t>(data_filename);
   } else {
-    // only need to load initilize score, other meta data will load from bin flie
+    // only need to load initial score, other meta data will be loaded from bin file
     metadata_.Init(init_score_filename);
     Log::Info("Loading data set from binary file");
     parser_ = nullptr;
@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
   size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
-  // re-allocmate space if not enough
+  // re-allocate space if not enough
   if (size_of_metadata > buffer_size) {
     delete[] buffer;
     buffer_size = size_of_metadata;
@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
     Log::Fatal("Binary file format error at feature %d's size", i);
   }
   size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
-  // re-allocmate space if not enough
+  // re-allocate space if not enough
   if (size_of_feature > buffer_size) {
     delete[] buffer;
     buffer_size = size_of_feature;
...
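Both of these hunks fix the same comment on a grow-only scratch-buffer pattern: the buffer is reused across reads and reallocated only when the next block (metadata or a feature) exceeds its capacity. A small sketch of that pattern, assuming (as the surrounding code suggests) a raw `char` array whose old contents need not be preserved:

```cpp
#include <cstddef>

// Grow-only reuse: reallocate only when the incoming block is larger
// than the current capacity. Contents are not copied over -- the
// caller refills the buffer from the file after each call.
char* EnsureCapacity(char* buffer, size_t* buffer_size, size_t needed) {
  if (needed > *buffer_size) {
    delete[] buffer;
    buffer = new char[needed];
    *buffer_size = needed;
  }
  return buffer;
}

int main() {
  size_t cap = 16;
  char* buf = new char[cap];
  buf = EnsureCapacity(buf, &cap, 1024);  // grows once
  buf = EnsureCapacity(buf, &cap, 64);    // no-op, capacity retained
  delete[] buf;
  return 0;
}
```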
@@ -10,7 +10,7 @@
 namespace LightGBM {
 /*!
-* \brief Used to Store bins for dense feature
+* \brief Used to store bins for dense feature
 *        Use template to reduce memory cost
 */
 template <typename VAL_T>
...
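The comment here explains why the dense bin class is templated: the bin values can be stored at the narrowest integer width that holds the feature's bin count. An illustrative sketch of the idea (a hypothetical `DenseBinSketch`, not the library's actual class):

```cpp
#include <cstdint>
#include <vector>

// Sketch: store bin values at the narrowest width that can hold the
// feature's bin count, so a feature with <= 256 bins costs one byte
// per row instead of four.
template <typename VAL_T>
class DenseBinSketch {
 public:
  explicit DenseBinSketch(int32_t num_data) : data_(num_data, 0) {}
  void Push(int32_t idx, uint32_t bin) { data_[idx] = static_cast<VAL_T>(bin); }
  uint32_t Get(int32_t idx) const { return data_[idx]; }

 private:
  std::vector<VAL_T> data_;
};

int main() {
  DenseBinSketch<uint8_t> small_feature(1000);   // <= 256 bins
  DenseBinSketch<uint16_t> large_feature(1000);  // <= 65536 bins
  small_feature.Push(0, 42);
  large_feature.Push(0, 300);
  return 0;
}
```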
@@ -238,7 +238,7 @@ public:
   }
 private:
-  /*! \brief Output frequently */
+  /*! \brief Output frequency */
   int output_freq_;
   /*! \brief Number of data */
   data_size_t num_data_;
...
@@ -72,7 +72,7 @@ public:
   }
 private:
-  /*! \brief Output frequently */
+  /*! \brief Output frequency */
   int output_freq_;
   /*! \brief Number of data */
   data_size_t num_data_;
...
@@ -9,7 +9,7 @@
 namespace LightGBM {
-// static member defination
+// static member definition
 int Network::num_machines_;
 int Network::rank_;
 Linkers* Network::linkers_;
@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
     // send local data to neighbor first
     linkers_->Send(recursive_halving_map_.neighbor, input, input_size);
   } else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) {
-    // recieve neighbor data first
+    // receive neighbor data first
     int need_recv_cnt = input_size;
     linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt);
     // reduce
...
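As the hunk shows, this is the pairing step of recursive-halving reduce-scatter: a non-leader sends its block to its neighbor, and the group leader receives it and folds it into its own buffer before the halving rounds continue. A toy sketch of the leader's receive-and-reduce step, with an element-wise sum standing in for the configurable reduce function:

```cpp
#include <vector>

// Sketch: after the leader has received its neighbor's buffer (already
// materialized here as a vector), reduce it element-wise into the
// local data. A plain sum stands in for the real reducer.
void ReceiveAndReduce(std::vector<int>* local,
                      const std::vector<int>& from_neighbor) {
  for (size_t i = 0; i < local->size(); ++i) {
    (*local)[i] += from_neighbor[i];
  }
}

int main() {
  std::vector<int> leader = {1, 2, 3};
  std::vector<int> neighbor = {10, 20, 30};
  ReceiveAndReduce(&leader, neighbor);  // leader now holds {11, 22, 33}
  return 0;
}
```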
@@ -50,7 +50,7 @@ public:
       Log::Fatal("For NDCG metric, should have query information");
     }
     num_queries_ = metadata.num_queries();
-    // cache inverse max DCG, avoid compution many times
+    // cache inverse max DCG to avoid repeated computation
     inverse_max_dcgs_ = new score_t[num_queries_];
     for (data_size_t i = 0; i < num_queries_; ++i) {
       inverse_max_dcgs_[i] = static_cast<score_t>(
...
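The cached quantity is 1 / maxDCG per query, so each NDCG evaluation reduces to a multiplication rather than recomputing the ideal ranking. A self-contained sketch using the standard gain (2^label − 1) and discount 1/log2(rank + 2); the library's exact truncation handling may differ:

```cpp
#include <algorithm>
#include <cmath>
#include <functional>
#include <vector>

// Sketch: inverse max DCG for one query. Sort labels descending to get
// the ideal ordering, accumulate DCG up to the truncation level, and
// return the reciprocal so the metric loop can multiply instead of divide.
double InverseMaxDCG(std::vector<double> labels, size_t truncation) {
  std::sort(labels.begin(), labels.end(), std::greater<double>());
  double max_dcg = 0.0;
  const size_t k = std::min(truncation, labels.size());
  for (size_t i = 0; i < k; ++i) {
    max_dcg += (std::pow(2.0, labels[i]) - 1.0) / std::log2(i + 2.0);
  }
  return max_dcg > 0.0 ? 1.0 / max_dcg : 0.0;
}

int main() {
  const double inv = InverseMaxDCG({2.0, 0.0, 1.0}, 10);
  (void)inv;  // NDCG@10 of a candidate ranking = its DCG * inv
  return 0;
}
```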
@@ -40,7 +40,7 @@ public:
   * \brief Construct a histogram
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   * \param ordered_gradients Ordered gradients
   * \param ordered_hessians Ordered hessians
   * \param data_indices data indices of current leaf
@@ -59,7 +59,7 @@ public:
   * \param leaf current leaf
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   * \param gradients
   * \param hessian
   */
@@ -76,7 +76,7 @@ public:
   * \brief Set sumup information for current histogram
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   */
   void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
     num_data_ = num_data;
...
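These three hunks all correct the same typo in the `FeatureHistogram` doc comments. As background: a feature histogram accumulates per-bin gradient and hessian sums over the rows in a leaf, which is what the split finder then scans. A compact sketch of that construction pass (field and function names are illustrative):

```cpp
#include <cstdint>
#include <vector>

using data_size_t = int32_t;
using score_t = float;

// One histogram entry per bin, holding the sums the split finder needs.
struct HistogramEntry {
  double sum_gradients = 0.0;
  double sum_hessians = 0.0;
  data_size_t cnt = 0;
};

// Sketch: constructing the histogram is a single pass over the leaf's
// rows, bucketing each row's gradient/hessian by its bin value.
void ConstructHistogram(const std::vector<uint8_t>& bins,
                        const std::vector<score_t>& gradients,
                        const std::vector<score_t>& hessians,
                        std::vector<HistogramEntry>* hist) {
  for (size_t i = 0; i < bins.size(); ++i) {
    HistogramEntry& e = (*hist)[bins[i]];
    e.sum_gradients += gradients[i];
    e.sum_hessians += hessians[i];
    ++e.cnt;
  }
}

int main() {
  std::vector<uint8_t> bins = {0, 1, 1, 2};
  std::vector<score_t> grad = {0.5f, -0.25f, 0.75f, 0.1f};
  std::vector<score_t> hess = {1.0f, 1.0f, 1.0f, 1.0f};
  std::vector<HistogramEntry> hist(3);
  ConstructHistogram(bins, grad, hess, &hist);
  return 0;
}
```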
@@ -26,6 +26,7 @@ public:
   }
   /*!
+  * \brief Init split on current leaf on partial data.
   * \param leaf Index of current leaf
   * \param data_partition current data partition
@@ -43,7 +44,7 @@ public:
   }
   /*!
-  * \brief Init splits on current leaf, it will travese all data to sum up the results
+  * \brief Init splits on current leaf, it will traverse all data to sum up the results
   * \param gradients
   * \param hessians
   */
...
@@ -77,9 +77,9 @@ private:
   int* block_start_;
   /*! \brief Block size for reduce scatter */
   int* block_len_;
-  /*! \brief Write positions for feature histgrams */
+  /*! \brief Write positions for feature histograms */
   int* buffer_write_start_pos_;
-  /*! \brief Read positions for local feature histgrams */
+  /*! \brief Read positions for local feature histograms */
   int* buffer_read_start_pos_;
   /*! \brief Size for reduce scatter */
   int reduce_scatter_size_;
...
@@ -107,7 +107,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
   // initialize ordered gradients and hessians
   ordered_gradients_ = new score_t[num_data_];
   ordered_hessians_ = new score_t[num_data_];
-  // if has ordered bin, need allocata a buffer to fast split
+  // if it has ordered bins, need to allocate a buffer for fast split
   if (has_ordered_bin_) {
     is_data_in_leaf_ = new char[num_data_];
   }
@@ -269,14 +269,14 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
   } else if (num_data_in_left_child < num_data_in_right_child) {
     smaller_leaf = left_leaf;
     larger_leaf = right_leaf;
-    // put parent(left) leaf's histograms into larger leaf's histgrams
+    // put parent(left) leaf's histograms into larger leaf's histograms
     if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
     histogram_pool_.Move(left_leaf, right_leaf);
     histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
   } else {
     smaller_leaf = right_leaf;
     larger_leaf = left_leaf;
-    // put parent(left) leaf's histograms to larger leaf's histgrams
+    // put parent(left) leaf's histograms to larger leaf's histograms
     if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
     histogram_pool_.Get(right_leaf, &smaller_leaf_histogram_array_);
   }
...
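The code being commented here implements the histogram-subtraction optimization used by histogram-based GBDTs: only the smaller child's histogram is built by scanning data, and the larger child's is derived as parent minus smaller, which is why the parent's histograms are routed into the larger leaf's slot. A self-contained sketch of the subtraction itself (simplified `Entry` type, illustrative names):

```cpp
#include <vector>

// Simplified per-bin entry for the sketch.
struct Entry {
  double sum_gradients = 0.0;
  double sum_hessians = 0.0;
};

// Sketch: derive the larger child's histogram as parent - smaller,
// bin by bin, instead of scanning the larger child's data.
void Subtract(const std::vector<Entry>& parent,
              const std::vector<Entry>& smaller,
              std::vector<Entry>* larger) {
  for (size_t b = 0; b < parent.size(); ++b) {
    (*larger)[b].sum_gradients = parent[b].sum_gradients - smaller[b].sum_gradients;
    (*larger)[b].sum_hessians = parent[b].sum_hessians - smaller[b].sum_hessians;
  }
}

int main() {
  std::vector<Entry> parent = {{3.0, 4.0}, {1.0, 2.0}};
  std::vector<Entry> smaller = {{1.0, 1.0}, {0.5, 1.0}};
  std::vector<Entry> larger(2);
  Subtract(parent, smaller, &larger);  // larger = {{2,3},{0.5,1}}
  return 0;
}
```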
@@ -121,7 +121,7 @@ protected:
   DataPartition* data_partition_;
   /*! \brief used to generate used features */
   Random random_;
-  /*! \brief used for sub feature training, is_feature_used_[i] = falase means don't used feature i */
+  /*! \brief used for sub-feature training, is_feature_used_[i] = false means don't use feature i */
   bool* is_feature_used_;
   /*! \brief pointer to histograms array of parent of current leaves */
   FeatureHistogram* parent_leaf_histogram_array_;
...
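`is_feature_used_` is the per-iteration column mask consulted by the split finder. One simple way such a mask could be filled when training on a feature subsample (a sketch using independent Bernoulli picks rather than an exact-count sample, and `<random>` in place of the library's `Random` helper):

```cpp
#include <random>
#include <vector>

// Sketch: mark a random subset of features as used this iteration.
// used[i] == false means feature i is skipped by the split finder,
// matching the comment in the hunk above.
std::vector<bool> SampleFeatures(int num_features, double feature_fraction,
                                 std::mt19937* rng) {
  std::vector<bool> used(num_features, false);
  std::bernoulli_distribution pick(feature_fraction);
  for (int i = 0; i < num_features; ++i) {
    used[i] = pick(*rng);
  }
  return used;
}

int main() {
  std::mt19937 rng(42);
  std::vector<bool> mask = SampleFeatures(100, 0.8, &rng);
  (void)mask;
  return 0;
}
```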