Commit 0b9fe27a authored by Hui Xue's avatar Hui Xue
Browse files

Merge branch 'xuehui1991-update_for_dcg'

merge to master.
parents 0dcd422a bb05a06f
...@@ -119,7 +119,7 @@ private: ...@@ -119,7 +119,7 @@ private:
}; };
/*! /*!
* \brief Interface for ordered bin data. It very efficient for construct histogram, especially for sparse bin * \brief Interface for ordered bin data. It's very efficient for constructing histogram, especially for sparse bin
* There are 2 advantages for using ordered bin. * There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit. * 1. group the data by leaf, improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram construction for sparse feature. * 2. only store the non-zero bin, which can speed up the histogram construction for sparse feature.
...@@ -253,7 +253,7 @@ public: ...@@ -253,7 +253,7 @@ public:
virtual OrderedBin* CreateOrderedBin() const = 0; virtual OrderedBin* CreateOrderedBin() const = 0;
/*! /*!
* \brief After pushed all feature data, should call this to have better refactor for bin data * \brief After all feature data have been pushed, call this to get a better layout for the bin data
*/ */
virtual void FinishLoad() = 0; virtual void FinishLoad() = 0;
...@@ -261,7 +261,7 @@ public: ...@@ -261,7 +261,7 @@ public:
* \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse" * \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
* \param num_data Total number of data * \param num_data Total number of data
* \param num_bin Number of bin * \param num_bin Number of bin
* \param is_sparse True if this feature is saprese * \param is_sparse True if this feature is sparse
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data ) * \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \param is_enable_sparse True if enable sparse feature * \param is_enable_sparse True if enable sparse feature
* \param is_sparse Will set to true if this bin is sparse * \param is_sparse Will set to true if this bin is sparse
......
...@@ -47,14 +47,14 @@ public: ...@@ -47,14 +47,14 @@ public:
virtual void Train() = 0; virtual void Train() = 0;
/*! /*!
* \brief Predtion for one record, not sigmoid transform * \brief Prediction for one record, not sigmoid transform
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
virtual double PredictRaw(const double * feature_values) const = 0; virtual double PredictRaw(const double * feature_values) const = 0;
/*! /*!
* \brief Predtion for one record, will use sigmoid transform if needed * \brief Prediction for one record, will use sigmoid transform if needed
* \param feature_values Feature value on this record * \param feature_values Feature value on this record
* \return Prediction result for this record * \return Prediction result for this record
*/ */
......
...@@ -20,7 +20,7 @@ public: ...@@ -20,7 +20,7 @@ public:
virtual ~ConfigBase() {} virtual ~ConfigBase() {}
/*! /*!
* \brief SetLabelAt current config object by params * \brief Set current config object by params
* \param params Store the key and value for params * \param params Store the key and value for params
*/ */
virtual void Set( virtual void Set(
...@@ -30,7 +30,7 @@ public: ...@@ -30,7 +30,7 @@ public:
* \brief Get string value by specific name of key * \brief Get string value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetString( inline bool GetString(
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
* \brief Get int value by specific name of key * \brief Get int value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetInt( inline bool GetInt(
...@@ -52,7 +52,7 @@ public: ...@@ -52,7 +52,7 @@ public:
* \brief Get double value by specific name of key * \brief Get double value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetDouble( inline bool GetDouble(
...@@ -63,7 +63,7 @@ public: ...@@ -63,7 +63,7 @@ public:
* \brief Get bool value by specific name of key * \brief Get bool value by specific name of key
* \param params Store the key and value for params * \param params Store the key and value for params
* \param name Name of key * \param name Name of key
* \param out Value will asign to out if key exists * \param out Value will assign to out if key exists
* \return True if key exists * \return True if key exists
*/ */
inline bool GetBool( inline bool GetBool(
......
...@@ -17,7 +17,7 @@ namespace LightGBM { ...@@ -17,7 +17,7 @@ namespace LightGBM {
class Feature; class Feature;
/*! /*!
* \brief This class is used to store some meta(non-feature) data for tranining data, * \brief This class is used to store some meta(non-feature) data for training data,
* e.g. labels, weights, initial scores, query level information. * e.g. labels, weights, initial scores, query level information.
* *
* Some details: * Some details:
...@@ -110,14 +110,14 @@ public: ...@@ -110,14 +110,14 @@ public:
} }
/*! /*!
* \brief Get weights, if not exists, will return nullput * \brief Get weights, if not exists, will return nullptr
* \return Pointer of weights * \return Pointer of weights
*/ */
inline const float* weights() inline const float* weights()
const { return weights_; } const { return weights_; }
/*! /*!
* \brief Get data boundaries on queries, if not exists, will return nullput * \brief Get data boundaries on queries, if not exists, will return nullptr
* we assume data will order by query, * we assume data will order by query,
* the interval of [query_boundaris[i], query_boundaris[i+1]) * the interval of [query_boundaris[i], query_boundaris[i+1])
* is the data indices for query i. * is the data indices for query i.
...@@ -133,13 +133,13 @@ public: ...@@ -133,13 +133,13 @@ public:
inline const data_size_t num_queries() const { return num_queries_; } inline const data_size_t num_queries() const { return num_queries_; }
/*! /*!
* \brief Get weights for queries, if not exists, will return nullput * \brief Get weights for queries, if not exists, will return nullptr
* \return Pointer of weights for queries * \return Pointer of weights for queries
*/ */
inline const float* query_weights() const { return query_weights_; } inline const float* query_weights() const { return query_weights_; }
/*! /*!
* \brief Get initial scores, if not exists, will return nullput * \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores * \return Pointer of initial scores
*/ */
inline const score_t* init_score() const { return init_score_; } inline const score_t* init_score() const { return init_score_; }
...@@ -231,7 +231,7 @@ public: ...@@ -231,7 +231,7 @@ public:
* \param max_bin The maximal number of bin that feature values will bucket in * \param max_bin The maximal number of bin that feature values will bucket in
* \param random_seed The seed for random generator * \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature * \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/ */
Dataset(const char* data_filename, const char* init_score_filename, Dataset(const char* data_filename, const char* init_score_filename,
int max_bin, int random_seed, bool is_enable_sparse, const PredictFunction& predict_fun); int max_bin, int random_seed, bool is_enable_sparse, const PredictFunction& predict_fun);
...@@ -243,7 +243,7 @@ public: ...@@ -243,7 +243,7 @@ public:
* \param max_bin The maximal number of bin that feature values will bucket in * \param max_bin The maximal number of bin that feature values will bucket in
* \param random_seed The seed for random generator * \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature * \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/ */
Dataset(const char* data_filename, Dataset(const char* data_filename,
int max_bin, int random_seed, bool is_enable_sparse, int max_bin, int random_seed, bool is_enable_sparse,
......
...@@ -37,7 +37,7 @@ public: ...@@ -37,7 +37,7 @@ public:
/*! /*!
* \brief node type on recursive halving algorithm * \brief node type on recursive halving algorithm
* When number of machines is not power of 2, need group maiches into power of 2 group. * When number of machines is not power of 2, need group machines into power of 2 group.
* And we can let each group has at most 2 machines. * And we can let each group has at most 2 machines.
* if the group only has 1 machine. this machine is the normal node * if the group only has 1 machine. this machine is the normal node
* if the group has 2 machines, this group will have two types of nodes, one is the leader. * if the group has 2 machines, this group will have two types of nodes, one is the leader.
......
...@@ -80,7 +80,7 @@ void Application::LoadParameters(int argc, char** argv) { ...@@ -80,7 +80,7 @@ void Application::LoadParameters(int argc, char** argv) {
config_reader.ReadAllLines(); config_reader.ReadAllLines();
if (config_reader.Lines().size() > 0) { if (config_reader.Lines().size() > 0) {
for (auto& line : config_reader.Lines()) { for (auto& line : config_reader.Lines()) {
// remove str after # // remove str after "#"
if (line.size() > 0 && std::string::npos != line.find_first_of("#")) { if (line.size() > 0 && std::string::npos != line.find_first_of("#")) {
line.erase(line.find_first_of("#")); line.erase(line.find_first_of("#"));
} }
......
...@@ -248,7 +248,6 @@ std::string GBDT::ModelsToString() const { ...@@ -248,7 +248,6 @@ std::string GBDT::ModelsToString() const {
void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) { void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
// use serialized string to restore this object // use serialized string to restore this object
// deseialize string to object????
models_.clear(); models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n'); std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
size_t i = 0; size_t i = 0;
......
...@@ -107,7 +107,7 @@ private: ...@@ -107,7 +107,7 @@ private:
*/ */
void UpdateScore(const Tree* tree); void UpdateScore(const Tree* tree);
/*! /*!
* \brief Print Metric result of current iteration * \brief Print metric result of current iteration
* \param iter Current iteration * \param iter Current iteration
*/ */
void OutputMetric(int iter); void OutputMetric(int iter);
...@@ -116,11 +116,11 @@ private: ...@@ -116,11 +116,11 @@ private:
const Dataset* train_data_; const Dataset* train_data_;
/*! \brief Config of gbdt */ /*! \brief Config of gbdt */
const GBDTConfig* gbdt_config_; const GBDTConfig* gbdt_config_;
/*! \brief Tree learner, will use tihs class to learn trees */ /*! \brief Tree learner, will use this class to learn trees */
TreeLearner* tree_learner_; TreeLearner* tree_learner_;
/*! \brief Objective function */ /*! \brief Objective function */
const ObjectiveFunction* object_function_; const ObjectiveFunction* object_function_;
/*! \brief Store and update traning data's score */ /*! \brief Store and update training data's score */
ScoreUpdater* train_score_updater_; ScoreUpdater* train_score_updater_;
/*! \brief Metrics for training data */ /*! \brief Metrics for training data */
std::vector<const Metric*> training_metrics_; std::vector<const Metric*> training_metrics_;
......
...@@ -57,8 +57,8 @@ public: ...@@ -57,8 +57,8 @@ public:
* \brief Like AddScore(const Tree* tree), but only for part of data * \brief Like AddScore(const Tree* tree), but only for part of data
* Used for prediction of training out-of-bag data * Used for prediction of training out-of-bag data
* \param tree Trained tree model * \param tree Trained tree model
* \param data_indices Indices of data that want proccess to * \param data_indices Indices of data that will be processed
* \param data_cnt Number of data that want proccess to * \param data_cnt Number of data that will be processed
*/ */
inline void AddScore(const Tree* tree, const data_size_t* data_indices, inline void AddScore(const Tree* tree, const data_size_t* data_indices,
data_size_t data_cnt) { data_size_t data_cnt) {
......
...@@ -31,12 +31,12 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename, ...@@ -31,12 +31,12 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
// create text parser // create text parser
parser_ = Parser::CreateParser(data_filename_, 0, nullptr); parser_ = Parser::CreateParser(data_filename_, 0, nullptr);
if (parser_ == nullptr) { if (parser_ == nullptr) {
Log::Stderr("cannot recognise input data format, filename: %s", data_filename_); Log::Stderr("cannot recognize input data format, filename: %s", data_filename_);
} }
// create text reader // create text reader
text_reader_ = new TextReader<data_size_t>(data_filename); text_reader_ = new TextReader<data_size_t>(data_filename);
} else { } else {
// only need to load initilize score, other meta data will load from bin flie // only need to load the initial score; other meta data will be loaded from the bin file
metadata_.Init(init_score_filename); metadata_.Init(init_score_filename);
Log::Stdout("will load data set from binary file"); Log::Stdout("will load data set from binary file");
parser_ = nullptr; parser_ = nullptr;
...@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit ...@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer)); size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
// re-allocmate space if not enough // re-allocate space if not enough
if (size_of_metadata > buffer_size) { if (size_of_metadata > buffer_size) {
delete[] buffer; delete[] buffer;
buffer_size = size_of_metadata; buffer_size = size_of_metadata;
...@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit ...@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
Log::Stderr("binary file format error at feature %d's size", i); Log::Stderr("binary file format error at feature %d's size", i);
} }
size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer)); size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
// re-allocmate space if not enough // re-allocate space if not enough
if (size_of_feature > buffer_size) { if (size_of_feature > buffer_size) {
delete[] buffer; delete[] buffer;
buffer_size = size_of_feature; buffer_size = size_of_feature;
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
namespace LightGBM { namespace LightGBM {
/*! /*!
* \brief Used to Store bins for dense feature * \brief Used to store bins for dense feature
* Use template to reduce memory cost * Use template to reduce memory cost
*/ */
template <typename VAL_T> template <typename VAL_T>
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
namespace LightGBM { namespace LightGBM {
/*! /*!
* \brief Ordered bin for sparse feature . efficient for construct histogram, especally for sparse bin * \brief Ordered bin for sparse feature. Efficient for constructing histograms, especially for sparse bin
* There are 2 advantages for using ordered bin. * There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit. * 1. group the data by leaf, improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram construction for sparse feature. * 2. only store the non-zero bin, which can speed up the histogram construction for sparse feature.
......
...@@ -225,7 +225,7 @@ public: ...@@ -225,7 +225,7 @@ public:
} }
private: private:
/*! \brief Output frequently */ /*! \brief Output frequency */
int output_freq_; int output_freq_;
/*! \brief Number of data */ /*! \brief Number of data */
data_size_t num_data_; data_size_t num_data_;
......
...@@ -21,7 +21,7 @@ void DCGCalculator::Init(std::vector<double> input_label_gain) { ...@@ -21,7 +21,7 @@ void DCGCalculator::Init(std::vector<double> input_label_gain) {
label_gain_ = input_label_gain; label_gain_ = input_label_gain;
discount_.clear(); discount_.clear();
for (data_size_t i = 0; i < kMaxPosition; ++i) { for (data_size_t i = 0; i < kMaxPosition; ++i) {
discount_.emplace_back(1.0 / std::log(2.0 + i)); discount_.emplace_back(1.0 / std::log2(2.0 + i));
} }
is_inited_ = true; is_inited_ = true;
} }
......
...@@ -65,7 +65,7 @@ public: ...@@ -65,7 +65,7 @@ public:
} }
private: private:
/*! \brief Output frequently */ /*! \brief Output frequency */
int output_freq_; int output_freq_;
/*! \brief Number of data */ /*! \brief Number of data */
data_size_t num_data_; data_size_t num_data_;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
namespace LightGBM { namespace LightGBM {
// static member defination // static member definition
int Network::num_machines_; int Network::num_machines_;
int Network::rank_; int Network::rank_;
Linkers* Network::linkers_; Linkers* Network::linkers_;
...@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int* ...@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
// send local data to neighbor first // send local data to neighbor first
linkers_->Send(recursive_halving_map_.neighbor, input, input_size); linkers_->Send(recursive_halving_map_.neighbor, input, input_size);
} else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) { } else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) {
// recieve neighbor data first // receive neighbor data first
int need_recv_cnt = input_size; int need_recv_cnt = input_size;
linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt); linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt);
// reduce // reduce
......
...@@ -50,7 +50,7 @@ public: ...@@ -50,7 +50,7 @@ public:
Log::Stderr("For NDCG metric, should have query information"); Log::Stderr("For NDCG metric, should have query information");
} }
num_queries_ = metadata.num_queries(); num_queries_ = metadata.num_queries();
// cache inverse max DCG, avoid compution many times // cache inverse max DCG, avoid computation many times
inverse_max_dcgs_ = new score_t[num_queries_]; inverse_max_dcgs_ = new score_t[num_queries_];
for (data_size_t i = 0; i < num_queries_; ++i) { for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_[i] = static_cast<score_t>( inverse_max_dcgs_[i] = static_cast<score_t>(
......
...@@ -40,7 +40,7 @@ public: ...@@ -40,7 +40,7 @@ public:
* \brief Construct a histogram * \brief Construct a histogram
* \param num_data number of data in current leaf * \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf * \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf * \param sum_hessians sum of hessians of current leaf
* \param ordered_gradients Ordered gradients * \param ordered_gradients Ordered gradients
* \param ordered_hessians Ordered hessians * \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf * \param data_indices data indices of current leaf
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
* \param leaf current leaf * \param leaf current leaf
* \param num_data number of data in current leaf * \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf * \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf * \param sum_hessians sum of hessians of current leaf
* \param gradients * \param gradients
* \param hessian * \param hessian
*/ */
...@@ -76,7 +76,7 @@ public: ...@@ -76,7 +76,7 @@ public:
* \brief Set sumup information for current histogram * \brief Set sumup information for current histogram
* \param num_data number of data in current leaf * \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf * \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf * \param sum_hessians sum of hessians of current leaf
*/ */
void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) { void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
num_data_ = num_data; num_data_ = num_data;
......
...@@ -26,7 +26,7 @@ public: ...@@ -26,7 +26,7 @@ public:
} }
/*! /*!
* \brief Init splits on current leaf, don't need to travesal all data * \brief Init splits on current leaf, don't need to traverse all data
* \param leaf Index of current leaf * \param leaf Index of current leaf
* \param data_partition current data partition * \param data_partition current data partition
* \param sum_gradients * \param sum_gradients
...@@ -43,7 +43,7 @@ public: ...@@ -43,7 +43,7 @@ public:
} }
/*! /*!
* \brief Init splits on current leaf, need to travesal all data to sum up * \brief Init splits on current leaf, need to traverse all data to sum up
* \param gradients * \param gradients
* \param hessians * \param hessians
*/ */
......
...@@ -77,9 +77,9 @@ private: ...@@ -77,9 +77,9 @@ private:
int* block_start_; int* block_start_;
/*! \brief Block size for reduce scatter */ /*! \brief Block size for reduce scatter */
int* block_len_; int* block_len_;
/*! \brief Write positions for feature histgrams */ /*! \brief Write positions for feature histograms */
int* buffer_write_start_pos_; int* buffer_write_start_pos_;
/*! \brief Read positions for local feature histgrams */ /*! \brief Read positions for local feature histograms */
int* buffer_read_start_pos_; int* buffer_read_start_pos_;
/*! \brief Size for reduce scatter */ /*! \brief Size for reduce scatter */
int reduce_scatter_size_; int reduce_scatter_size_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment