"tests/vscode:/vscode.git/clone" did not exist on "1d7b54d30ff0d657d1756a198734545047e40ab1"
Commit 0b9fe27a authored by Hui Xue's avatar Hui Xue
Browse files

Merge branch 'xuehui1991-update_for_dcg'

merge to master.
parents 0dcd422a bb05a06f
......@@ -119,7 +119,7 @@ private:
};
/*!
* \brief Interface for ordered bin data. It very efficient for construct histogram, especially for sparse bin
* \brief Interface for ordered bin data. It's very efficient for constructing histogram, especially for sparse bin
* There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram construction for sparse features.
......@@ -253,7 +253,7 @@ public:
virtual OrderedBin* CreateOrderedBin() const = 0;
/*!
* \brief After pushed all feature data, should call this to have better refactor for bin data
 * \brief After all feature data has been pushed, call this to refactor the bin data for better efficiency
*/
virtual void FinishLoad() = 0;
......@@ -261,7 +261,7 @@ public:
* \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
* \param num_data Total number of data
* \param num_bin Number of bin
* \param is_sparse True if this feature is saprese
* \param is_sparse True if this feature is sparse
 * \param sparse_rate Sparse rate of this bin (num_bin0/num_data)
* \param is_enable_sparse True if enable sparse feature
 * \param is_sparse Will be set to true if this bin is sparse
......
......@@ -47,14 +47,14 @@ public:
virtual void Train() = 0;
/*!
* \brief Predtion for one record, not sigmoid transform
* \brief Prediction for one record, not sigmoid transform
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
virtual double PredictRaw(const double * feature_values) const = 0;
/*!
* \brief Predtion for one record, will use sigmoid transform if needed
* \brief Prediction for one record, will use sigmoid transform if needed
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
......
......@@ -20,7 +20,7 @@ public:
virtual ~ConfigBase() {}
/*!
* \brief SetLabelAt current config object by params
* \brief Set current config object by params
* \param params Store the key and value for params
*/
virtual void Set(
......@@ -30,7 +30,7 @@ public:
* \brief Get string value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetString(
......@@ -41,7 +41,7 @@ public:
* \brief Get int value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetInt(
......@@ -52,7 +52,7 @@ public:
* \brief Get double value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetDouble(
......@@ -63,7 +63,7 @@ public:
* \brief Get bool value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will asign to out if key exists
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline bool GetBool(
......
......@@ -17,7 +17,7 @@ namespace LightGBM {
class Feature;
/*!
* \brief This class is used to store some meta(non-feature) data for tranining data,
* \brief This class is used to store some meta(non-feature) data for training data,
 * e.g. labels, weights, initial scores, query-level information.
*
* Some details:
......@@ -110,14 +110,14 @@ public:
}
/*!
* \brief Get weights, if not exists, will return nullput
* \brief Get weights, if not exists, will return nullptr
* \return Pointer of weights
*/
inline const float* weights()
const { return weights_; }
/*!
* \brief Get data boundaries on queries, if not exists, will return nullput
* \brief Get data boundaries on queries, if not exists, will return nullptr
 * we assume data is ordered by query;
 * the interval [query_boundaries[i], query_boundaries[i+1])
 * contains the data indices for query i.
......@@ -133,13 +133,13 @@ public:
inline const data_size_t num_queries() const { return num_queries_; }
/*!
* \brief Get weights for queries, if not exists, will return nullput
* \brief Get weights for queries, if not exists, will return nullptr
* \return Pointer of weights for queries
*/
inline const float* query_weights() const { return query_weights_; }
/*!
* \brief Get initial scores, if not exists, will return nullput
* \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores
*/
inline const score_t* init_score() const { return init_score_; }
......@@ -231,7 +231,7 @@ public:
* \param max_bin The maximal number of bin that feature values will bucket in
* \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
* \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/
Dataset(const char* data_filename, const char* init_score_filename,
int max_bin, int random_seed, bool is_enable_sparse, const PredictFunction& predict_fun);
......@@ -243,7 +243,7 @@ public:
* \param max_bin The maximal number of bin that feature values will bucket in
* \param random_seed The seed for random generator
* \param is_enable_sparse True for sparse feature
* \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
* \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
*/
Dataset(const char* data_filename,
int max_bin, int random_seed, bool is_enable_sparse,
......
......@@ -37,7 +37,7 @@ public:
/*!
* \brief node type on recursive halving algorithm
* When number of machines is not power of 2, need group maiches into power of 2 group.
* When number of machines is not power of 2, need group machines into power of 2 group.
 * And we can let each group have at most 2 machines.
 * If the group has only 1 machine, this machine is a normal node.
 * If the group has 2 machines, it will have two types of nodes; one is the leader.
......
......@@ -80,7 +80,7 @@ void Application::LoadParameters(int argc, char** argv) {
config_reader.ReadAllLines();
if (config_reader.Lines().size() > 0) {
for (auto& line : config_reader.Lines()) {
// remove str after #
// remove str after "#"
if (line.size() > 0 && std::string::npos != line.find_first_of("#")) {
line.erase(line.find_first_of("#"));
}
......
......@@ -248,7 +248,6 @@ std::string GBDT::ModelsToString() const {
void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
// use serialized string to restore this object
// deseialize string to object????
models_.clear();
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
size_t i = 0;
......
......@@ -107,7 +107,7 @@ private:
*/
void UpdateScore(const Tree* tree);
/*!
* \brief Print Metric result of current iteration
* \brief Print metric result of current iteration
 * \param iter Current iteration
*/
void OutputMetric(int iter);
......@@ -116,11 +116,11 @@ private:
const Dataset* train_data_;
/*! \brief Config of gbdt */
const GBDTConfig* gbdt_config_;
/*! \brief Tree learner, will use tihs class to learn trees */
/*! \brief Tree learner, will use this class to learn trees */
TreeLearner* tree_learner_;
/*! \brief Objective function */
const ObjectiveFunction* object_function_;
/*! \brief Store and update traning data's score */
/*! \brief Store and update training data's score */
ScoreUpdater* train_score_updater_;
/*! \brief Metrics for training data */
std::vector<const Metric*> training_metrics_;
......
......@@ -57,8 +57,8 @@ public:
* \brief Like AddScore(const Tree* tree), but only for part of data
 * Used for prediction of training out-of-bag data
* \param tree Trained tree model
* \param data_indices Indices of data that want proccess to
* \param data_cnt Number of data that want proccess to
 * \param data_indices Indices of data that will be processed
 * \param data_cnt Number of data that will be processed
*/
inline void AddScore(const Tree* tree, const data_size_t* data_indices,
data_size_t data_cnt) {
......
......@@ -31,12 +31,12 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
// create text parser
parser_ = Parser::CreateParser(data_filename_, 0, nullptr);
if (parser_ == nullptr) {
Log::Stderr("cannot recognise input data format, filename: %s", data_filename_);
Log::Stderr("cannot recognize input data format, filename: %s", data_filename_);
}
// create text reader
text_reader_ = new TextReader<data_size_t>(data_filename);
} else {
// only need to load initilize score, other meta data will load from bin flie
// only need to load the initial score; other metadata will be loaded from the binary file
metadata_.Init(init_score_filename);
Log::Stdout("will load data set from binary file");
parser_ = nullptr;
......@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
// re-allocmate space if not enough
// re-allocate space if not enough
if (size_of_metadata > buffer_size) {
delete[] buffer;
buffer_size = size_of_metadata;
......@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
Log::Stderr("binary file format error at feature %d's size", i);
}
size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
// re-allocmate space if not enough
// re-allocate space if not enough
if (size_of_feature > buffer_size) {
delete[] buffer;
buffer_size = size_of_feature;
......
......@@ -10,7 +10,7 @@
namespace LightGBM {
/*!
* \brief Used to Store bins for dense feature
* \brief Used to store bins for dense feature
* Use template to reduce memory cost
*/
template <typename VAL_T>
......
......@@ -13,7 +13,7 @@
namespace LightGBM {
/*!
* \brief Ordered bin for sparse feature . efficient for construct histogram, especally for sparse bin
 * \brief Ordered bin for sparse features. Efficient for constructing histograms, especially for sparse bins
* There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram construction for sparse features.
......
......@@ -225,7 +225,7 @@ public:
}
private:
/*! \brief Output frequently */
/*! \brief Output frequency */
int output_freq_;
/*! \brief Number of data */
data_size_t num_data_;
......
......@@ -21,7 +21,7 @@ void DCGCalculator::Init(std::vector<double> input_label_gain) {
label_gain_ = input_label_gain;
discount_.clear();
for (data_size_t i = 0; i < kMaxPosition; ++i) {
discount_.emplace_back(1.0 / std::log(2.0 + i));
discount_.emplace_back(1.0 / std::log2(2.0 + i));
}
is_inited_ = true;
}
......
......@@ -65,7 +65,7 @@ public:
}
private:
/*! \brief Output frequently */
/*! \brief Output frequency */
int output_freq_;
/*! \brief Number of data */
data_size_t num_data_;
......
......@@ -9,7 +9,7 @@
namespace LightGBM {
// static member defination
// static member definition
int Network::num_machines_;
int Network::rank_;
Linkers* Network::linkers_;
......@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
// send local data to neighbor first
linkers_->Send(recursive_halving_map_.neighbor, input, input_size);
} else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) {
// recieve neighbor data first
// receive neighbor data first
int need_recv_cnt = input_size;
linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt);
// reduce
......
......@@ -50,7 +50,7 @@ public:
Log::Stderr("For NDCG metric, should have query information");
}
num_queries_ = metadata.num_queries();
// cache inverse max DCG, avoid compution many times
// cache inverse max DCG, avoid computation many times
inverse_max_dcgs_ = new score_t[num_queries_];
for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_[i] = static_cast<score_t>(
......
......@@ -40,7 +40,7 @@ public:
* \brief Construct a histogram
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf
* \param sum_hessians sum of hessians of current leaf
 * \param ordered_gradients Ordered gradients
* \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf
......@@ -59,7 +59,7 @@ public:
* \param leaf current leaf
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf
* \param sum_hessians sum of hessians of current leaf
* \param gradients
* \param hessian
*/
......@@ -76,7 +76,7 @@ public:
* \brief Set sumup information for current histogram
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf
* \param sum_hessians sum of hessians of current leaf
*/
void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
num_data_ = num_data;
......
......@@ -26,7 +26,7 @@ public:
}
/*!
* \brief Init splits on current leaf, don't need to travesal all data
* \brief Init splits on current leaf, don't need to traverse all data
* \param leaf Index of current leaf
* \param data_partition current data partition
* \param sum_gradients
......@@ -43,7 +43,7 @@ public:
}
/*!
* \brief Init splits on current leaf, need to travesal all data to sum up
* \brief Init splits on current leaf, need to traverse all data to sum up
* \param gradients
* \param hessians
*/
......
......@@ -77,9 +77,9 @@ private:
int* block_start_;
/*! \brief Block size for reduce scatter */
int* block_len_;
/*! \brief Write positions for feature histgrams */
/*! \brief Write positions for feature histograms */
int* buffer_write_start_pos_;
/*! \brief Read positions for local feature histgrams */
/*! \brief Read positions for local feature histograms */
int* buffer_read_start_pos_;
/*! \brief Size for reduce scatter */
int reduce_scatter_size_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment