Commit 8497af62 authored by Allardvm's avatar Allardvm
Browse files

Improved consistency and wording of user-facing logs and documentation

Packages that parse LightGBM’s logs will require minor changes to
parsing logic to work correctly.
parent 4e291459
......@@ -2,21 +2,21 @@ LightGBM, Light Gradient Boosting Machine
==========
[![Build Status](https://travis-ci.org/Microsoft/LightGBM.svg?branch=master)](https://travis-ci.org/Microsoft/LightGBM)
LightGBM is a gradient boosting framework that is using tree based learning algorithms. It is designed to be distributed and efficient with following advantages:
LightGBM is a gradient boosting framework that uses tree-based learning algorithms. It is designed to be distributed and efficient with the following advantages:
- Fast training speed and high efficiency
- Faster training speed and higher efficiency
- Lower memory usage
- Better accuracy
- Parallel learning supported
- Capability of handling large-scaling data
- Capable of handling large-scale data
For more details, please refer to [Features](https://github.com/Microsoft/LightGBM/wiki/Features).
The [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on public datasets show that LightGBM outperform other existing boosting tools on both efficiency and accuracy, with significant lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) show that LightGBM can achieve linear speed-up by using multiple machines for training in specific settings.
[Experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on public datasets show that LightGBM can outperform other existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) show that LightGBM can achieve a linear speed-up by using multiple machines for training in specific settings.
Get Started
------------
For a quick start, please follow the [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide) and [Quick Start](https://github.com/Microsoft/LightGBM/wiki/Quick-Start).
To get started, please follow the [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide) and [Quick Start](https://github.com/Microsoft/LightGBM/wiki/Quick-Start).
Documents
------------
......@@ -28,8 +28,6 @@ Documents
* [**Parallel Learning Guide**](https://github.com/Microsoft/LightGBM/wiki/Parallel-Learning-Guide)
* [**Configuration**](https://github.com/Microsoft/LightGBM/wiki/Configuration)
Microsoft Open Source Code of Conduct
------------
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
......@@ -259,7 +259,7 @@ inline bool ConfigBase::GetInt(
const std::string& name, int* out) {
if (params.count(name) > 0) {
if (!Common::AtoiAndCheck(params.at(name).c_str(), out)) {
Log::Fatal("Parameter %s should be int type, passed is [%s]",
Log::Fatal("Parameter %s should be of type int, got [%s]",
name.c_str(), params.at(name).c_str());
}
return true;
......@@ -272,7 +272,7 @@ inline bool ConfigBase::GetDouble(
const std::string& name, double* out) {
if (params.count(name) > 0) {
if (!Common::AtofAndCheck(params.at(name).c_str(), out)) {
Log::Fatal("Parameter %s should be double type, passed is [%s]",
Log::Fatal("Parameter %s should be of type double, got [%s]",
name.c_str(), params.at(name).c_str());
}
return true;
......@@ -291,7 +291,7 @@ inline bool ConfigBase::GetBool(
} else if (value == std::string("true") || value == std::string("+")) {
*out = true;
} else {
Log::Fatal("Parameter %s should be \"true\"/\"+\" or \"false\"/\"-\", passed is [%s]",
Log::Fatal("Parameter %s should be \"true\"/\"+\" or \"false\"/\"-\", got [%s]",
name.c_str(), params.at(name).c_str());
}
return true;
......
......@@ -179,7 +179,7 @@ inline static const char* Atof(const char* p, double* out) {
} else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
*out = sign * 1e308;
} else {
Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
Log::Fatal("Unknown token %s in data file", tmp_str.c_str());
}
p += cnt;
}
......@@ -255,7 +255,7 @@ inline static std::string ArrayToString(std::vector<T> arr, char delimiter) {
inline static void StringToIntArray(const std::string& str, char delimiter, size_t n, int* out) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
if (strs.size() != n) {
Log::Fatal("StringToIntArray error, size doesn't matched.");
Log::Fatal("StringToIntArray error, size doesn't match.");
}
for (size_t i = 0; i < strs.size(); ++i) {
strs[i] = Trim(strs[i]);
......@@ -267,7 +267,7 @@ inline static void StringToIntArray(const std::string& str, char delimiter, size
inline static void StringToDoubleArray(const std::string& str, char delimiter, size_t n, double* out) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
if (strs.size() != n) {
Log::Fatal("StringToDoubleArray error, size doesn't matched.");
Log::Fatal("StringToDoubleArray error, size doesn't match.");
}
for (size_t i = 0; i < strs.size(); ++i) {
strs[i] = Trim(strs[i]);
......
......@@ -35,7 +35,7 @@ public:
file = fopen(filename, "r");
#endif
if (file == NULL) {
Log::Fatal("failed to open file %s", filename);
Log::Fatal("Could not open %s", filename);
}
std::stringstream str_buf;
int read_c = -1;
......@@ -59,7 +59,7 @@ public:
}
fclose(file);
first_line_ = str_buf.str();
Log::Debug("skip header:\"%s\" in file %s", first_line_.c_str(), filename_);
Log::Debug("Skipped header \"%s\" in file %s", first_line_.c_str(), filename_);
}
}
/*!
......@@ -129,7 +129,7 @@ public:
});
// if last line of file doesn't contain end of line
if (last_line_.size() > 0) {
Log::Info("Warning: last line of file %s doesn't contain end of line, application will still use this line", filename_);
Log::Info("Warning: last line of %s has no end of line, still using this line", filename_);
process_fun(total_cnt, last_line_.c_str(), last_line_.size());
++total_cnt;
last_line_ = "";
......@@ -266,7 +266,7 @@ public:
});
// if last line of file doesn't contain end of line
if (last_line_.size() > 0) {
Log::Info("Warning: last line of file %s doesn't contain end of line, application will still use this line", filename_);
Log::Info("Warning: last line of %s has no end of line, still using this line", filename_);
if (filter_fun(used_cnt, total_cnt)) {
lines_.push_back(last_line_);
process_fun(used_cnt, lines_);
......
......@@ -95,7 +95,7 @@ void Application::LoadParameters(int argc, char** argv) {
if (key.size() <= 0) {
continue;
}
// Command line have higher priority
// Command-line has higher priority
if (params.count(key) == 0) {
params[key] = value;
}
......@@ -105,7 +105,7 @@ void Application::LoadParameters(int argc, char** argv) {
}
}
} else {
Log::Warning("Config file: %s doesn't exist, will ignore",
Log::Warning("Config file %s doesn't exist, will ignore",
params["config_file"].c_str());
}
}
......@@ -113,15 +113,15 @@ void Application::LoadParameters(int argc, char** argv) {
ParameterAlias::KeyAliasTransform(&params);
// load configs
config_.Set(params);
Log::Info("Loading parameters .. finished");
Log::Info("Finished loading parameters");
}
void Application::LoadData() {
auto start_time = std::chrono::high_resolution_clock::now();
// predition is needed if using input initial model(continued train)
// prediction is needed if using input initial model(continued train)
PredictFunction predict_fun = nullptr;
Predictor* predictor = nullptr;
// need to continue train
// need to continue training
if (boosting_->NumberOfSubModels() > 0) {
predictor = new Predictor(boosting_, config_.io_config.is_sigmoid, config_.predict_leaf_index);
if (config_.io_config.num_class == 1){
......@@ -170,7 +170,7 @@ void Application::LoadData() {
train_metric_.push_back(metric);
}
}
// Add validation data, if exists
// Add validation data, if it exists
for (size_t i = 0; i < config_.io_config.valid_data_filenames.size(); ++i) {
// add
valid_datas_.push_back(
......@@ -201,7 +201,7 @@ void Application::LoadData() {
}
auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration
Log::Info("Finish loading data, use %f seconds",
Log::Info("Finished loading data in %f seconds",
std::chrono::duration<double, std::milli>(end_time - start_time) * 1e-3);
}
......@@ -209,7 +209,7 @@ void Application::InitTrain() {
if (config_.is_parallel) {
// need init network
Network::Init(config_.network_config);
Log::Info("Finish network initialization");
Log::Info("Finished initializing network");
// sync global random seed for feature patition
if (config_.boosting_type == BoostingType::kGBDT) {
GBDTConfig* gbdt_config =
......@@ -240,11 +240,11 @@ void Application::InitTrain() {
boosting_->AddDataset(valid_datas_[i],
ConstPtrInVectorWarpper<Metric>(valid_metrics_[i]));
}
Log::Info("Finish training initilization.");
Log::Info("Finished initializing training");
}
void Application::Train() {
Log::Info("Start train ...");
Log::Info("Started training...");
int total_iter = config_.boosting_config->num_iterations;
bool is_finished = false;
bool need_eval = true;
......@@ -253,14 +253,14 @@ void Application::Train() {
is_finished = boosting_->TrainOneIter(nullptr, nullptr, need_eval);
auto end_time = std::chrono::high_resolution_clock::now();
// output used time per iteration
Log::Info("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
Log::Info("%f seconds elapsed, finished iteration %d", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1);
boosting_->SaveModelToFile(is_finished, config_.io_config.output_model.c_str());
}
is_finished = true;
// save model to file
boosting_->SaveModelToFile(is_finished, config_.io_config.output_model.c_str());
Log::Info("Finished train");
Log::Info("Finished training");
}
......@@ -271,20 +271,20 @@ void Application::Predict() {
config_.predict_leaf_index);
predictor.Predict(config_.io_config.data_filename.c_str(),
config_.io_config.output_result.c_str(), config_.io_config.has_header);
Log::Info("Finish predict.");
Log::Info("Finished prediction");
}
/*!
* \brief Prepare for prediction: create the boosting object from
*        config_.io_config.input_model and store it in boosting_
*/
void Application::InitPredict() {
// build the boosting object from the input model path given in the IO config
boosting_ =
Boosting::CreateBoosting(config_.io_config.input_model.c_str());
// report that the prediction setup step has completed
Log::Info("Finish predict initilization.");
Log::Info("Finished initializing prediction");
}
template<typename T>
T Application::GlobalSyncUpByMin(T& local) {
T global = local;
if (!config_.is_parallel) {
// not need to sync if not parallel learning
// no need to sync if not parallel learning
return global;
}
Network::Allreduce(reinterpret_cast<char*>(&local),
......
......@@ -25,7 +25,7 @@ public:
/*!
* \brief Constructor
* \param boosting Input boosting model
* \param is_sigmoid True if need to predict result with sigmoid transform(if needed, like binary classification)
* \param is_sigmoid True if need to predict result with sigmoid transform (if needed, like binary classification)
* \param predict_leaf_index True if output leaf index instead of prediction score
*/
Predictor(const Boosting* boosting, bool is_simgoid, bool is_predict_leaf_index)
......@@ -56,7 +56,7 @@ public:
}
/*!
* \brief prediction for one record, only raw result(without sigmoid transformation)
* \brief prediction for one record, only raw result (without sigmoid transformation)
* \param features Feature for this record
* \return Prediction result
*/
......@@ -67,7 +67,7 @@ public:
}
/*!
* \brief prediction for one record, only raw result(without sigmoid transformation)
* \brief prediction for one record, only raw result (without sigmoid transformation)
* \param features Feature for this record
* \return Predicted leaf index
*/
......@@ -78,7 +78,7 @@ public:
}
/*!
* \brief prediction for one record, will use sigmoid transformation if needed(only enabled for binary classification noe)
* \brief prediction for one record, will use sigmoid transformation if needed (only enabled for binary classification now)
* \param features Feature of this record
* \return Prediction result
*/
......@@ -115,12 +115,12 @@ public:
#endif
if (result_file == NULL) {
Log::Fatal("Predition result file %s doesn't exists", data_filename);
Log::Fatal("Prediction results file %s doesn't exist", data_filename);
}
Parser* parser = Parser::CreateParser(data_filename, has_header, num_features_, boosting_->LabelIdx());
if (parser == nullptr) {
Log::Fatal("Recongnizing input data format failed, filename %s", data_filename);
Log::Fatal("Could not recognize the data format of data file %s", data_filename);
}
// function for parse data
......
......@@ -40,7 +40,7 @@ Boosting* Boosting::CreateBoosting(BoostingType type, const char* filename) {
}
LoadFileToBoosting(ret, filename);
} else {
Log::Fatal("Boosting type in parameter is not same with the type in model file");
Log::Fatal("Boosting type in parameter is not the same as the type in the model file");
}
return ret;
}
......
......@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter, const int curr_class) {
bag_data_cnt_ = cur_left_cnt;
out_of_bag_data_cnt_ = num_data_ - bag_data_cnt_;
}
Log::Info("re-bagging, using %d data to train", bag_data_cnt_);
Log::Info("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
tree_learner_[curr_class]->SetBaggingData(bag_data_indices_, bag_data_cnt_);
}
......@@ -182,7 +182,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
Tree * new_tree = tree_learner_[curr_class]->Train(gradient + curr_class * num_data_, hessian+ curr_class * num_data_);
// if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) {
Log::Info("Can't training anymore, there isn't any leaf meets split requirements.");
Log::Info("Stopped training because there are no more leafs that meet the split requirements.");
return true;
}
......@@ -231,7 +231,7 @@ bool GBDT::OutputMetric(int iter) {
for (auto& sub_metric : training_metrics_) {
auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score());
Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<double>(scores, ' ').c_str());
Log::Info("Iteration: %d, %s: %s", iter, name, Common::ArrayToString<double>(scores, ' ').c_str());
}
}
// print validation metric
......@@ -241,7 +241,7 @@ bool GBDT::OutputMetric(int iter) {
auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
if ((iter % gbdt_config_->output_freq) == 0) {
auto name = valid_metrics_[i][j]->GetName();
Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<double>(test_scores, ' ').c_str());
Log::Info("Iteration: %d, %s: %s", iter, name, Common::ArrayToString<double>(test_scores, ' ').c_str());
}
if (!ret && early_stopping_round_ > 0) {
bool the_bigger_the_better = valid_metrics_[i][j]->is_bigger_better();
......@@ -355,7 +355,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
size_t i = 0;
// get number of class
// get number of classes
while (i < lines.size()) {
size_t find_pos = lines[i].find("num_class=");
if (find_pos != std::string::npos) {
......@@ -368,7 +368,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
}
}
if (i == lines.size()) {
Log::Fatal("Model file doesn't contain number of class");
Log::Fatal("Model file doesn't specify the number of classes");
return;
}
......@@ -386,7 +386,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
}
}
if (i == lines.size()) {
Log::Fatal("Model file doesn't contain label index");
Log::Fatal("Model file doesn't specify the label index");
return;
}
......@@ -404,7 +404,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
}
}
if (i == lines.size()) {
Log::Fatal("Model file doesn't contain max_feature_idx");
Log::Fatal("Model file doesn't specify max_feature_idx");
return;
}
// get sigmoid parameter
......@@ -439,7 +439,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
++i;
}
}
Log::Info("%d models has been loaded\n", models_.size());
Log::Info("Finished loading %d models", models_.size());
num_used_model_ = static_cast<int>(models_.size()) / num_class_;
}
......
......@@ -77,7 +77,7 @@ void OverallConfig::GetBoostingType(const std::unordered_map<std::string, std::s
if (value == std::string("gbdt") || value == std::string("gbrt")) {
boosting_type = BoostingType::kGBDT;
} else {
Log::Fatal("Boosting type %s error", value.c_str());
Log::Fatal("Unknown boosting type %s", value.c_str());
}
}
}
......@@ -125,7 +125,7 @@ void OverallConfig::GetTaskType(const std::unordered_map<std::string, std::strin
|| value == std::string("test")) {
task_type = TaskType::kPredict;
} else {
Log::Fatal("Task type error");
Log::Fatal("Unknown task type %s", value.c_str());
}
}
}
......@@ -138,19 +138,19 @@ void OverallConfig::CheckParamConflict() {
int num_class_check = gbdt_config->num_class;
if (objective_type_multiclass){
if (num_class_check <= 1){
Log::Fatal("You should specify number of class(>=2) for multiclass training.");
Log::Fatal("Number of classes should be specified and greater than 1 for multiclass training");
}
}
else {
if (task_type == TaskType::kTrain && num_class_check != 1){
Log::Fatal("Number of class must be 1 for non-multiclass training.");
Log::Fatal("Number of classes must be 1 for non-multiclass training");
}
}
for (std::string metric_type : metric_types){
bool metric_type_multiclass = ( metric_type == std::string("multi_logloss") || metric_type == std::string("multi_error"));
if ((objective_type_multiclass && !metric_type_multiclass)
|| (!objective_type_multiclass && metric_type_multiclass)){
Log::Fatal("Objective and metrics don't match.");
Log::Fatal("Objective and metrics don't match");
}
}
......@@ -172,9 +172,9 @@ void OverallConfig::CheckParamConflict() {
} else if (gbdt_config->tree_learner_type == TreeLearnerType::kDataParallelTreeLearner) {
is_parallel_find_bin = true;
if (gbdt_config->tree_config.histogram_pool_size >= 0) {
Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f). Will disable this for reducing communication cost."
Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f). Will disable this to reduce communication costs"
, gbdt_config->tree_config.histogram_pool_size);
// Change pool size to -1(not limit) when using data parallel for reducing communication cost
// Change pool size to -1 (not limit) when using data parallel to reduce communication costs
gbdt_config->tree_config.histogram_pool_size = -1;
}
......@@ -308,7 +308,7 @@ void GBDTConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::s
tree_learner_type = TreeLearnerType::kDataParallelTreeLearner;
}
else {
Log::Fatal("Tree learner type error");
Log::Fatal("Unknown tree learner type %s", value.c_str());
}
}
}
......
......@@ -24,12 +24,12 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
CheckCanLoadFromBin();
if (is_loading_from_binfile_ && predict_fun != nullptr) {
Log::Info("Cannot performing initialization of prediction by using binary file, using text file instead");
Log::Info("Cannot initialize prediction by using a binary file, using text file instead");
is_loading_from_binfile_ = false;
}
if (!is_loading_from_binfile_) {
// load weight, query information and initilize score
// load weight, query information and initialize score
metadata_.Init(data_filename, init_score_filename, num_class_);
// create text reader
text_reader_ = new TextReader<data_size_t>(data_filename, io_config.has_header);
......@@ -51,17 +51,17 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
std::string name = io_config.label_column.substr(name_prefix.size());
if (name2idx.count(name) > 0) {
label_idx_ = name2idx[name];
Log::Info("use %s column as label", name.c_str());
Log::Info("Using column %s as label", name.c_str());
} else {
Log::Fatal("cannot find label column: %s in data file", name.c_str());
Log::Fatal("Could not find label column %s in data file", name.c_str());
}
} else {
if (!Common::AtoiAndCheck(io_config.label_column.c_str(), &label_idx_)) {
Log::Fatal("label_column is not a number, \
if you want to use column name, \
please add prefix \"name:\" before column name");
if you want to use a column name, \
please add the prefix \"name:\" to the column name");
}
Log::Info("use %d-th column as label", label_idx_);
Log::Info("Using column number %d as label", label_idx_);
}
}
if (feature_names_.size() > 0) {
......@@ -79,7 +79,7 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
if (tmp > label_idx_) { tmp -= 1; }
ignore_features_.emplace(tmp);
} else {
Log::Fatal("cannot find column: %s in data file", name.c_str());
Log::Fatal("Could not find ignore column %s in data file", name.c_str());
}
}
} else {
......@@ -87,8 +87,8 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
int tmp = 0;
if (!Common::AtoiAndCheck(token.c_str(), &tmp)) {
Log::Fatal("ignore_column is not a number, \
if you want to use column name, \
please add prefix \"name:\" before column name");
if you want to use a column name, \
please add the prefix \"name:\" to the column name");
}
// skip for label column
if (tmp > label_idx_) { tmp -= 1; }
......@@ -104,17 +104,17 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
std::string name = io_config.weight_column.substr(name_prefix.size());
if (name2idx.count(name) > 0) {
weight_idx_ = name2idx[name];
Log::Info("use %s column as weight", name.c_str());
Log::Info("Using column %s as weight", name.c_str());
} else {
Log::Fatal("cannot find weight column: %s in data file", name.c_str());
Log::Fatal("Could not find weight column %s in data file", name.c_str());
}
} else {
if (!Common::AtoiAndCheck(io_config.weight_column.c_str(), &weight_idx_)) {
Log::Fatal("weight_column is not a number, \
if you want to use column name, \
please add prefix \"name:\" before column name");
if you want to use a column name, \
please add the prefix \"name:\" to the column name");
}
Log::Info("use %d-th column as weight", weight_idx_);
Log::Info("Using column number %d as weight", weight_idx_);
}
// skip for label column
if (weight_idx_ > label_idx_) {
......@@ -128,17 +128,17 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
std::string name = io_config.group_column.substr(name_prefix.size());
if (name2idx.count(name) > 0) {
group_idx_ = name2idx[name];
Log::Info("use %s column as group/query id", name.c_str());
Log::Info("Using column %s as group/query id", name.c_str());
} else {
Log::Fatal("cannot find group/query column: %s in data file", name.c_str());
Log::Fatal("Could not find group/query column %s in data file", name.c_str());
}
} else {
if (!Common::AtoiAndCheck(io_config.group_column.c_str(), &group_idx_)) {
Log::Fatal("group_column is not a number, \
if you want to use column name, \
please add prefix \"name:\" before column name");
if you want to use a column name, \
please add the prefix \"name:\" to the column name");
}
Log::Info("use %d-th column as group/query id", group_idx_);
Log::Info("Using column number %d as group/query id", group_idx_);
}
// skip for label column
if (group_idx_ > label_idx_) {
......@@ -150,10 +150,10 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
// create text parser
parser_ = Parser::CreateParser(data_filename_, io_config.has_header, 0, label_idx_);
if (parser_ == nullptr) {
Log::Fatal("Cannot recognising input data format, filename: %s", data_filename_);
Log::Fatal("Could not recognize data format of %s", data_filename_);
}
} else {
// only need to load initilize score, other meta data will be loaded from bin flie
// only need to load initialize score, other meta data will be loaded from binary file
metadata_.Init(init_score_filename, num_class_);
Log::Info("Loading data set from binary file");
parser_ = nullptr;
......@@ -199,7 +199,7 @@ void Dataset::LoadDataToMemory(int rank, int num_machines, bool is_pre_partition
[this, rank, num_machines, &qid, &query_boundaries, &is_query_used, num_queries]
(data_size_t line_idx) {
if (qid >= num_queries) {
Log::Fatal("Current query is exceed the range of query file, please ensure your query file is correct");
Log::Fatal("Current query exceeds the range of the query file, please ensure the query file is correct");
}
if (line_idx >= query_boundaries[qid + 1]) {
// if is new query
......@@ -256,8 +256,8 @@ void Dataset::SampleDataFromFile(int rank, int num_machines, bool is_pre_partiti
[this, rank, num_machines, &qid, &query_boundaries, &is_query_used, num_queries]
(data_size_t line_idx) {
if (qid >= num_queries) {
Log::Fatal("Query id is exceed the range of query file, \
please ensure your query file is correct");
Log::Fatal("Query id exceeds the range of the query file, \
please ensure the query file is correct");
}
if (line_idx >= query_boundaries[qid + 1]) {
// if is new query
......@@ -325,7 +325,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
// start find bins
if (num_machines == 1) {
std::vector<BinMapper*> bin_mappers(sample_values.size());
// if only 1 machines, find bin locally
// if only one machine, find bin locally
#pragma omp parallel for schedule(guided)
for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
if (ignore_features_.count(i) > 0) {
......@@ -338,7 +338,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
for (size_t i = 0; i < sample_values.size(); ++i) {
if (bin_mappers[i] == nullptr) {
Log::Warning("Ignore Feature %s ", feature_names_[i].c_str());
Log::Warning("Ignoring feature %s", feature_names_[i].c_str());
}
else if (!bin_mappers[i]->is_trival()) {
// map real feature index to used feature index
......@@ -348,7 +348,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
num_data_, is_enable_sparse_));
} else {
// if feature is trival(only 1 bin), free spaces
Log::Warning("Feature %s only contains one value, will be ignored", feature_names_[i].c_str());
Log::Warning("Ignoring feature %s, only has one value", feature_names_[i].c_str());
delete bin_mappers[i];
}
}
......@@ -396,7 +396,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
// restore features bins from buffer
for (int i = 0; i < total_num_feature; ++i) {
if (ignore_features_.count(i) > 0) {
Log::Warning("Ignore Feature %s ", feature_names_[i].c_str());
Log::Warning("Ignoring feature %s", feature_names_[i].c_str());
continue;
}
BinMapper* bin_mapper = new BinMapper();
......@@ -405,7 +405,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
used_feature_map_[i] = static_cast<int>(features_.size());
features_.push_back(new Feature(static_cast<int>(i), bin_mapper, num_data_, is_enable_sparse_));
} else {
Log::Warning("Feature %s only contains one value, will be ignored", feature_names_[i].c_str());
Log::Warning("Ignoring feature %s, only has one value", feature_names_[i].c_str());
delete bin_mapper;
}
}
......@@ -423,8 +423,8 @@ void Dataset::LoadTrainData(int rank, int num_machines, bool is_pre_partition, b
// don't support query id in data file when training in parallel
if (num_machines > 1 && !is_pre_partition) {
if (group_idx_ > 0) {
Log::Fatal("Don't support query id in data file when training parallel without pre-partition. \
Please use an additional query file or pre-partition your data");
Log::Fatal("Using a query id without pre-partitioning the data file is not supported for parallel training. \
Please use an additional query file or pre-partition the data");
}
}
used_data_indices_.clear();
......@@ -670,10 +670,10 @@ void Dataset::SaveBinaryFile() {
file = fopen(bin_filename.c_str(), "wb");
#endif
if (file == NULL) {
Log::Fatal("Cannot write binary data to %s ", bin_filename.c_str());
Log::Fatal("Could not write binary data to %s", bin_filename.c_str());
}
Log::Info("Saving data to binary file: %s", data_filename_);
Log::Info("Saving data to binary file %s", data_filename_);
// get size of header
size_t size_of_header = sizeof(global_num_data_) + sizeof(is_enable_sparse_)
......@@ -753,7 +753,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
#endif
if (file == NULL) {
Log::Fatal("Cannot read binary data from %s", bin_filename.c_str());
Log::Fatal("Could not read binary data from %s", bin_filename.c_str());
}
// buffer to read binary file
......@@ -764,7 +764,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
size_t read_cnt = fread(buffer, sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file format error at header size");
Log::Fatal("Binary file error: header has the wrong size");
}
size_t size_of_head = *(reinterpret_cast<size_t*>(buffer));
......@@ -779,7 +779,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, 1, size_of_head, file);
if (read_cnt != size_of_head) {
Log::Fatal("Binary file format error at header");
Log::Fatal("Binary file error: header is incorrect");
}
// get header
const char* mem_ptr = buffer;
......@@ -822,7 +822,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file format error: wrong size of meta data");
Log::Fatal("Binary file error: meta data has the wrong size");
}
size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
......@@ -837,7 +837,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, 1, size_of_metadata, file);
if (read_cnt != size_of_metadata) {
Log::Fatal("Binary file format error: wrong size of meta data");
Log::Fatal("Binary file error: meta data is incorrect");
}
// load meta data
metadata_.LoadFromMemory(buffer);
......@@ -861,7 +861,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
bool is_query_used = false;
for (data_size_t i = 0; i < num_data_; ++i) {
if (qid >= num_queries) {
Log::Fatal("current query is exceed the range of query file, please ensure your query file is correct");
Log::Fatal("Current query exceeds the range of the query file, please ensure the query file is correct");
}
if (i >= query_boundaries[qid + 1]) {
// if is new query
......@@ -884,7 +884,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
// read feature size
read_cnt = fread(buffer, sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file format error at feature %d's size", i);
Log::Fatal("Binary file error: feature %d has the wrong size", i);
}
size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
// re-allocate space if not enough
......@@ -897,7 +897,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, 1, size_of_feature, file);
if (read_cnt != size_of_feature) {
Log::Fatal("Binary file format error at feature %d loading , read count %d", i, read_cnt);
Log::Fatal("Binary file error: feature %d is incorrect, read count: %d", i, read_cnt);
}
features_.push_back(new Feature(buffer, static_cast<data_size_t>(global_num_data_), used_data_indices_));
}
......@@ -910,7 +910,7 @@ void Dataset::CheckDataset() {
Log::Fatal("Data file %s is empty", data_filename_);
}
if (features_.size() <= 0) {
Log::Fatal("Usable feature of data %s is null", data_filename_);
Log::Fatal("No usable features in data file %s", data_filename_);
}
}
......
......@@ -48,7 +48,7 @@ void Metadata::Init(data_size_t num_data, int num_class, int weight_idx, int que
label_ = new float[num_data_];
if (weight_idx >= 0) {
if (weights_ != nullptr) {
Log::Info("using weight in data file, and ignore additional weight file");
Log::Info("Using weights in data file, ignoring the additional weights file");
delete[] weights_;
}
weights_ = new float[num_data_];
......@@ -57,7 +57,7 @@ void Metadata::Init(data_size_t num_data, int num_class, int weight_idx, int que
}
if (query_idx >= 0) {
if (query_boundaries_ != nullptr) {
Log::Info("using query id in data file, and ignore additional query file");
Log::Info("Using query id in data file, ignoring the additional query file");
delete[] query_boundaries_;
}
if (query_weights_ != nullptr) { delete[] query_weights_; }
......@@ -109,7 +109,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
}
// check weights
if (weights_ != nullptr && num_weights_ != num_data_) {
Log::Fatal("Initial weight size doesn't equal to data");
Log::Fatal("Weights size doesn't match data size");
delete[] weights_;
num_weights_ = 0;
weights_ = nullptr;
......@@ -117,7 +117,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// check query boundaries
if (query_boundaries_ != nullptr && query_boundaries_[num_queries_] != num_data_) {
Log::Fatal("Initial query size doesn't equal to data");
Log::Fatal("Query size doesn't match data size");
delete[] query_boundaries_;
num_queries_ = 0;
query_boundaries_ = nullptr;
......@@ -126,7 +126,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// contain initial score file
if (init_score_ != nullptr && num_init_score_ != num_data_) {
delete[] init_score_;
Log::Fatal("Initial score size doesn't equal to data");
Log::Fatal("Initial score size doesn't match data size");
init_score_ = nullptr;
num_init_score_ = 0;
}
......@@ -134,14 +134,14 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
data_size_t num_used_data = static_cast<data_size_t>(used_data_indices.size());
// check weights
if (weights_ != nullptr && num_weights_ != num_all_data) {
Log::Fatal("Initial weights size doesn't equal to data");
Log::Fatal("Weights size doesn't match data size");
delete[] weights_;
num_weights_ = 0;
weights_ = nullptr;
}
// check query boundaries
if (query_boundaries_ != nullptr && query_boundaries_[num_queries_] != num_all_data) {
Log::Fatal("Initial query size doesn't equal to data");
Log::Fatal("Query size doesn't match data size");
delete[] query_boundaries_;
num_queries_ = 0;
query_boundaries_ = nullptr;
......@@ -149,7 +149,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// contain initial score file
if (init_score_ != nullptr && num_init_score_ != num_all_data) {
Log::Fatal("Initial score size doesn't equal to data");
Log::Fatal("Initial score size doesn't match data size");
delete[] init_score_;
num_init_score_ = 0;
init_score_ = nullptr;
......@@ -220,7 +220,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
void Metadata::SetInitScore(const float* init_score, data_size_t len) {
if (len != num_data_ * num_class_) {
Log::Fatal("Length of initial score is not same with number of data");
Log::Fatal("Initial score size doesn't match data size");
}
if (init_score_ != nullptr) { delete[] init_score_; }
num_init_score_ = num_data_;
......@@ -240,7 +240,7 @@ void Metadata::LoadWeights() {
if (reader.Lines().size() <= 0) {
return;
}
Log::Info("Start loading weights");
Log::Info("Loading weights...");
num_weights_ = static_cast<data_size_t>(reader.Lines().size());
weights_ = new float[num_weights_];
for (data_size_t i = 0; i < num_weights_; ++i) {
......@@ -256,7 +256,7 @@ void Metadata::LoadInitialScore() {
TextReader<size_t> reader(init_score_filename_, false);
reader.ReadAllLines();
Log::Info("Start loading initial scores");
Log::Info("Loading initial scores...");
num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
init_score_ = new float[num_init_score_ * num_class_];
......@@ -292,7 +292,7 @@ void Metadata::LoadQueryBoundaries() {
if (reader.Lines().size() <= 0) {
return;
}
Log::Info("Start loading query boundries");
Log::Info("Loading query boundaries...");
query_boundaries_ = new data_size_t[reader.Lines().size() + 1];
num_queries_ = static_cast<data_size_t>(reader.Lines().size());
query_boundaries_[0] = 0;
......@@ -307,7 +307,7 @@ void Metadata::LoadQueryWeights() {
if (weights_ == nullptr || query_boundaries_ == nullptr) {
return;
}
Log::Info("Start loading query weights");
Log::Info("Loading query weights...");
query_weights_ = new float[num_queries_];
for (data_size_t i = 0; i < num_queries_; ++i) {
query_weights_[i] = 0.0f;
......
......@@ -72,7 +72,7 @@ Parser* Parser::CreateParser(const char* filename, bool has_header, int num_feat
std::ifstream tmp_file;
tmp_file.open(filename);
if (!tmp_file.is_open()) {
Log::Fatal("Data file: %s doesn't exist", filename);
Log::Fatal("Data file %s doesn't exist", filename);
}
std::string line1, line2;
if (has_header) {
......@@ -83,12 +83,12 @@ Parser* Parser::CreateParser(const char* filename, bool has_header, int num_feat
if (!tmp_file.eof()) {
std::getline(tmp_file, line1);
} else {
Log::Fatal("Data file: %s at least should have one line", filename);
Log::Fatal("Data file %s should have at least one line", filename);
}
if (!tmp_file.eof()) {
std::getline(tmp_file, line2);
} else {
Log::Warning("Data file: %s only have one line", filename);
Log::Warning("Data file %s only has one line", filename);
}
tmp_file.close();
int comma_cnt = 0, comma_cnt2 = 0;
......@@ -120,7 +120,7 @@ Parser* Parser::CreateParser(const char* filename, bool has_header, int num_feat
}
}
if (type == DataType::INVALID) {
Log::Fatal("Unkown format of training data");
Log::Fatal("Unknown format of training data");
}
Parser* ret = nullptr;
if (type == DataType::LIBSVM) {
......@@ -137,7 +137,7 @@ Parser* Parser::CreateParser(const char* filename, bool has_header, int num_feat
}
if (label_idx < 0) {
Log::Info("Data file: %s doesn't contain label column", filename);
Log::Info("Data file %s doesn't contain a label column", filename);
}
return ret;
}
......
......@@ -36,7 +36,7 @@ public:
if (*str == ',') {
++str;
} else if (*str != '\0') {
Log::Fatal("input format error, should be CSV");
Log::Fatal("Input format error when parsing as CSV");
}
}
}
......@@ -66,7 +66,7 @@ public:
if (*str == '\t') {
++str;
} else if (*str != '\0') {
Log::Fatal("input format error, should be TSV");
Log::Fatal("Input format error when parsing as TSV");
}
}
}
......@@ -79,7 +79,7 @@ public:
explicit LibSVMParser(int label_idx)
:label_idx_(label_idx) {
if (label_idx > 0) {
Log::Fatal("label should be the first column in Libsvm file");
Log::Fatal("Label should be the first column in a LibSVM file");
}
}
inline void ParseOneLine(const char* str,
......@@ -99,7 +99,7 @@ public:
str = Common::Atof(str, &val);
out_features->emplace_back(idx, val);
} else {
Log::Fatal("input format error, should be LibSVM");
Log::Fatal("Input format error when parsing as LibSVM");
}
str = Common::SkipSpaceAndTab(str);
}
......
......@@ -28,7 +28,7 @@ public:
: num_data_(num_data) {
default_bin_ = static_cast<VAL_T>(default_bin);
if (default_bin_ != 0) {
Log::Info("Warning: Having sparse feature with negative values. Will let negative values equal zero as well");
Log::Info("Warning: sparse feature with negative values, treating negative values as zero");
}
#pragma omp parallel
#pragma omp master
......@@ -54,7 +54,7 @@ public:
void ConstructHistogram(data_size_t*, data_size_t , const score_t* ,
const score_t* , HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Info("Should use OrderedSparseBin->ConstructHistogram() instead");
Log::Info("Using OrderedSparseBin->ConstructHistogram() instead");
}
data_size_t Split(unsigned int threshold, data_size_t* data_indices, data_size_t num_data,
......
......@@ -146,7 +146,7 @@ Tree::Tree(const std::string& str) {
|| key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
|| key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
|| key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0) {
Log::Fatal("tree model string format error");
Log::Fatal("Tree model string format error");
}
Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
......
......@@ -21,7 +21,7 @@ public:
explicit BinaryMetric(const MetricConfig& config) {
sigmoid_ = static_cast<score_t>(config.sigmoid);
if (sigmoid_ <= 0.0f) {
Log::Fatal("Sigmoid param %f should greater than zero", sigmoid_);
Log::Fatal("Sigmoid parameter %f should be greater than zero", sigmoid_);
}
}
......
......@@ -60,7 +60,7 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
// counts for all labels
for (data_size_t i = 0; i < num_data; ++i) {
if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Fatal("label excel %d", label[i]); }
if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Fatal("Label %d exceeds the label gain range", label[i]); }
++label_cnt[static_cast<int>(label[i])];
}
score_t cur_result = 0.0f;
......
......@@ -45,7 +45,7 @@ public:
// get query boundaries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Fatal("For NDCG metric, there should be query information");
Log::Fatal("The NDCG metric requires query information");
}
num_queries_ = metadata.num_queries();
// get query weights
......
......@@ -44,7 +44,7 @@ Linkers::Linkers(NetworkConfig config) {
}
}
if (rank_ == -1) {
Log::Fatal("Machine list file doesn't contain local machine");
Log::Fatal("Machine list file doesn't contain the local machine");
}
// construct listener
listener_ = new TcpSocket();
......@@ -73,14 +73,14 @@ Linkers::~Linkers() {
}
}
TcpSocket::Finalize();
Log::Info("Network using %f seconds", network_time_ * 1e-3);
Log::Info("Finished linking network in %f seconds", network_time_ * 1e-3);
}
void Linkers::ParseMachineList(const char * filename) {
TextReader<size_t> machine_list_reader(filename, false);
machine_list_reader.ReadAllLines();
if (machine_list_reader.Lines().size() <= 0) {
Log::Fatal("Machine list file:%s doesn't exist", filename);
Log::Fatal("Machine list file %s doesn't exist", filename);
}
for (auto& line : machine_list_reader.Lines()) {
......@@ -95,7 +95,7 @@ void Linkers::ParseMachineList(const char * filename) {
continue;
}
if (client_ips_.size() >= static_cast<size_t>(num_machines_)) {
Log::Warning("The #machine in machine_list is larger than parameter num_machines, the redundant will ignored");
Log::Warning("machine_list size is larger than the parameter num_machines, ignoring redundant entries");
break;
}
str_after_split[0] = Common::Trim(str_after_split[0]);
......@@ -104,17 +104,17 @@ void Linkers::ParseMachineList(const char * filename) {
client_ports_.push_back(atoi(str_after_split[1].c_str()));
}
if (client_ips_.size() != static_cast<size_t>(num_machines_)) {
Log::Warning("The world size is bigger the #machine in machine list, change world size to %d .", client_ips_.size());
Log::Warning("World size is larger than the machine_list size, changing world size to %d", client_ips_.size());
num_machines_ = static_cast<int>(client_ips_.size());
}
}
void Linkers::TryBind(int port) {
Log::Info("try to bind port %d.", port);
Log::Info("Trying to bind port %d...", port);
if (listener_->Bind(port)) {
Log::Info("Binding port %d success.", port);
Log::Info("Binding port %d succeeded", port);
} else {
Log::Fatal("Binding port %d failed.", port);
Log::Fatal("Binding port %d failed", port);
}
}
......@@ -192,7 +192,7 @@ void Linkers::Construct() {
if (cur_socket.Connect(client_ips_[out_rank].c_str(), client_ports_[out_rank])) {
break;
} else {
Log::Warning("Connect to rank %d failed, wait for %d milliseconds", out_rank, connect_fail_delay_time);
Log::Warning("Connecting to rank %d failed, waiting for %d milliseconds", out_rank, connect_fail_delay_time);
std::this_thread::sleep_for(std::chrono::milliseconds(connect_fail_delay_time));
}
}
......@@ -217,7 +217,7 @@ bool Linkers::CheckLinker(int rank) {
void Linkers::PrintLinkers() {
for (int i = 0; i < num_machines_; ++i) {
if (CheckLinker(i)) {
Log::Info("Connected to rank %d.", i);
Log::Info("Connected to rank %d", i);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment