Commit dce329e5 authored by Hui Xue

Merge remote-tracking branch 'upstream/master'

# Conflicts:
#	src/io/dataset.cpp
#	src/io/ordered_sparse_bin.hpp
#	src/treelearner/leaf_splits.hpp
#	src/treelearner/serial_tree_learner.cpp
parents 0b9fe27a a6a75fe9
......@@ -24,7 +24,7 @@ public:
*/
~GBDT();
/*!
* \brief Initial logic
* \brief Initialization logic
* \param config Config for boosting
* \param train_data Training data
* \param object_function Training objective function
......@@ -36,9 +36,9 @@ public:
const char* output_model_filename)
override;
/*!
* \brief Add a validation data
* \param valid_data Validation data
* \param valid_metrics Metrics for validation data
* \brief Add a validation dataset
* \param valid_data Validation dataset
* \param valid_metrics Metrics for validation dataset
*/
void AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) override;
......@@ -47,18 +47,26 @@ public:
*/
void Train() override;
/*!
* \brief Predtion for one record, not use sigmoid
* \brief Prediction for one record without sigmoid transformation
* \param feature_values Feature values of this record
* \return Prediction result for this record
*/
double PredictRaw(const double * feature_values) const override;
/*!
* \brief Predtion for one record, will use sigmoid transform if needed
* \brief Prediction for one record with sigmoid transformation if enabled
* \param feature_values Feature values of this record
* \return Prediction result for this record
*/
double Predict(const double * feature_values) const override;
/*!
* \brief Prediction of the leaf index for one record
* \param feature_values Feature values of this record
* \return Predicted leaf index for this record
*/
std::vector<int> PredictLeafIndex(const double* feature_values) const override;
/*!
* \brief Serialize models by string
* \return String output of trained model
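For orientation, the relationship between these two prediction entry points can be sketched as follows. This is a hedged illustration only: the transform (including the factor of 2 in the exponent) is taken from the binary metric code later in this diff, and PredictSketch is not the actual GBDT implementation.

#include <cmath>

// Sketch: Predict() is PredictRaw() plus an optional sigmoid squashing.
double PredictSketch(double raw_score, double sigmoid) {
  if (sigmoid > 0.0) {
    // same shape as the metric-side transform below
    return 1.0 / (1.0 + std::exp(-2.0 * sigmoid * raw_score));
  }
  return raw_score;  // regression / raw-score case
}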
......@@ -87,8 +95,8 @@ private:
*/
void Bagging(int iter);
/*!
* \brief update score for out-of-bag data.
* It is necessary for this update, since we may re-bagging data on training
* \brief Update the scores of out-of-bag data.
* This update is necessary because the data may be re-bagged during training
* \param tree Trained tree of this iteration
*/
void UpdateScoreOutOfBag(const Tree* tree);
......@@ -97,12 +105,12 @@ private:
*/
void Boosting();
/*!
* \brief train one tree
* \brief Train one tree
* \return Trained tree of this iteration
*/
Tree* TrainOneTree();
/*!
* \brief update score after tree trained
* \brief Update the scores after a tree has been trained
* \param tree Trained tree of this iteration
*/
void UpdateScore(const Tree* tree);
......@@ -110,7 +118,9 @@ private:
* \brief Print metric result of current iteration
* \param iter Current iteration
*/
void OutputMetric(int iter);
bool OutputMetric(int iter);
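/*! \brief Number of rounds used for early stopping; presumably 0 disables it, matching CHECK(early_stopping_round >= 0) below */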
int early_stopping_round_;
/*! \brief Pointer to training data */
const Dataset* train_data_;
......@@ -128,6 +138,9 @@ private:
std::vector<ScoreUpdater*> valid_score_updater_;
/*! \brief Metric for validation data */
std::vector<std::vector<const Metric*>> valid_metrics_;
/*! \brief Best iteration(s) for early stopping */
std::vector<std::vector<int>> best_iter_;
/*! \brief Best score(s) for early stopping */
std::vector<std::vector<score_t>> best_score_;
/*! \brief Trained models(trees) */
std::vector<Tree*> models_;
/*! \brief Max feature index of training data */
......
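Given the new early_stopping_round_, best_iter_, and best_score_ members and the bool now returned by OutputMetric, the stopping test presumably reduces to something like the sketch below; ShouldEarlyStop and its parameters are illustrative, not the actual implementation.

// Hedged sketch of an early-stopping check, assuming "no improvement for
// early_stopping_round consecutive iterations" semantics.
bool ShouldEarlyStop(double loss, bool bigger_is_better, int iter,
                     int early_stopping_round, double* best_score, int* best_iter) {
  const bool improved = bigger_is_better ? (loss > *best_score)
                                         : (loss < *best_score);
  if (improved) {
    *best_score = loss;  // remember the best value and when it happened
    *best_iter = iter;
    return false;
  }
  // 0 disables early stopping; otherwise stop once the patience window is exceeded
  return early_stopping_round > 0 && iter - *best_iter >= early_stopping_round;
}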
......@@ -37,25 +37,25 @@ public:
delete[] score_;
}
/*!
* \brief Use tree model to get prediction, then add to score for all data
* Note: this function generally will be used for validation data.
* \brief Use the tree model to get predictions, then add them to the scores of all data.
* Note: this function is generally used for validation data as well.
* \param tree Trained tree model
*/
inline void AddScore(const Tree* tree) {
tree->AddPredictionToScore(data_, num_data_, score_);
}
/*!
* \brief Add prediction score, only used for training data.
* After trained a tree, the training data is partitioned into tree leaves.
* We can get prediction by faster speed based on this.
* \brief Add prediction scores; only used for training data.
* After a tree is trained, the training data is partitioned into its leaves,
* based on which we can get predictions quickly.
* \param tree_learner
*/
inline void AddScore(const TreeLearner* tree_learner) {
tree_learner->AddPredictionToScore(score_);
}
/*!
* \brief Like AddScore(const Tree* tree), but only for part of data
* Used for prediction of training out-of-bad data
* \brief Use the tree model to get predictions, then add them to the scores of part of the data.
* Used for prediction on out-of-bag training data
* \param tree Trained tree model
* \param data_indices Indices of the data that will be processed
* \param data_cnt Number of data points that will be processed
......
......@@ -182,35 +182,35 @@ template class OrderedSparseBin<uint16_t>;
template class OrderedSparseBin<uint32_t>;
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse) {
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
// sparse threshold
const double kSparseThreshold = 0.8;
if (sparse_rate >= kSparseThreshold && is_enable_sparse) {
*is_sparse = true;
return CreateSparseBin(num_data, num_bin);
return CreateSparseBin(num_data, num_bin, default_bin);
} else {
*is_sparse = false;
return CreateDenseBin(num_data, num_bin);
return CreateDenseBin(num_data, num_bin, default_bin);
}
}
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin, int default_bin) {
if (num_bin <= 256) {
return new DenseBin<uint8_t>(num_data);
return new DenseBin<uint8_t>(num_data, default_bin);
} else if (num_bin <= 65536) {
return new DenseBin<uint16_t>(num_data);
return new DenseBin<uint16_t>(num_data, default_bin);
} else {
return new DenseBin<uint32_t>(num_data);
return new DenseBin<uint32_t>(num_data, default_bin);
}
}
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) {
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin, int default_bin) {
if (num_bin <= 256) {
return new SparseBin<uint8_t>(num_data);
return new SparseBin<uint8_t>(num_data, default_bin);
} else if (num_bin <= 65536) {
return new SparseBin<uint16_t>(num_data);
return new SparseBin<uint16_t>(num_data, default_bin);
} else {
return new SparseBin<uint32_t>(num_data);
return new SparseBin<uint32_t>(num_data, default_bin);
}
}
......
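As a usage sketch of the new factory signature (the numeric values are only illustrative):

bool is_sparse = false;
// 300 bins selects the uint16_t backend; sparse_rate 0.9 >= kSparseThreshold (0.8),
// so a SparseBin is created while sparsity is enabled. default_bin = 0 keeps the
// old zero-initialized behavior.
Bin* bin = Bin::CreateBin(/*num_data=*/10000, /*num_bin=*/300, /*sparse_rate=*/0.9,
                          /*is_enable_sparse=*/true, &is_sparse, /*default_bin=*/0);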
......@@ -15,6 +15,8 @@ void OverallConfig::Set(const std::unordered_map<std::string, std::string>& para
GetInt(params, "num_threads", &num_threads);
GetTaskType(params);
GetBool(params, "predict_leaf_index", &predict_leaf_index);
GetBoostingType(params);
GetObjectiveType(params);
GetMetricType(params);
......@@ -34,6 +36,19 @@ void OverallConfig::Set(const std::unordered_map<std::string, std::string>& para
metric_config.Set(params);
// check for conflicts
CheckParamConflict();
if (io_config.verbosity == 1) {
LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Info);
}
else if (io_config.verbosity == 0) {
LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Error);
}
else if (io_config.verbosity >= 2) {
LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Debug);
}
else {
LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Fatal);
}
}
void OverallConfig::GetBoostingType(const std::unordered_map<std::string, std::string>& params) {
......@@ -43,7 +58,7 @@ void OverallConfig::GetBoostingType(const std::unordered_map<std::string, std::s
if (value == std::string("gbdt") || value == std::string("gbrt")) {
boosting_type = BoostingType::kGBDT;
} else {
Log::Stderr("boosting type %s error", value.c_str());
Log::Fatal("Boosting type %s error", value.c_str());
}
}
}
......@@ -91,34 +106,37 @@ void OverallConfig::GetTaskType(const std::unordered_map<std::string, std::strin
|| value == std::string("test")) {
task_type = TaskType::kPredict;
} else {
Log::Stderr("task type error");
Log::Fatal("Task type error");
}
}
}
void OverallConfig::CheckParamConflict() {
GBDTConfig* gbdt_config = dynamic_cast<GBDTConfig*>(boosting_config);
if (network_config.num_machines > 1) {
is_parallel = true;
} else {
is_parallel = false;
dynamic_cast<GBDTConfig*>(boosting_config)->tree_learner_type =
TreeLearnerType::kSerialTreeLearner;
gbdt_config->tree_learner_type = TreeLearnerType::kSerialTreeLearner;
}
if (dynamic_cast<GBDTConfig*>(boosting_config)->tree_learner_type ==
TreeLearnerType::kSerialTreeLearner) {
if (gbdt_config->tree_learner_type == TreeLearnerType::kSerialTreeLearner) {
is_parallel = false;
network_config.num_machines = 1;
}
if (dynamic_cast<GBDTConfig*>(boosting_config)->tree_learner_type ==
TreeLearnerType::kSerialTreeLearner ||
dynamic_cast<GBDTConfig*>(boosting_config)->tree_learner_type ==
TreeLearnerType::kFeatureParallelTreelearner) {
if (gbdt_config->tree_learner_type == TreeLearnerType::kSerialTreeLearner ||
gbdt_config->tree_learner_type == TreeLearnerType::kFeatureParallelTreelearner) {
is_parallel_find_bin = false;
} else if (dynamic_cast<GBDTConfig*>(boosting_config)->tree_learner_type ==
TreeLearnerType::kDataParallelTreeLearner) {
} else if (gbdt_config->tree_learner_type == TreeLearnerType::kDataParallelTreeLearner) {
is_parallel_find_bin = true;
if (gbdt_config->tree_config.histogram_pool_size >= 0) {
Log::Error("Histogram LRU queue was enabled (histogram_pool_size=%f). Will disable this for reducing communication cost."
, gbdt_config->tree_config.histogram_pool_size);
// Change pool size to -1 (no limit) when using data-parallel learning to reduce communication cost
gbdt_config->tree_config.histogram_pool_size = -1;
}
}
}
......@@ -128,8 +146,9 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "data_random_seed", &data_random_seed);
if (!GetString(params, "data", &data_filename)) {
Log::Stderr("No training/prediction data, application quit");
Log::Fatal("No training/prediction data, application quit");
}
GetInt(params, "verbose", &verbosity);
GetInt(params, "num_model_predict", &num_model_predict);
GetBool(params, "is_pre_partition", &is_pre_partition);
GetBool(params, "is_enable_sparse", &is_enable_sparse);
......@@ -140,6 +159,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetString(params, "input_model", &input_model);
GetString(params, "output_result", &output_result);
GetString(params, "input_init_score", &input_init_score);
GetString(params, "log_file", &log_file);
std::string tmp_str = "";
if (GetString(params, "valid_data", &tmp_str)) {
valid_data_filenames = Common::Split(tmp_str.c_str(), ',');
......@@ -167,6 +187,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "early_stopping_round", &early_stopping_round);
GetInt(params, "metric_freq", &output_freq);
CHECK(output_freq >= 0);
GetDouble(params, "sigmoid", &sigmoid);
......@@ -202,10 +223,13 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params)
GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0);
GetInt(params, "num_leaves", &num_leaves);
CHECK(num_leaves > 0);
CHECK(num_leaves > 1);
GetInt(params, "feature_fraction_seed", &feature_fraction_seed);
GetDouble(params, "feature_fraction", &feature_fraction);
CHECK(feature_fraction > 0.0 && feature_fraction <= 1.0);
GetDouble(params, "histogram_pool_size", &histogram_pool_size);
GetInt(params, "max_depth", &max_depth);
CHECK(max_depth > 1 || max_depth < 0);
}
......@@ -219,6 +243,8 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par
CHECK(bagging_fraction > 0.0 && bagging_fraction <= 1.0);
GetDouble(params, "learning_rate", &learning_rate);
CHECK(learning_rate > 0.0);
GetInt(params, "early_stopping_round", &early_stopping_round);
CHECK(early_stopping_round >= 0);
}
void GBDTConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::string>& params) {
......@@ -233,7 +259,7 @@ void GBDTConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::s
tree_learner_type = TreeLearnerType::kDataParallelTreeLearner;
}
else {
Log::Stderr("tree learner type error");
Log::Fatal("Tree learner type error");
}
}
}
......
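To see the options touched in this file in one place, a call into OverallConfig::Set could look like the hypothetical sketch below; the parameter names come from this diff, the values are only illustrative.

#include <string>
#include <unordered_map>
// OverallConfig is declared in this project's config header.

int main() {
  std::unordered_map<std::string, std::string> params = {
    {"task", "train"},
    {"data", "train.tsv"},
    {"verbose", "2"},                // >= 2 maps to LogLevel::Debug above
    {"log_file", "train.log"},       // new IOConfig option
    {"early_stopping_round", "10"},  // CHECK(early_stopping_round >= 0)
    {"metric_freq", "1"},            // CHECK(output_freq >= 0)
    {"num_leaves", "31"},            // CHECK(num_leaves > 1), tightened from > 0
    {"max_depth", "-1"},             // CHECK(max_depth > 1 || max_depth < 0)
    {"predict_leaf_index", "false"}  // new prediction option
  };
  OverallConfig config;
  config.Set(params);
  return 0;
}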
......@@ -21,7 +21,7 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
CheckCanLoadFromBin();
if (is_loading_from_binfile_ && predict_fun != nullptr) {
Log::Stdout("cannot perform initial prediction for binary file, will use text file instead");
Log::Info("Cannot performing initialization of prediction by using binary file, using text file instead");
is_loading_from_binfile_ = false;
}
......@@ -31,14 +31,14 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
// create text parser
parser_ = Parser::CreateParser(data_filename_, 0, nullptr);
if (parser_ == nullptr) {
Log::Stderr("cannot recognize input data format, filename: %s", data_filename_);
Log::Fatal("Cannot recognising input data format, filename: %s", data_filename_);
}
// create text reader
text_reader_ = new TextReader<data_size_t>(data_filename);
} else {
// only need to load the initial score; other metadata will be loaded from the binary file
metadata_.Init(init_score_filename);
Log::Stdout("will load data set from binary file");
Log::Info("Loading data set from binary file");
parser_ = nullptr;
text_reader_ = nullptr;
}
......@@ -82,7 +82,7 @@ void Dataset::LoadDataToMemory(int rank, int num_machines, bool is_pre_partition
[this, rank, num_machines, &qid, &query_boundaries, &is_query_used, num_queries]
(data_size_t line_idx) {
if (qid >= num_queries) {
Log::Stderr("current query is exceed the range of query file, please ensure your query file is correct");
Log::Fatal("Current query is exceed the range of query file, please ensure your query file is correct");
}
if (line_idx >= query_boundaries[qid + 1]) {
// if is new query
......@@ -139,7 +139,7 @@ void Dataset::SampleDataFromFile(int rank, int num_machines, bool is_pre_partiti
[this, rank, num_machines, &qid, &query_boundaries, &is_query_used, num_queries]
(data_size_t line_idx) {
if (qid >= num_queries) {
Log::Stderr("current query is exceed the range of query file, \
Log::Fatal("Query id is exceed the range of query file, \
please ensure your query file is correct");
}
if (line_idx >= query_boundaries[qid + 1]) {
......@@ -189,7 +189,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
// -1 means this feature is not used
used_feature_map_ = std::vector<int>(sample_values.size(), -1);
num_total_features_ = sample_values.size();
num_total_features_ = static_cast<int>(sample_values.size());
// start find bins
if (num_machines == 1) {
std::vector<BinMapper*> bin_mappers(sample_values.size());
......@@ -209,7 +209,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
num_data_, is_enable_sparse_));
} else {
// if the feature is trivial (only 1 bin), free the space
Log::Stdout("Warning: feture %d only contains one value, will ignore it", i);
Log::Error("Feature %d only contains one value, will be ignored", i);
delete bin_mappers[i];
}
}
......@@ -486,10 +486,10 @@ void Dataset::SaveBinaryFile() {
file = fopen(bin_filename.c_str(), "wb");
#endif
if (file == NULL) {
Log::Stderr("cannot write binary data to %s ", bin_filename.c_str());
Log::Fatal("Cannot write binary data to %s ", bin_filename.c_str());
}
Log::Stdout("start save binary file for data %s", data_filename_);
Log::Info("Saving data to binary file: %s", data_filename_);
// get size of header
size_t size_of_header = sizeof(global_num_data_) + sizeof(is_enable_sparse_)
......@@ -556,7 +556,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
#endif
if (file == NULL) {
Log::Stderr("cannot read binary data from %s", bin_filename.c_str());
Log::Fatal("Cannot read binary data from %s", bin_filename.c_str());
}
// buffer to read binary file
......@@ -567,7 +567,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
size_t read_cnt = fread(buffer, sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Stderr("binary file format error at header size");
Log::Fatal("Binary file format error at header size");
}
size_t size_of_head = *(reinterpret_cast<size_t*>(buffer));
......@@ -582,7 +582,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, 1, size_of_head, file);
if (read_cnt != size_of_head) {
Log::Stderr("binary file format error at header");
Log::Fatal("Binary file format error at header");
}
// get header
const char* mem_ptr = buffer;
......@@ -608,7 +608,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Stderr("binary file format error at size of meta data");
Log::Fatal("Binary file format error: wrong size of meta data");
}
size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
......@@ -623,7 +623,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, 1, size_of_metadata, file);
if (read_cnt != size_of_metadata) {
Log::Stderr("binary file format error at meta data");
Log::Fatal("Binary file format error: wrong size of meta data");
}
// load meta data
metadata_.LoadFromMemory(buffer);
......@@ -647,7 +647,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
bool is_query_used = false;
for (data_size_t i = 0; i < num_data_; i++) {
if (qid >= num_queries) {
Log::Stderr("current query is exceed the range of query file, please ensure your query file is correct");
Log::Fatal("current query is exceed the range of query file, please ensure your query file is correct");
}
if (i >= query_boundaries[qid + 1]) {
// if is new query
......@@ -670,7 +670,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
// read feature size
read_cnt = fread(buffer, sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Stderr("binary file format error at feature %d's size", i);
Log::Fatal("Binary file format error at feature %d's size", i);
}
size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
// re-allocate space if not enough
......@@ -683,7 +683,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
read_cnt = fread(buffer, 1, size_of_feature, file);
if (read_cnt != size_of_feature) {
Log::Stderr("binary file format error at feature %d loading , read count %d", i, read_cnt);
Log::Fatal("Binary file format error at feature %d loading , read count %d", i, read_cnt);
}
features_.push_back(new Feature(buffer, static_cast<data_size_t>(global_num_data_), used_data_indices_));
}
......@@ -693,10 +693,10 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
void Dataset::CheckDataset() {
if (num_data_ <= 0) {
Log::Stderr("data size of %s is zero", data_filename_);
Log::Fatal("Data file %s is empty", data_filename_);
}
if (features_.size() <= 0) {
Log::Stderr("not useful feature of data %s", data_filename_);
Log::Fatal("Usable feature of data %s is null", data_filename_);
}
}
......
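Pieced together from the fread calls in this file, the binary dataset layout is roughly the following; this is a reading of the diff, not an authoritative format specification:

[size_t header_size][header]
[size_t metadata_size][metadata]
repeated per feature: [size_t feature_size][feature data]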
......@@ -16,10 +16,17 @@ namespace LightGBM {
template <typename VAL_T>
class DenseBin: public Bin {
public:
explicit DenseBin(data_size_t num_data)
explicit DenseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) {
data_ = new VAL_T[num_data_];
if (default_bin == 0) {
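// std::memset writes individual bytes, so it is only correct for a fill value of 0;
// non-zero defaults (VAL_T may be multi-byte) must be written element by element below.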
std::memset(data_, 0, sizeof(VAL_T)*num_data_);
} else {
VAL_T default_bin_T = static_cast<VAL_T>(default_bin);
for (data_size_t i = 0; i < num_data_; ++i) {
data_[i] = default_bin_T;
}
}
}
~DenseBin() {
......
......@@ -61,7 +61,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
if (used_data_indices.size() == 0) {
// check weights
if (weights_ != nullptr && num_weights_ != num_data_) {
Log::Stdout("init weight size doesn't equal with data file, will ignore");
Log::Error("Initial weight size doesn't equal to data, weights will be ignored");
delete[] weights_;
num_weights_ = 0;
weights_ = nullptr;
......@@ -69,7 +69,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// check query boundaries
if (query_boundaries_ != nullptr && query_boundaries_[num_queries_] != num_data_) {
Log::Stdout("init query size doesn't equal with data file, will ignore");
Log::Error("Initial query size doesn't equal to data, queies will be ignored");
delete[] query_boundaries_;
num_queries_ = 0;
query_boundaries_ = nullptr;
......@@ -78,21 +78,21 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// contain initial score file
if (init_score_ != nullptr && num_init_score_ != num_data_) {
delete[] init_score_;
Log::Stdout("init score size doesn't equal with data file, will ignore");
Log::Error("Initial score size doesn't equal to data, score file will be ignored");
num_init_score_ = 0;
}
} else {
data_size_t num_used_data = static_cast<data_size_t>(used_data_indices.size());
// check weights
if (weights_ != nullptr && num_weights_ != num_all_data) {
Log::Stdout("init weight size doesn't equal with data file, will ignore");
Log::Error("Initial weights size doesn't equal to data, weights will be ignored");
delete[] weights_;
num_weights_ = 0;
weights_ = nullptr;
}
// check query boundaries
if (query_boundaries_ != nullptr && query_boundaries_[num_queries_] != num_all_data) {
Log::Stdout("init query size doesn't equal with data file, will ignore");
Log::Error("Initial query size doesn't equal to data , queries will be ignored");
delete[] query_boundaries_;
num_queries_ = 0;
query_boundaries_ = nullptr;
......@@ -100,7 +100,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// contain initial score file
if (init_score_ != nullptr && num_init_score_ != num_all_data) {
Log::Stdout("init score size doesn't equal with data file, will ignore");
Log::Error("Initial score size doesn't equal to data , initial scores will be ignored");
delete[] init_score_;
num_init_score_ = 0;
}
......@@ -131,10 +131,10 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
used_query.push_back(qid);
data_idx += len;
} else {
Log::Stderr("data partition error, not according to query");
Log::Fatal("Data partition error, data didn't match queies");
}
} else {
Log::Stderr("data partition error, not according to query");
Log::Fatal("Data partition error, data didn't match queies");
}
}
data_size_t * old_query_boundaries = query_boundaries_;
......@@ -182,7 +182,7 @@ void Metadata::LoadWeights() {
if (reader.Lines().size() <= 0) {
return;
}
Log::Stdout("Start to load weights");
Log::Info("Start loading weights");
num_weights_ = static_cast<data_size_t>(reader.Lines().size());
weights_ = new float[num_weights_];
for (data_size_t i = 0; i < num_weights_; ++i) {
......@@ -198,7 +198,7 @@ void Metadata::LoadInitialScore() {
TextReader<size_t> reader(init_score_filename_);
reader.ReadAllLines();
Log::Stdout("Start to load initial score");
Log::Info("Start loading initial scores");
num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
init_score_ = new score_t[num_init_score_];
double tmp = 0.0f;
......@@ -218,7 +218,7 @@ void Metadata::LoadQueryBoundaries() {
if (reader.Lines().size() <= 0) {
return;
}
Log::Stdout("Start to load query boundries");
Log::Info("Start loading query boundries");
query_boundaries_ = new data_size_t[reader.Lines().size() + 1];
num_queries_ = static_cast<data_size_t>(reader.Lines().size());
query_boundaries_[0] = 0;
......@@ -233,7 +233,7 @@ void Metadata::LoadQueryWeights() {
if (weights_ == nullptr || query_boundaries_ == nullptr) {
return;
}
Log::Stdout("Start to load query weights");
Log::Info("Start loading query weights");
query_weights_ = new float[num_queries_];
for (data_size_t i = 0; i < num_queries_; ++i) {
query_weights_[i] = 0.0f;
......
......@@ -13,12 +13,21 @@
namespace LightGBM {
/*!
* \brief Interface for ordered bin data, efficient for constructing histograms, especially for sparse bins.
* There are two advantages to using ordered bins:
* 1. group the data by leaf, which improves the cache hit rate.
* 2. only store the non-zero bins, which speeds up histogram construction for sparse features.
* However, it has an additional cost: the bins need to be re-ordered after every leaf split, which is expensive for dense features.
* So we only use ordered bins for sparse features now.
*/
template <typename VAL_T>
class OrderedSparseBin:public OrderedBin {
......
......@@ -34,7 +34,7 @@ bool CheckHasLabelForLibsvm(std::string& str) {
bool CheckHasLabelForTSV(std::string& str, int num_features) {
str = Common::Trim(str);
auto tokens = Common::Split(str.c_str(), '\t');
if (tokens.size() == num_features) {
if (static_cast<int>(tokens.size()) == num_features) {
return false;
} else {
return true;
......@@ -44,7 +44,7 @@ bool CheckHasLabelForTSV(std::string& str, int num_features) {
bool CheckHasLabelForCSV(std::string& str, int num_features) {
str = Common::Trim(str);
auto tokens = Common::Split(str.c_str(), ',');
if (tokens.size() == num_features) {
if (static_cast<int>(tokens.size()) == num_features) {
return false;
} else {
return true;
......@@ -55,18 +55,18 @@ Parser* Parser::CreateParser(const char* filename, int num_features, bool* has_l
std::ifstream tmp_file;
tmp_file.open(filename);
if (!tmp_file.is_open()) {
Log::Stderr("Data file: %s doesn't exist", filename);
Log::Fatal("Data file: %s doesn't exist", filename);
}
std::string line1, line2;
if (!tmp_file.eof()) {
std::getline(tmp_file, line1);
} else {
Log::Stderr("Data file: %s at least should have one line", filename);
Log::Fatal("Data file: %s at least should have one line", filename);
}
if (!tmp_file.eof()) {
std::getline(tmp_file, line2);
} else {
Log::Stdout("Data file: %s only have one line", filename);
Log::Error("Data file: %s only have one line", filename);
}
tmp_file.close();
int comma_cnt = 0, comma_cnt2 = 0;
......
......@@ -20,12 +20,14 @@ public:
double val = 0.0;
while (*str != '\0') {
str = Common::Atof(str, &val);
if (fabs(val) > 1e-10) {
out_features->emplace_back(idx, val);
}
++idx;
if (*str == ',') {
++str;
} else if (*str != '\0') {
Log::Stderr("input format error, should be CSV");
Log::Fatal("input format error, should be CSV");
}
}
}
......@@ -36,7 +38,7 @@ public:
if (*str == ',') {
++str;
} else if (*str != '\0') {
Log::Stderr("input format error, should be CSV");
Log::Fatal("input format error, should be CSV");
}
return ParseOneLine(str, out_features);
}
......@@ -49,12 +51,14 @@ public:
double val = 0.0;
while (*str != '\0') {
str = Common::Atof(str, &val);
if (fabs(val) > 1e-10) {
out_features->emplace_back(idx, val);
}
++idx;
if (*str == '\t') {
++str;
} else if (*str != '\0') {
Log::Stderr("input format error, should be TSV");
Log::Fatal("input format error, should be TSV");
}
}
}
......@@ -65,7 +69,7 @@ public:
if (*str == '\t') {
++str;
} else if (*str != '\0') {
Log::Stderr("input format error, should be TSV");
Log::Fatal("input format error, should be TSV");
}
return ParseOneLine(str, out_features);
}
......@@ -84,7 +88,7 @@ public:
str = Common::Atof(str, &val);
out_features->emplace_back(idx, val);
} else {
Log::Stderr("input format error, should be LibSVM");
Log::Fatal("input format error, should be LibSVM");
}
str = Common::SkipSpaceAndTab(str);
}
......
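The practical effect of the new fabs(val) > 1e-10 filter is that explicit zeros no longer produce entries while column indices are preserved. A hedged usage sketch (the csv_parser instance is an assumption; the call shape follows the signatures visible above):

std::vector<std::pair<int, double>> features;
csv_parser.ParseOneLine("1.0,0.0,2.5", &features);
// features now holds {0, 1.0} and {2, 2.5}; the zero in column 1 was skipped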
......@@ -24,8 +24,12 @@ class SparseBin:public Bin {
public:
friend class SparseBinIterator<VAL_T>;
explicit SparseBin(data_size_t num_data)
explicit SparseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) {
default_bin_ = static_cast<VAL_T>(default_bin);
if (default_bin_ != 0) {
Log::Info("Warning: Having sparse feature with negative values. Will let negative values equal zero as well");
}
#pragma omp parallel
#pragma omp master
{
......@@ -41,7 +45,7 @@ public:
void Push(int tid, data_size_t idx, uint32_t value) override {
// values at or below the default bin are treated as the default and are not stored
if (value == 0) { return; }
if (value <= default_bin_) { return; }
push_buffers_[tid].emplace_back(idx, static_cast<VAL_T>(value));
}
......@@ -50,7 +54,7 @@ public:
void ConstructHistogram(data_size_t*, data_size_t , const score_t* ,
const score_t* , HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Stderr("Should use OrderedSparseBin->ConstructHistogram() instead");
Log::Info("Should use OrderedSparseBin->ConstructHistogram() instead");
}
data_size_t Split(unsigned int threshold, data_size_t* data_indices, data_size_t num_data,
......@@ -240,6 +244,7 @@ private:
std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_;
std::vector<std::pair<data_size_t, data_size_t>> fast_index_;
data_size_t fast_index_shift_;
VAL_T default_bin_;
};
template <typename VAL_T>
......
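With a non-zero default bin, Push now drops every value at or below it, not only zeros. An illustrative call sequence, assuming default_bin_ == 2:

bin->Push(/*tid=*/0, /*idx=*/7, /*value=*/2);  // <= default_bin_, treated as the default, not stored
bin->Push(/*tid=*/0, /*idx=*/8, /*value=*/5);  // stored as (8, 5)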
......@@ -28,6 +28,9 @@ Tree::Tree(int max_leaves)
leaf_parent_ = new int[max_leaves_];
leaf_value_ = new score_t[max_leaves_];
leaf_depth_ = new int[max_leaves_];
// the root is at depth 1
leaf_depth_[0] = 1;
num_leaves_ = 1;
leaf_parent_[0] = -1;
}
......@@ -41,6 +44,7 @@ Tree::~Tree() {
if (threshold_ != nullptr) { delete[] threshold_; }
if (split_gain_ != nullptr) { delete[] split_gain_; }
if (leaf_value_ != nullptr) { delete[] leaf_value_; }
if (leaf_depth_ != nullptr) { delete[] leaf_depth_; }
}
int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
......@@ -70,9 +74,11 @@ int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feat
leaf_parent_[num_leaves_] = new_node_idx;
leaf_value_[leaf] = left_value;
leaf_value_[num_leaves_] = right_value;
// update leaf depth
leaf_depth_[num_leaves_] = leaf_depth_[leaf] + 1;
leaf_depth_[leaf]++;
++num_leaves_;
return num_leaves_ - 1;
}
......@@ -140,7 +146,7 @@ Tree::Tree(const std::string& str) {
|| key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
|| key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
|| key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0) {
Log::Stderr("tree model string format error");
Log::Fatal("tree model string format error");
}
Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
......@@ -155,6 +161,7 @@ Tree::Tree(const std::string& str) {
split_feature_ = nullptr;
threshold_in_bin_ = nullptr;
leaf_depth_ = nullptr;
Common::StringToIntArray(key_vals["split_feature"], ' ',
num_leaves_ - 1, split_feature_real_);
......
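A minimal sketch of the depth rule Tree::Split now maintains; SplitDepth is illustrative, the real logic lives inline in Split above.

#include <vector>

// The right child is appended with the parent's depth + 1, and the parent's
// slot, which is reused for the left child, is deepened by one as well.
void SplitDepth(std::vector<int>* leaf_depth, int leaf) {
  leaf_depth->push_back((*leaf_depth)[leaf] + 1);  // new right leaf
  (*leaf_depth)[leaf] += 1;                        // left leaf reuses the slot
}
// Starting from {1} (the root at depth 1), splitting leaf 0 and then
// splitting leaf 0 again yields {3, 2, 3}.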
......@@ -18,10 +18,12 @@ template<typename PointWiseLossCalculator>
class BinaryMetric: public Metric {
public:
explicit BinaryMetric(const MetricConfig& config) {
early_stopping_round_ = config.early_stopping_round;
output_freq_ = config.output_freq;
the_bigger_the_better = false;
sigmoid_ = static_cast<score_t>(config.sigmoid);
if (sigmoid_ <= 0.0f) {
Log::Stderr("sigmoid param %f should greater than zero", sigmoid_);
Log::Fatal("Sigmoid param %f should greater than zero", sigmoid_);
}
}
......@@ -48,14 +50,14 @@ public:
}
}
void Print(int iter, const score_t* score) const override {
score_t PrintAndGetLoss(int iter, const score_t* score) const override {
score_t sum_loss = 0.0f;
if (output_freq_ > 0 && iter % output_freq_ == 0) {
if (early_stopping_round_ > 0 || (output_freq_ > 0 && iter % output_freq_ == 0)) {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// sigmoid transform
score_t prob = 1.0f / (1.0f + std::exp(-sigmoid_ * score[i]));
score_t prob = 1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * score[i]));
// add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
}
......@@ -63,13 +65,18 @@ public:
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// sigmoid transform
score_t prob = 1.0f / (1.0f + std::exp(-sigmoid_ * score[i]));
score_t prob = 1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * score[i]));
// add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
}
}
Log::Stdout("Iteration:%d, %s's %s: %f", iter, name, PointWiseLossCalculator::Name(), sum_loss / sum_weights_);
score_t loss = sum_loss / sum_weights_;
if (output_freq_ > 0 && iter % output_freq_ == 0) {
Log::Info("Iteration:%d, %s's %s: %f", iter, name, PointWiseLossCalculator::Name(), loss);
}
return loss;
}
return 0.0f;
}
private:
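In math form, the transform above (note the new factor of 2 in the exponent) and the value PrintAndGetLoss reports are

p_i = \frac{1}{1 + e^{-2\sigma s_i}}, \qquad \text{loss} = \frac{\sum_i w_i \, \ell(y_i, p_i)}{\sum_i w_i}

where \sigma is sigmoid_, s_i the raw score, w_i the weight (1 when no weights are given), and \ell the point-wise loss.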
......@@ -139,7 +146,9 @@ public:
class AUCMetric: public Metric {
public:
explicit AUCMetric(const MetricConfig& config) {
early_stopping_round_ = config.early_stopping_round;
output_freq_ = config.output_freq;
the_bigger_the_better = true;
}
virtual ~AUCMetric() {
......@@ -163,8 +172,8 @@ public:
}
}
void Print(int iter, const score_t* score) const override {
if (output_freq_ > 0 && iter % output_freq_ == 0) {
score_t PrintAndGetLoss(int iter, const score_t* score) const override {
if (early_stopping_round_ > 0 || (output_freq_ > 0 && iter % output_freq_ == 0)) {
// get indices sorted by score, descent order
std::vector<data_size_t> sorted_idx;
for (data_size_t i = 0; i < num_data_; ++i) {
......@@ -220,8 +229,12 @@ public:
if (sum_pos > 0.0f && sum_pos != sum_weights_) {
auc = accum / (sum_pos *(sum_weights_ - sum_pos));
}
Log::Stdout("iteration:%d, %s's %s: %f", iter, name, "auc", auc);
if (output_freq_ > 0 && iter % output_freq_ == 0) {
Log::Info("Iteration:%d, %s's %s: %f", iter, name, "auc", auc);
}
return auc;
}
return 0.0f;
}
private:
......
......@@ -57,7 +57,7 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
// counts for all labels
for (data_size_t i = 0; i < num_data; ++i) {
if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Stderr("label excel %d\n", label[i]); }
if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Fatal("Label %d exceeds the allowed range", label[i]); }
++label_cnt[static_cast<int>(label[i])];
}
double cur_result = 0.0;
......
......@@ -16,7 +16,9 @@ namespace LightGBM {
class NDCGMetric:public Metric {
public:
explicit NDCGMetric(const MetricConfig& config) {
early_stopping_round_ = config.early_stopping_round;
output_freq_ = config.output_freq;
the_bigger_the_better = true;
// get eval position
for (auto k : config.eval_at) {
eval_at_.push_back(static_cast<data_size_t>(k));
......@@ -41,7 +43,7 @@ public:
// get query boundaries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Stderr("For NDCG metric, should have query information");
Log::Fatal("For NDCG metric, there should be query information");
}
num_queries_ = metadata.num_queries();
// get query weights
......@@ -73,8 +75,8 @@ public:
}
}
void Print(int iter, const score_t* score) const override {
if (output_freq_ > 0 && iter % output_freq_ == 0) {
score_t PrintAndGetLoss(int iter, const score_t* score) const override {
if (early_stopping_round_ > 0 || (output_freq_ > 0 && iter % output_freq_ == 0)) {
// some buffers for multi-threading sum up
std::vector<std::vector<double>> result_buffer_;
for (int i = 0; i < num_threads_; ++i) {
......@@ -132,8 +134,12 @@ public:
result[j] /= sum_query_weights_;
result_ss << "NDCG@" << eval_at_[j] << ":" << result[j] << "\t";
}
Log::Stdout("Iteration:%d, Test:%s, %s ", iter, name, result_ss.str().c_str());
if (output_freq_ > 0 && iter % output_freq_ == 0) {
Log::Info("Iteration:%d, Test:%s, %s ", iter, name, result_ss.str().c_str());
}
return result[0];
}
return 0.0f;
}
private:
......
......@@ -16,7 +16,9 @@ template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
public:
explicit RegressionMetric(const MetricConfig& config) {
early_stopping_round_ = config.early_stopping_round;
output_freq_ = config.output_freq;
the_bigger_the_better = false;
}
virtual ~RegressionMetric() {
......@@ -40,8 +42,8 @@ public:
}
}
void Print(int iter, const score_t* score) const override {
if (output_freq_ > 0 && iter % output_freq_ == 0) {
score_t PrintAndGetLoss(int iter, const score_t* score) const override {
if (early_stopping_round_ > 0 || (output_freq_ > 0 && iter % output_freq_ == 0)) {
score_t sum_loss = 0.0;
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
......@@ -56,8 +58,13 @@ public:
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
}
}
Log::Stdout("Iteration:%d, %s's %s : %f", iter, name, PointWiseLossCalculator::Name(), PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_));
score_t loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
if (output_freq_ > 0 && iter % output_freq_ == 0) {
Log::Info("Iteration:%d, %s's %s : %f", iter, name, PointWiseLossCalculator::Name(), loss);
}
return loss;
}
return 0.0f;
}
inline static score_t AverageLoss(score_t sum_loss, score_t sum_weights) {
......
......@@ -44,7 +44,7 @@ Linkers::Linkers(NetworkConfig config) {
}
}
if (rank_ == -1) {
Log::Stderr("machine list file doesn't contain local machine, app quit");
Log::Fatal("Machine list file doesn't contain local machine");
}
// construct listener
listener_ = new TcpSocket();
......@@ -73,14 +73,14 @@ Linkers::~Linkers() {
}
}
TcpSocket::Finalize();
Log::Stdout("network used %f seconds", network_time_ * 1e-3);
Log::Info("Network using %f seconds", network_time_ * 1e-3);
}
void Linkers::ParseMachineList(const char * filename) {
TextReader<size_t> machine_list_reader(filename);
machine_list_reader.ReadAllLines();
if (machine_list_reader.Lines().size() <= 0) {
Log::Stderr("machine list file:%s doesn't exist", filename);
Log::Fatal("Machine list file:%s doesn't exist", filename);
}
for (auto& line : machine_list_reader.Lines()) {
......@@ -95,7 +95,7 @@ void Linkers::ParseMachineList(const char * filename) {
continue;
}
if (client_ips_.size() >= static_cast<size_t>(num_machines_)) {
Log::Stdout("The #machine in machine list is larger than parameter num_machines, will ignore rest");
Log::Error("The #machine in machine_list is larger than parameter num_machines, the redundant will ignored");
break;
}
str_after_split[0] = Common::Trim(str_after_split[0]);
......@@ -104,17 +104,17 @@ void Linkers::ParseMachineList(const char * filename) {
client_ports_.push_back(atoi(str_after_split[1].c_str()));
}
if (client_ips_.size() != static_cast<size_t>(num_machines_)) {
Log::Stdout("The world size is bigger the #machine in machine list, change world size to %d .", client_ips_.size());
Log::Error("The world size is bigger the #machine in machine list, change world size to %d .", client_ips_.size());
num_machines_ = static_cast<int>(client_ips_.size());
}
}
void Linkers::TryBind(int port) {
Log::Stdout("try to bind port %d.", port);
Log::Info("try to bind port %d.", port);
if (listener_->Bind(port)) {
Log::Stdout("bind port %d success.", port);
Log::Info("Binding port %d success.", port);
} else {
Log::Stderr("bind port %d failed.", port);
Log::Fatal("Binding port %d failed.", port);
}
}
......@@ -125,7 +125,7 @@ void Linkers::SetLinker(int rank, const TcpSocket& socket) {
}
void Linkers::ListenThread(int incoming_cnt) {
Log::Stdout("Listening...");
Log::Info("Listening...");
char buffer[100];
int connected_cnt = 0;
while (connected_cnt < incoming_cnt) {
......@@ -192,7 +192,7 @@ void Linkers::Construct() {
if (cur_socket.Connect(client_ips_[out_rank].c_str(), client_ports_[out_rank])) {
break;
} else {
Log::Stdout("connect to rank %d failed, wait for %d milliseconds", out_rank, connect_fail_delay_time);
Log::Error("Connect to rank %d failed, wait for %d milliseconds", out_rank, connect_fail_delay_time);
std::this_thread::sleep_for(std::chrono::milliseconds(connect_fail_delay_time));
}
}
......@@ -217,7 +217,7 @@ bool Linkers::CheckLinker(int rank) {
void Linkers::PrintLinkers() {
for (int i = 0; i < num_machines_; ++i) {
if (CheckLinker(i)) {
Log::Stdout("Connected to rank %d.", i);
Log::Info("Connected to rank %d.", i);
}
}
}
......
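For reference, ParseMachineList above reads one machine per line; a hypothetical two-machine list file could look like the following (assuming the ip and port are whitespace-separated, per the str_after_split indexing above):

192.168.0.1 12400
192.168.0.2 12400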
......@@ -30,7 +30,7 @@ void Network::Init(NetworkConfig config) {
block_len_ = new int[num_machines_];
buffer_size_ = 1024 * 1024;
buffer_ = new char[buffer_size_];
Log::Stdout("local rank %d, total number of machines %d", rank_, num_machines_);
Log::Info("local rank %d, total number of machines %d", rank_, num_machines_);
}
void Network::Dispose() {
......
......@@ -60,7 +60,7 @@ public:
TcpSocket() {
sockfd_ = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (sockfd_ == INVALID_SOCKET) {
Log::Stderr("socket construct error");
Log::Fatal("Socket construct error");
return;
}
ConfigSocket();
......@@ -69,7 +69,7 @@ public:
explicit TcpSocket(SOCKET socket) {
sockfd_ = socket;
if (sockfd_ == INVALID_SOCKET) {
Log::Stderr("passed socket error");
Log::Fatal("Passed socket error");
return;
}
ConfigSocket();
......@@ -97,11 +97,11 @@ public:
#if defined(_WIN32)
WSADATA wsa_data;
if (WSAStartup(MAKEWORD(2, 2), &wsa_data) == -1) {
Log::Stderr("socket error: start up error");
Log::Fatal("Socket error: WSAStart up error");
}
if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) {
WSACleanup();
Log::Stderr("socket error: Winsock.dll version error");
Log::Fatal("Socket error: Winsock.dll version error");
}
#else
#endif
......@@ -128,7 +128,7 @@ public:
char buffer[512];
// get hostName
if (gethostname(buffer, sizeof(buffer)) == SOCKET_ERROR) {
Log::Stderr("Error code: %d, when getting local host name.", WSAGetLastError());
Log::Fatal("Error code: %d, when getting local host name.", WSAGetLastError());
}
// push local ip
PIP_ADAPTER_INFO pAdapterInfo;
......@@ -137,7 +137,7 @@ public:
ULONG ulOutBufLen = sizeof(IP_ADAPTER_INFO);
pAdapterInfo = (IP_ADAPTER_INFO *)MALLOC(sizeof(IP_ADAPTER_INFO));
if (pAdapterInfo == NULL) {
Log::Stderr("Error allocating memory needed to call GetAdaptersinfo\n");
Log::Fatal("GetAdaptersinfo error: allocating memory ");
}
// Make an initial call to GetAdaptersInfo to get
// the necessary size into the ulOutBufLen variable
......@@ -145,7 +145,7 @@ public:
FREE(pAdapterInfo);
pAdapterInfo = (IP_ADAPTER_INFO *)MALLOC(ulOutBufLen);
if (pAdapterInfo == NULL) {
Log::Stderr("Error allocating memory needed to call GetAdaptersinfo\n");
Log::Fatal("GetAdaptersinfo error: allocating memory ");
}
}
if ((dwRetVal = GetAdaptersInfo(pAdapterInfo, &ulOutBufLen)) == NO_ERROR) {
......@@ -155,7 +155,7 @@ public:
pAdapter = pAdapter->Next;
}
} else {
printf("GetAdaptersInfo failed with error: %d\n", dwRetVal);
Log::Error("GetAdaptersinfo error: code %d ", dwRetVal);
}
if (pAdapterInfo)
FREE(pAdapterInfo);
......@@ -218,7 +218,7 @@ public:
inline TcpSocket Accept() {
SOCKET newfd = accept(sockfd_, NULL, NULL);
if (newfd == INVALID_SOCKET) {
Log::Stderr("socket accept error,error code: %d", GetLastError());
Log::Fatal("Socket accept error, code: %d", GetLastError());
}
return TcpSocket(newfd);
}
......@@ -226,7 +226,7 @@ public:
inline int Send(const char *buf_, int len, int flag = 0) {
int cur_cnt = send(sockfd_, buf_, len, flag);
if (cur_cnt == SOCKET_ERROR) {
Log::Stderr("socket send error, error code: %d", GetLastError());
Log::Fatal("Socket send error, code: %d", GetLastError());
}
return cur_cnt;
}
......@@ -234,7 +234,7 @@ public:
inline int Recv(char *buf_, int len, int flags = 0) {
int cur_cnt = recv(sockfd_, buf_ , len , flags);
if (cur_cnt == SOCKET_ERROR) {
Log::Stderr("socket recv error, error code: %d", GetLastError());
Log::Fatal("Socket recv error, code: %d", GetLastError());
}
return cur_cnt;
}
......
......@@ -16,7 +16,7 @@ public:
is_unbalance_ = config.is_unbalance;
sigmoid_ = static_cast<score_t>(config.sigmoid);
if (sigmoid_ <= 0.0) {
Log::Stderr("sigmoid param %f should greater than zero", sigmoid_);
Log::Fatal("Sigmoid parameter %f :should greater than zero", sigmoid_);
}
}
~BinaryLogloss() {}
......@@ -34,10 +34,10 @@ public:
++cnt_negative;
}
}
Log::Stdout("number of postive:%d number of negative:%d", cnt_positive, cnt_negative);
Log::Info("Number of postive:%d, number of negative:%d", cnt_positive, cnt_negative);
// cannot continue if all sample are same class
if (cnt_positive == 0 || cnt_negative == 0) {
Log::Stderr("input training data only contain one class");
Log::Fatal("Input training data only contains one class");
}
// use -1 for negative class, and 1 for positive class
label_val_[0] = -1;
......