Commit e179c7c6 authored by Guolin Ke

Change to boosting from the average output instead of zero.

parent 1c1749db
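In words: before this change, boosting started from a score of zero for every data point (see the "default start score is zero" comment in the score_updater hunk below). With boost_from_average enabled and no user-supplied init score, the first call to TrainOneIter now seeds each class's score with the average of the training labels: the label mean for regression and binary objectives, the class prior for multiclass. A minimal standalone sketch of that computation (illustration only, not LightGBM code; the label data here is made up):

#include <cstdio>
#include <vector>

int main() {
  // Binary / regression case: the init score is simply the label mean.
  std::vector<float> labels = {0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
  double sum = 0.0;
  for (float y : labels) sum += y;
  double init_score = sum / labels.size();
  std::printf("init score = %f\n", init_score);  // 0.6

  // Multiclass case: one init score per class, i.e. the class prior.
  const int num_class = 3;
  std::vector<int> class_labels = {0, 2, 1, 2, 2};
  std::vector<double> prior(num_class, 0.0);
  for (int y : class_labels) prior[y] += 1.0;
  for (int k = 0; k < num_class; ++k) {
    std::printf("class %d prior = %f\n", k, prior[k] / class_labels.size());
  }
  return 0;
}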
@@ -213,6 +213,7 @@ public:
   int drop_seed = 4;
   double top_rate = 0.2f;
   double other_rate = 0.1f;
+  bool boost_from_average = true;
   std::string tree_learner_type = "serial";
   TreeConfig tree_config;
   LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
...
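The boost_from_average option added above defaults to true. A hedged usage sketch: it can be turned off through the same string-keyed parameter map that BoostingConfig::Set() consumes (the config object itself is omitted here, and the exact call site is an assumption):

#include <string>
#include <unordered_map>

int main() {
  std::unordered_map<std::string, std::string> params;
  params["boost_from_average"] = "false";  // overrides the default of true
  // A BoostingConfig instance would then pick this up via Set(params),
  // which routes it through GetBool() as shown in the config.cpp hunk below.
  return 0;
}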
@@ -38,7 +38,8 @@ GBDT::GBDT()
   sigmoid_(-1.0f),
   num_iteration_for_pred_(0),
   shrinkage_rate_(0.1f),
-  num_init_iteration_(0) {
+  num_init_iteration_(0),
+  boost_from_average_(false) {
 #pragma omp parallel
 #pragma omp master
   {
@@ -198,7 +199,7 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
   valid_metrics_.back().shrink_to_fit();
 }
-data_size_t GBDT::BaggingHelper(Random& cur_rand, data_size_t start, data_size_t cnt, data_size_t* buffer){
+data_size_t GBDT::BaggingHelper(Random& cur_rand, data_size_t start, data_size_t cnt, data_size_t* buffer) {
   if (cnt <= 0) {
     return 0;
   }
@@ -230,7 +231,7 @@ void GBDT::Bagging(int iter) {
   data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_;
   if (inner_size < min_inner_size) { inner_size = min_inner_size; }
 #pragma omp parallel for schedule(static,1)
   for (int i = 0; i < num_threads_; ++i) {
     left_cnts_buf_[i] = 0;
     right_cnts_buf_[i] = 0;
@@ -253,7 +254,7 @@ void GBDT::Bagging(int iter) {
   }
   left_cnt = left_write_pos_buf_[num_threads_ - 1] + left_cnts_buf_[num_threads_ - 1];
 #pragma omp parallel for schedule(static, 1)
   for (int i = 0; i < num_threads_; ++i) {
     if (left_cnts_buf_[i] > 0) {
       std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i],
@@ -293,17 +294,42 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
 }
 bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
+  // boosting from average prediction.
+  if (models_.empty() && gbdt_config_->boost_from_average && !train_score_updater_->has_init_score()) {
+    std::vector<double> sum_per_class(num_class_, 0.0f);
+    auto label = train_data_->metadata().label();
+    if (num_class_ > 1) {
+      for (data_size_t i = 0; i < num_data_; ++i) {
+        sum_per_class[static_cast<int>(label[i])] += 1.0f;
+      }
+    } else {
+      for (data_size_t i = 0; i < num_data_; ++i) {
+        sum_per_class[0] += label[i];
+      }
+    }
+    for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
+      double init_score = sum_per_class[curr_class] / num_data_;
+      std::unique_ptr<Tree> new_tree(new Tree(2));
+      new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0, init_score, init_score, 0, num_data_, 1);
+      train_score_updater_->AddScore(init_score, curr_class);
+      for (auto& score_updater : valid_score_updater_) {
+        score_updater->AddScore(init_score, curr_class);
+      }
+      models_.push_back(std::move(new_tree));
+    }
+    boost_from_average_ = true;
+  }
   // boosting first
   if (gradient == nullptr || hessian == nullptr) {
 #ifdef TIMETAG
     auto start_time = std::chrono::steady_clock::now();
 #endif
     Boosting();
     gradient = gradients_.data();
     hessian = hessians_.data();
 #ifdef TIMETAG
     boosting_time += std::chrono::steady_clock::now() - start_time;
 #endif
   }
 #ifdef TIMETAG
   auto start_time = std::chrono::steady_clock::now();
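The new block above injects the init score in two places: ScoreUpdater::AddScore() shifts the cached train and validation scores, and a constant tree is appended to models_ so that saved models reproduce the same shift at prediction time. The constant tree is a Tree(2) whose Split() call assigns init_score to both leaves, making the split condition irrelevant. A standalone illustration of why that works (not LightGBM's Tree class):

#include <cstdio>
#include <initializer_list>

// A two-leaf tree whose leaves share one output is a constant function:
// whichever side of the dummy threshold a row lands on, it gets init_score.
double constant_tree_predict(double feature_value, double threshold,
                             double left_output, double right_output) {
  return feature_value <= threshold ? left_output : right_output;
}

int main() {
  double init_score = 0.6;
  for (double x : {-1.0, 0.0, 42.0}) {
    std::printf("x=%5.1f -> %f\n", x,
                constant_tree_predict(x, 0.0, init_score, init_score));
  }
  return 0;
}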
@@ -314,9 +340,9 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
   bagging_time += std::chrono::steady_clock::now() - start_time;
 #endif
   if (is_use_subset_ && bag_data_cnt_ < num_data_) {
 #ifdef TIMETAG
     start_time = std::chrono::steady_clock::now();
 #endif
     if (gradients_.empty()) {
       size_t total_size = static_cast<size_t>(num_data_) * num_class_;
       gradients_.resize(total_size);
@@ -333,23 +359,23 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
     }
     gradient = gradients_.data();
     hessian = hessians_.data();
 #ifdef TIMETAG
     sub_gradient_time += std::chrono::steady_clock::now() - start_time;
 #endif
   }
   bool should_continue = false;
   for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
 #ifdef TIMETAG
     start_time = std::chrono::steady_clock::now();
 #endif
     std::unique_ptr<Tree> new_tree(new Tree(2));
     if (!is_class_end_[curr_class]) {
       // train a new tree
       new_tree.reset(tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
     }
 #ifdef TIMETAG
     tree_time += std::chrono::steady_clock::now() - start_time;
 #endif
     if (new_tree->num_leaves() > 1) {
       should_continue = true;
@@ -519,8 +545,7 @@ std::vector<double> GBDT::GetEvalAt(int data_idx) const {
       ret.push_back(score);
     }
   }
-  }
-  else {
+  } else {
     auto used_idx = data_idx - 1;
     for (size_t j = 0; j < valid_metrics_[used_idx].size(); ++j) {
       auto test_scores = valid_metrics_[used_idx][j]->Eval(valid_score_updater_[used_idx]->score());
@@ -553,7 +578,7 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
     *out_len = static_cast<int64_t>(num_data) * num_class_;
   }
   if (num_class_ > 1) {
 #pragma omp parallel for schedule(static)
     for (data_size_t i = 0; i < num_data; ++i) {
       std::vector<double> tmp_result(num_class_);
       for (int j = 0; j < num_class_; ++j) {
@@ -564,13 +589,13 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
         out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
       }
     }
-  } else if(sigmoid_ > 0.0f){
+  } else if (sigmoid_ > 0.0f) {
 #pragma omp parallel for schedule(static)
     for (data_size_t i = 0; i < num_data; ++i) {
-      out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(- sigmoid_ * raw_scores[i])));
+      out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(-sigmoid_ * raw_scores[i])));
     }
   } else {
 #pragma omp parallel for schedule(static)
     for (data_size_t i = 0; i < num_data; ++i) {
       out_result[i] = static_cast<double>(raw_scores[i]);
     }
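For binary objectives (sigmoid_ > 0) the raw score is mapped to a probability as p = 1 / (1 + exp(-sigmoid_ * raw_score)). A quick standalone check of that mapping:

#include <cmath>
#include <cstdio>
#include <initializer_list>

int main() {
  double sigmoid = 1.0;
  for (double raw : {-2.0, 0.0, 2.0}) {
    double p = 1.0 / (1.0 + std::exp(-sigmoid * raw));
    std::printf("raw=%5.1f -> p=%.3f\n", raw, p);  // 0.119, 0.500, 0.881
  }
  return 0;
}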
@@ -641,6 +666,10 @@ std::string GBDT::SaveModelToString(int num_iterations) const {
   // output sigmoid parameter
   ss << "sigmoid=" << sigmoid_ << std::endl;
+  if (boost_from_average_) {
+    ss << "boost_from_average" << std::endl;
+  }
   ss << "feature_names=" << Common::Join(feature_names_, " ") << std::endl;
   ss << "feature_infos=" << Common::Join(feature_infos_, " ") << std::endl;
@@ -713,6 +742,11 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
   } else {
     sigmoid_ = -1.0f;
   }
+  // get boost_from_average_
+  line = Common::FindFromLines(lines, "boost_from_average");
+  if (line.size() > 0) {
+    boost_from_average_ = true;
+  }
   // get feature names
   line = Common::FindFromLines(lines, "feature_names=");
   if (line.size() > 0) {
@@ -721,8 +755,7 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
       Log::Fatal("Wrong size of feature_names");
       return false;
     }
-  }
-  else {
+  } else {
     Log::Fatal("Model file doesn't contain feature names");
     return false;
   }
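Note that the flag is serialized as a bare boost_from_average line with no =value, so its mere presence in the model string flips boost_from_average_ on load. A sketch of the round trip, with Common::FindFromLines approximated by a plain linear scan (an assumption; judging from the call site it returns the matching line, or an empty string):

#include <cstdio>
#include <string>
#include <vector>

std::string find_from_lines(const std::vector<std::string>& lines,
                            const std::string& token) {
  for (const auto& l : lines) {
    if (l.find(token) != std::string::npos) return l;
  }
  return "";
}

int main() {
  std::vector<std::string> model_lines = {"sigmoid=-1", "boost_from_average",
                                          "feature_names=f0 f1"};
  bool boost_from_average =
      !find_from_lines(model_lines, "boost_from_average").empty();
  std::printf("boost_from_average_ = %d\n", boost_from_average);  // prints 1
  return 0;
}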
@@ -780,7 +813,7 @@ std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const {
   }
   // sort the importance
   std::sort(pairs.begin(), pairs.end(),
-    [](const std::pair<size_t, std::string>& lhs,
+    [] (const std::pair<size_t, std::string>& lhs,
       const std::pair<size_t, std::string>& rhs) {
       return lhs.first > rhs.first;
     });
...
@@ -206,7 +206,7 @@ public:
   inline void SetNumIterationForPred(int num_iteration) override {
     num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
     if (num_iteration > 0) {
-      num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_);
+      num_iteration_for_pred_ = std::min(num_iteration + (boost_from_average_ ? 1 : 0), num_iteration_for_pred_);
     }
   }
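This is the subtle bookkeeping in the commit: the constant init trees live in models_ like ordinary iteration trees (one per class), so a request to predict with N iterations has to keep N + 1 stored trees per class when boost_from_average_ is set. A worked example of the arithmetic:

#include <algorithm>
#include <cstdio>

int main() {
  int trained_iterations = 100;
  bool boost_from_average = true;
  // models_.size() / num_class_ counts the constant init tree as well:
  int stored_per_class = trained_iterations + (boost_from_average ? 1 : 0);  // 101
  int requested = 50;  // caller asks for predictions from 50 iterations
  int used = std::min(requested + (boost_from_average ? 1 : 0), stored_per_class);
  std::printf("trees used per class: %d\n", used);  // 51 = init tree + 50 boosted
  return 0;
}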
@@ -345,6 +345,7 @@ protected:
   std::unique_ptr<Dataset> tmp_subset_;
   bool is_use_subset_;
   std::vector<bool> is_class_end_;
+  bool boost_from_average_;
 };
 }  // namespace LightGBM
...
@@ -25,10 +25,11 @@ public:
   int64_t total_size = static_cast<int64_t>(num_data_) * num_class;
   score_.resize(total_size);
   // default start score is zero
 #pragma omp parallel for schedule(static)
   for (int64_t i = 0; i < total_size; ++i) {
     score_[i] = 0.0f;
   }
+  has_init_score_ = false;
   const double* init_score = data->metadata().init_score();
   // if exists initial score, will start from it
   if (init_score != nullptr) {
@@ -36,7 +37,8 @@ public:
         || (data->metadata().num_init_score() / num_data_) != num_class) {
       Log::Fatal("number of class for initial score error");
     }
+    has_init_score_ = true;
 #pragma omp parallel for schedule(static)
     for (int64_t i = 0; i < total_size; ++i) {
       score_[i] = init_score[i];
     }
@@ -46,6 +48,16 @@ public:
   ~ScoreUpdater() {
   }
+  inline bool has_init_score() const { return has_init_score_; }
+  inline void AddScore(double val, int curr_class) {
+    int64_t offset = curr_class * num_data_;
+#pragma omp parallel for schedule(static)
+    for (int64_t i = 0; i < num_data_; ++i) {
+      score_[offset + i] += val;
+    }
+  }
   /*!
   * \brief Using tree model to get prediction number, then adding to scores for all data
   * Note: this function generally will be used on validation data too.
@@ -92,6 +104,7 @@ private:
   const Dataset* data_;
   /*! \brief Scores for data set */
   std::vector<double> score_;
+  bool has_init_score_;
 };
 }  // namespace LightGBM
...
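The AddScore(double, int) overload added above applies a constant shift to every cached score of one class; it is what pushes init_score into the train and validation scores in TrainOneIter. In isolation, with scores laid out class-major as in ScoreUpdater:

#include <cstdio>
#include <vector>

int main() {
  int num_data = 4, num_class = 2;
  std::vector<double> score(num_data * num_class, 0.0);
  double init_score = 0.6;
  int curr_class = 1;
  // Scores are stored class-major: [class 0 scores..., class 1 scores...].
  long offset = static_cast<long>(curr_class) * num_data;
  for (int i = 0; i < num_data; ++i) score[offset + i] += init_score;
  for (double s : score) std::printf("%.1f ", s);  // 0.0 x4, then 0.6 x4
  std::printf("\n");
  return 0;
}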
@@ -331,6 +331,7 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& params) {
   GetBool(params, "uniform_drop", &uniform_drop);
   GetDouble(params, "top_rate", &top_rate);
   GetDouble(params, "other_rate", &other_rate);
+  GetBool(params, "boost_from_average", &boost_from_average);
   CHECK(drop_rate <= 1.0 && drop_rate >= 0.0);
   CHECK(skip_drop <= 1.0 && skip_drop >= 0.0);
   GetTreeLearnerType(params);
...
@@ -375,7 +375,15 @@ Tree::Tree(const std::string& str) {
       }
     }
   }
-  if (key_vals.count("num_leaves") <= 0 || key_vals.count("split_feature") <= 0
+  if (key_vals.count("num_leaves") <= 0) {
+    Log::Fatal("Tree model string format error");
+  }
+  Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
+  if (num_leaves_ <= 1) { return; }
+  if (key_vals.count("split_feature") <= 0
       || key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
       || key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
       || key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0
@@ -386,8 +394,6 @@ Tree::Tree(const std::string& str) {
     Log::Fatal("Tree model string format error");
   }
-  Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
   left_child_ = Common::StringToArray<int>(key_vals["left_child"], ' ', num_leaves_ - 1);
   right_child_ = Common::StringToArray<int>(key_vals["right_child"], ' ', num_leaves_ - 1);
   split_feature_ = Common::StringToArray<int>(key_vals["split_feature"], ' ', num_leaves_ - 1);
...
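The reordering above exists because trees that never split (for example, the placeholder Tree(2) pushed when a class has already converged) serialize with num_leaves=1 and none of the split arrays, so the old all-or-nothing key check would reject valid model files. The parser now reads num_leaves first and returns early for such constant trees. A control-flow sketch (the model fields other than num_leaves are illustrative):

#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> key_vals = {{"num_leaves", "1"},
                                                 {"leaf_value", "0.6"}};
  int num_leaves = std::stoi(key_vals["num_leaves"]);
  if (num_leaves <= 1) {
    std::printf("constant tree: no split arrays to parse\n");
    return 0;
  }
  // ...only past this point would split_feature / threshold / left_child /
  // right_child be required, as in the check above...
  return 0;
}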