"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "b69364e9280888914ea716f6e310add457b88d74"
Commit e179c7c6 authored by Guolin Ke's avatar Guolin Ke
Browse files

change to boost from average output.

parent 1c1749db
...@@ -213,6 +213,7 @@ public: ...@@ -213,6 +213,7 @@ public:
int drop_seed = 4; int drop_seed = 4;
double top_rate = 0.2f; double top_rate = 0.2f;
double other_rate = 0.1f; double other_rate = 0.1f;
bool boost_from_average = true;
std::string tree_learner_type = "serial"; std::string tree_learner_type = "serial";
TreeConfig tree_config; TreeConfig tree_config;
LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override; LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
......
...@@ -38,7 +38,8 @@ GBDT::GBDT() ...@@ -38,7 +38,8 @@ GBDT::GBDT()
sigmoid_(-1.0f), sigmoid_(-1.0f),
num_iteration_for_pred_(0), num_iteration_for_pred_(0),
shrinkage_rate_(0.1f), shrinkage_rate_(0.1f),
num_init_iteration_(0) { num_init_iteration_(0),
boost_from_average_(false) {
#pragma omp parallel #pragma omp parallel
#pragma omp master #pragma omp master
{ {
...@@ -60,7 +61,7 @@ GBDT::~GBDT() { ...@@ -60,7 +61,7 @@ GBDT::~GBDT() {
} }
void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) { const std::vector<const Metric*>& training_metrics) {
iter_ = 0; iter_ = 0;
num_iteration_for_pred_ = 0; num_iteration_for_pred_ = 0;
max_feature_idx_ = 0; max_feature_idx_ = 0;
...@@ -72,7 +73,7 @@ void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const O ...@@ -72,7 +73,7 @@ void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const O
} }
void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) { const std::vector<const Metric*>& training_metrics) {
auto new_config = std::unique_ptr<BoostingConfig>(new BoostingConfig(*config)); auto new_config = std::unique_ptr<BoostingConfig>(new BoostingConfig(*config));
if (train_data_ != nullptr && !train_data_->CheckAlign(*train_data)) { if (train_data_ != nullptr && !train_data_->CheckAlign(*train_data)) {
Log::Fatal("cannot reset training data, since new training data has different bin mappers"); Log::Fatal("cannot reset training data, since new training data has different bin mappers");
...@@ -84,7 +85,7 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_ ...@@ -84,7 +85,7 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
sigmoid_ = -1.0f; sigmoid_ = -1.0f;
if (object_function_ != nullptr if (object_function_ != nullptr
&& std::string(object_function_->GetName()) == std::string("binary")) { && std::string(object_function_->GetName()) == std::string("binary")) {
// only binary classification need sigmoid transform // only binary classification need sigmoid transform
sigmoid_ = new_config->sigmoid; sigmoid_ = new_config->sigmoid;
} }
...@@ -130,7 +131,7 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_ ...@@ -130,7 +131,7 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
} }
if ((train_data_ != train_data && train_data != nullptr) if ((train_data_ != train_data && train_data != nullptr)
|| (gbdt_config_ != nullptr && gbdt_config_->bagging_fraction != new_config->bagging_fraction)) { || (gbdt_config_ != nullptr && gbdt_config_->bagging_fraction != new_config->bagging_fraction)) {
// if need bagging, create buffer // if need bagging, create buffer
if (new_config->bagging_fraction < 1.0 && new_config->bagging_freq > 0) { if (new_config->bagging_fraction < 1.0 && new_config->bagging_freq > 0) {
bag_data_cnt_ = bag_data_cnt_ =
...@@ -167,7 +168,7 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_ ...@@ -167,7 +168,7 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
} }
void GBDT::AddValidDataset(const Dataset* valid_data, void GBDT::AddValidDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) { const std::vector<const Metric*>& valid_metrics) {
if (!train_data_->CheckAlign(*valid_data)) { if (!train_data_->CheckAlign(*valid_data)) {
Log::Fatal("cannot add validation data, since it has different bin mappers with training data"); Log::Fatal("cannot add validation data, since it has different bin mappers with training data");
} }
...@@ -198,7 +199,7 @@ void GBDT::AddValidDataset(const Dataset* valid_data, ...@@ -198,7 +199,7 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
valid_metrics_.back().shrink_to_fit(); valid_metrics_.back().shrink_to_fit();
} }
data_size_t GBDT::BaggingHelper(Random& cur_rand, data_size_t start, data_size_t cnt, data_size_t* buffer){ data_size_t GBDT::BaggingHelper(Random& cur_rand, data_size_t start, data_size_t cnt, data_size_t* buffer) {
if (cnt <= 0) { if (cnt <= 0) {
return 0; return 0;
} }
...@@ -230,7 +231,7 @@ void GBDT::Bagging(int iter) { ...@@ -230,7 +231,7 @@ void GBDT::Bagging(int iter) {
data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_; data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_;
if (inner_size < min_inner_size) { inner_size = min_inner_size; } if (inner_size < min_inner_size) { inner_size = min_inner_size; }
#pragma omp parallel for schedule(static,1) #pragma omp parallel for schedule(static,1)
for (int i = 0; i < num_threads_; ++i) { for (int i = 0; i < num_threads_; ++i) {
left_cnts_buf_[i] = 0; left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0; right_cnts_buf_[i] = 0;
...@@ -253,15 +254,15 @@ void GBDT::Bagging(int iter) { ...@@ -253,15 +254,15 @@ void GBDT::Bagging(int iter) {
} }
left_cnt = left_write_pos_buf_[num_threads_ - 1] + left_cnts_buf_[num_threads_ - 1]; left_cnt = left_write_pos_buf_[num_threads_ - 1] + left_cnts_buf_[num_threads_ - 1];
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) { for (int i = 0; i < num_threads_; ++i) {
if (left_cnts_buf_[i] > 0) { if (left_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i], std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t)); tmp_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
} }
if (right_cnts_buf_[i] > 0) { if (right_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i], std::memcpy(bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i] + left_cnts_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t)); tmp_indices_.data() + offsets_buf_[i] + left_cnts_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
} }
} }
bag_data_cnt_ = left_cnt; bag_data_cnt_ = left_cnt;
...@@ -293,17 +294,42 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) { ...@@ -293,17 +294,42 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
} }
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) { bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting from average prediction.
if (models_.empty() && gbdt_config_->boost_from_average && !train_score_updater_->has_init_score()) {
std::vector<double> sum_per_class(num_class_, 0.0f);
auto label = train_data_->metadata().label();
if (num_class_ > 1) {
for (data_size_t i = 0; i < num_data_; ++i) {
sum_per_class[static_cast<int>(label[i])] += 1.0f;
}
} else {
for (data_size_t i = 0; i < num_data_; ++i) {
sum_per_class[0] += label[i];
}
}
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
double init_score = sum_per_class[curr_class] / num_data_;
std::unique_ptr<Tree> new_tree(new Tree(2));
new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0, init_score, init_score, 0, num_data_, 1);
train_score_updater_->AddScore(init_score, curr_class);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(init_score, curr_class);
}
models_.push_back(std::move(new_tree));
}
boost_from_average_ = true;
}
// boosting first // boosting first
if (gradient == nullptr || hessian == nullptr) { if (gradient == nullptr || hessian == nullptr) {
#ifdef TIMETAG #ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now(); auto start_time = std::chrono::steady_clock::now();
#endif #endif
Boosting(); Boosting();
gradient = gradients_.data(); gradient = gradients_.data();
hessian = hessians_.data(); hessian = hessians_.data();
#ifdef TIMETAG #ifdef TIMETAG
boosting_time += std::chrono::steady_clock::now() - start_time; boosting_time += std::chrono::steady_clock::now() - start_time;
#endif #endif
} }
#ifdef TIMETAG #ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now(); auto start_time = std::chrono::steady_clock::now();
...@@ -314,14 +340,14 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is ...@@ -314,14 +340,14 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
bagging_time += std::chrono::steady_clock::now() - start_time; bagging_time += std::chrono::steady_clock::now() - start_time;
#endif #endif
if (is_use_subset_ && bag_data_cnt_ < num_data_) { if (is_use_subset_ && bag_data_cnt_ < num_data_) {
#ifdef TIMETAG #ifdef TIMETAG
start_time = std::chrono::steady_clock::now(); start_time = std::chrono::steady_clock::now();
#endif #endif
if (gradients_.empty()) { if (gradients_.empty()) {
size_t total_size = static_cast<size_t>(num_data_) * num_class_; size_t total_size = static_cast<size_t>(num_data_) * num_class_;
gradients_.resize(total_size); gradients_.resize(total_size);
hessians_.resize(total_size); hessians_.resize(total_size);
} }
// get sub gradients // get sub gradients
for (int curr_class = 0; curr_class < num_class_; ++curr_class) { for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
auto bias = curr_class * num_data_; auto bias = curr_class * num_data_;
...@@ -333,23 +359,23 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is ...@@ -333,23 +359,23 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
} }
gradient = gradients_.data(); gradient = gradients_.data();
hessian = hessians_.data(); hessian = hessians_.data();
#ifdef TIMETAG #ifdef TIMETAG
sub_gradient_time += std::chrono::steady_clock::now() - start_time; sub_gradient_time += std::chrono::steady_clock::now() - start_time;
#endif #endif
} }
bool should_continue = false; bool should_continue = false;
for (int curr_class = 0; curr_class < num_class_; ++curr_class) { for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
#ifdef TIMETAG #ifdef TIMETAG
start_time = std::chrono::steady_clock::now(); start_time = std::chrono::steady_clock::now();
#endif #endif
std::unique_ptr<Tree> new_tree(new Tree(2)); std::unique_ptr<Tree> new_tree(new Tree(2));
if (!is_class_end_[curr_class]) { if (!is_class_end_[curr_class]) {
// train a new tree // train a new tree
new_tree.reset(tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_)); new_tree.reset(tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
} }
#ifdef TIMETAG #ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time; tree_time += std::chrono::steady_clock::now() - start_time;
#endif #endif
if (new_tree->num_leaves() > 1) { if (new_tree->num_leaves() > 1) {
should_continue = true; should_continue = true;
...@@ -414,7 +440,7 @@ bool GBDT::EvalAndCheckEarlyStopping() { ...@@ -414,7 +440,7 @@ bool GBDT::EvalAndCheckEarlyStopping() {
is_met_early_stopping = !best_msg.empty(); is_met_early_stopping = !best_msg.empty();
if (is_met_early_stopping) { if (is_met_early_stopping) {
Log::Info("Early stopping at iteration %d, the best iteration round is %d", Log::Info("Early stopping at iteration %d, the best iteration round is %d",
iter_, iter_ - early_stopping_round_); iter_, iter_ - early_stopping_round_);
Log::Info("Output of best iteration round:\n%s", best_msg.c_str()); Log::Info("Output of best iteration round:\n%s", best_msg.c_str());
// pop last early_stopping_round_ models // pop last early_stopping_round_ models
for (int i = 0; i < early_stopping_round_ * num_class_; ++i) { for (int i = 0; i < early_stopping_round_ * num_class_; ++i) {
...@@ -519,8 +545,7 @@ std::vector<double> GBDT::GetEvalAt(int data_idx) const { ...@@ -519,8 +545,7 @@ std::vector<double> GBDT::GetEvalAt(int data_idx) const {
ret.push_back(score); ret.push_back(score);
} }
} }
} } else {
else {
auto used_idx = data_idx - 1; auto used_idx = data_idx - 1;
for (size_t j = 0; j < valid_metrics_[used_idx].size(); ++j) { for (size_t j = 0; j < valid_metrics_[used_idx].size(); ++j) {
auto test_scores = valid_metrics_[used_idx][j]->Eval(valid_score_updater_[used_idx]->score()); auto test_scores = valid_metrics_[used_idx][j]->Eval(valid_score_updater_[used_idx]->score());
...@@ -553,7 +578,7 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) { ...@@ -553,7 +578,7 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
*out_len = static_cast<int64_t>(num_data) * num_class_; *out_len = static_cast<int64_t>(num_data) * num_class_;
} }
if (num_class_ > 1) { if (num_class_ > 1) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
std::vector<double> tmp_result(num_class_); std::vector<double> tmp_result(num_class_);
for (int j = 0; j < num_class_; ++j) { for (int j = 0; j < num_class_; ++j) {
...@@ -564,13 +589,13 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) { ...@@ -564,13 +589,13 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
out_result[j * num_data + i] = static_cast<double>(tmp_result[j]); out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
} }
} }
} else if(sigmoid_ > 0.0f){ } else if (sigmoid_ > 0.0f) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(- sigmoid_ * raw_scores[i]))); out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(-sigmoid_ * raw_scores[i])));
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<double>(raw_scores[i]); out_result[i] = static_cast<double>(raw_scores[i]);
} }
...@@ -598,15 +623,15 @@ std::string GBDT::DumpModel(int num_iteration) const { ...@@ -598,15 +623,15 @@ std::string GBDT::DumpModel(int num_iteration) const {
str_buf << "\"max_feature_idx\":" << max_feature_idx_ << "," << std::endl; str_buf << "\"max_feature_idx\":" << max_feature_idx_ << "," << std::endl;
str_buf << "\"sigmoid\":" << sigmoid_ << "," << std::endl; str_buf << "\"sigmoid\":" << sigmoid_ << "," << std::endl;
str_buf << "\"feature_names\":[\"" str_buf << "\"feature_names\":[\""
<< Common::Join(feature_names_, "\",\"") << "\"]," << Common::Join(feature_names_, "\",\"") << "\"],"
<< std::endl; << std::endl;
str_buf << "\"tree_info\":["; str_buf << "\"tree_info\":[";
int num_used_model = static_cast<int>(models_.size()); int num_used_model = static_cast<int>(models_.size());
if (num_iteration > 0) { if (num_iteration > 0) {
num_used_model = std::min(num_iteration * num_class_, num_used_model); num_used_model = std::min(num_iteration * num_class_, num_used_model);
} }
for (int i = 0; i < num_used_model; ++i) { for (int i = 0; i < num_used_model; ++i) {
if (i > 0) { if (i > 0) {
str_buf << ","; str_buf << ",";
...@@ -624,45 +649,49 @@ std::string GBDT::DumpModel(int num_iteration) const { ...@@ -624,45 +649,49 @@ std::string GBDT::DumpModel(int num_iteration) const {
} }
std::string GBDT::SaveModelToString(int num_iterations) const { std::string GBDT::SaveModelToString(int num_iterations) const {
std::stringstream ss; std::stringstream ss;
// output model type
ss << SubModelName() << std::endl;
// output number of class
ss << "num_class=" << num_class_ << std::endl;
// output label index
ss << "label_index=" << label_idx_ << std::endl;
// output max_feature_idx
ss << "max_feature_idx=" << max_feature_idx_ << std::endl;
// output objective name
if (object_function_ != nullptr) {
ss << "objective=" << object_function_->GetName() << std::endl;
}
// output sigmoid parameter
ss << "sigmoid=" << sigmoid_ << std::endl;
ss << "feature_names=" << Common::Join(feature_names_, " ") << std::endl; // output model type
ss << SubModelName() << std::endl;
// output number of class
ss << "num_class=" << num_class_ << std::endl;
// output label index
ss << "label_index=" << label_idx_ << std::endl;
// output max_feature_idx
ss << "max_feature_idx=" << max_feature_idx_ << std::endl;
// output objective name
if (object_function_ != nullptr) {
ss << "objective=" << object_function_->GetName() << std::endl;
}
// output sigmoid parameter
ss << "sigmoid=" << sigmoid_ << std::endl;
ss << "feature_infos=" << Common::Join(feature_infos_, " ") << std::endl; if (boost_from_average_) {
ss << "boost_from_average" << std::endl;
}
ss << std::endl; ss << "feature_names=" << Common::Join(feature_names_, " ") << std::endl;
int num_used_model = static_cast<int>(models_.size());
if (num_iterations > 0) {
num_used_model = std::min(num_iterations * num_class_, num_used_model);
}
// output tree models
for (int i = 0; i < num_used_model; ++i) {
ss << "Tree=" << i << std::endl;
ss << models_[i]->ToString() << std::endl;
}
std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance(); ss << "feature_infos=" << Common::Join(feature_infos_, " ") << std::endl;
ss << std::endl << "feature importances:" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) {
ss << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
}
return ss.str(); ss << std::endl;
int num_used_model = static_cast<int>(models_.size());
if (num_iterations > 0) {
num_used_model = std::min(num_iterations * num_class_, num_used_model);
}
// output tree models
for (int i = 0; i < num_used_model; ++i) {
ss << "Tree=" << i << std::endl;
ss << models_[i]->ToString() << std::endl;
}
std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance();
ss << std::endl << "feature importances:" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) {
ss << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
}
return ss.str();
} }
bool GBDT::SaveModelToFile(int num_iteration, const char* filename) const { bool GBDT::SaveModelToFile(int num_iteration, const char* filename) const {
...@@ -713,6 +742,11 @@ bool GBDT::LoadModelFromString(const std::string& model_str) { ...@@ -713,6 +742,11 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
} else { } else {
sigmoid_ = -1.0f; sigmoid_ = -1.0f;
} }
// get boost_from_average_
line = Common::FindFromLines(lines, "boost_from_average");
if (line.size() > 0) {
boost_from_average_ = true;
}
// get feature names // get feature names
line = Common::FindFromLines(lines, "feature_names="); line = Common::FindFromLines(lines, "feature_names=");
if (line.size() > 0) { if (line.size() > 0) {
...@@ -721,8 +755,7 @@ bool GBDT::LoadModelFromString(const std::string& model_str) { ...@@ -721,8 +755,7 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
Log::Fatal("Wrong size of feature_names"); Log::Fatal("Wrong size of feature_names");
return false; return false;
} }
} } else {
else {
Log::Fatal("Model file doesn't contain feature names"); Log::Fatal("Model file doesn't contain feature names");
return false; return false;
} }
...@@ -766,25 +799,25 @@ bool GBDT::LoadModelFromString(const std::string& model_str) { ...@@ -766,25 +799,25 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const { std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const {
std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0); std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0);
for (size_t iter = 0; iter < models_.size(); ++iter) { for (size_t iter = 0; iter < models_.size(); ++iter) {
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) { for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
++feature_importances[models_[iter]->split_feature(split_idx)]; ++feature_importances[models_[iter]->split_feature(split_idx)];
}
} }
// store the importance first }
std::vector<std::pair<size_t, std::string>> pairs; // store the importance first
for (size_t i = 0; i < feature_importances.size(); ++i) { std::vector<std::pair<size_t, std::string>> pairs;
if (feature_importances[i] > 0) { for (size_t i = 0; i < feature_importances.size(); ++i) {
pairs.emplace_back(feature_importances[i], feature_names_[i]); if (feature_importances[i] > 0) {
} pairs.emplace_back(feature_importances[i], feature_names_[i]);
} }
// sort the importance }
std::sort(pairs.begin(), pairs.end(), // sort the importance
[](const std::pair<size_t, std::string>& lhs, std::sort(pairs.begin(), pairs.end(),
const std::pair<size_t, std::string>& rhs) { [] (const std::pair<size_t, std::string>& lhs,
return lhs.first > rhs.first; const std::pair<size_t, std::string>& rhs) {
}); return lhs.first > rhs.first;
return pairs; });
return pairs;
} }
std::vector<double> GBDT::PredictRaw(const double* value) const { std::vector<double> GBDT::PredictRaw(const double* value) const {
......
...@@ -206,7 +206,7 @@ public: ...@@ -206,7 +206,7 @@ public:
inline void SetNumIterationForPred(int num_iteration) override { inline void SetNumIterationForPred(int num_iteration) override {
num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_; num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
if (num_iteration > 0) { if (num_iteration > 0) {
num_iteration_for_pred_ = std::min(num_iteration, num_iteration_for_pred_); num_iteration_for_pred_ = std::min(num_iteration + (boost_from_average_ ? 1 : 0), num_iteration_for_pred_);
} }
} }
...@@ -345,6 +345,7 @@ protected: ...@@ -345,6 +345,7 @@ protected:
std::unique_ptr<Dataset> tmp_subset_; std::unique_ptr<Dataset> tmp_subset_;
bool is_use_subset_; bool is_use_subset_;
std::vector<bool> is_class_end_; std::vector<bool> is_class_end_;
bool boost_from_average_;
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -25,18 +25,20 @@ public: ...@@ -25,18 +25,20 @@ public:
int64_t total_size = static_cast<int64_t>(num_data_) * num_class; int64_t total_size = static_cast<int64_t>(num_data_) * num_class;
score_.resize(total_size); score_.resize(total_size);
// default start score is zero // default start score is zero
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int64_t i = 0; i < total_size; ++i) { for (int64_t i = 0; i < total_size; ++i) {
score_[i] = 0.0f; score_[i] = 0.0f;
} }
has_init_score_ = false;
const double* init_score = data->metadata().init_score(); const double* init_score = data->metadata().init_score();
// if exists initial score, will start from it // if exists initial score, will start from it
if (init_score != nullptr) { if (init_score != nullptr) {
if ((data->metadata().num_init_score() % num_data_) != 0 if ((data->metadata().num_init_score() % num_data_) != 0
|| (data->metadata().num_init_score() / num_data_) != num_class) { || (data->metadata().num_init_score() / num_data_) != num_class) {
Log::Fatal("number of class for initial score error"); Log::Fatal("number of class for initial score error");
} }
#pragma omp parallel for schedule(static) has_init_score_ = true;
#pragma omp parallel for schedule(static)
for (int64_t i = 0; i < total_size; ++i) { for (int64_t i = 0; i < total_size; ++i) {
score_[i] = init_score[i]; score_[i] = init_score[i];
} }
...@@ -46,6 +48,16 @@ public: ...@@ -46,6 +48,16 @@ public:
~ScoreUpdater() { ~ScoreUpdater() {
} }
inline bool has_init_score() const { return has_init_score_; }
inline void AddScore(double val, int curr_class) {
int64_t offset = curr_class * num_data_;
#pragma omp parallel for schedule(static)
for (int64_t i = 0; i < num_data_; ++i) {
score_[offset + i] += val;
}
}
/*! /*!
* \brief Using tree model to get prediction number, then adding to scores for all data * \brief Using tree model to get prediction number, then adding to scores for all data
* Note: this function generally will be used on validation data too. * Note: this function generally will be used on validation data too.
...@@ -74,7 +86,7 @@ public: ...@@ -74,7 +86,7 @@ public:
* \param curr_class Current class for multiclass training * \param curr_class Current class for multiclass training
*/ */
inline void AddScore(const Tree* tree, const data_size_t* data_indices, inline void AddScore(const Tree* tree, const data_size_t* data_indices,
data_size_t data_cnt, int curr_class) { data_size_t data_cnt, int curr_class) {
tree->AddPredictionToScore(data_, data_indices, data_cnt, score_.data() + curr_class * num_data_); tree->AddPredictionToScore(data_, data_indices, data_cnt, score_.data() + curr_class * num_data_);
} }
/*! \brief Pointer of score */ /*! \brief Pointer of score */
...@@ -92,6 +104,7 @@ private: ...@@ -92,6 +104,7 @@ private:
const Dataset* data_; const Dataset* data_;
/*! \brief Scores for data set */ /*! \brief Scores for data set */
std::vector<double> score_; std::vector<double> score_;
bool has_init_score_;
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -331,6 +331,7 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par ...@@ -331,6 +331,7 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par
GetBool(params, "uniform_drop", &uniform_drop); GetBool(params, "uniform_drop", &uniform_drop);
GetDouble(params, "top_rate", &top_rate); GetDouble(params, "top_rate", &top_rate);
GetDouble(params, "other_rate", &other_rate); GetDouble(params, "other_rate", &other_rate);
GetBool(params, "boost_from_average", &boost_from_average);
CHECK(drop_rate <= 1.0 && drop_rate >= 0.0); CHECK(drop_rate <= 1.0 && drop_rate >= 0.0);
CHECK(skip_drop <= 1.0 && skip_drop >= 0.0); CHECK(skip_drop <= 1.0 && skip_drop >= 0.0);
GetTreeLearnerType(params); GetTreeLearnerType(params);
......
...@@ -375,19 +375,25 @@ Tree::Tree(const std::string& str) { ...@@ -375,19 +375,25 @@ Tree::Tree(const std::string& str) {
} }
} }
} }
if (key_vals.count("num_leaves") <= 0 || key_vals.count("split_feature") <= 0 if (key_vals.count("num_leaves") <= 0) {
|| key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
|| key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
|| key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0
|| key_vals.count("internal_value") <= 0 || key_vals.count("internal_count") <= 0
|| key_vals.count("leaf_count") <= 0 || key_vals.count("shrinkage") <= 0
|| key_vals.count("decision_type") <= 0
) {
Log::Fatal("Tree model string format error"); Log::Fatal("Tree model string format error");
} }
Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_); Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
if (num_leaves_ <= 1) { return; }
if (key_vals.count("split_feature") <= 0
|| key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
|| key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
|| key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0
|| key_vals.count("internal_value") <= 0 || key_vals.count("internal_count") <= 0
|| key_vals.count("leaf_count") <= 0 || key_vals.count("shrinkage") <= 0
|| key_vals.count("decision_type") <= 0
) {
Log::Fatal("Tree model string format error");
}
left_child_ = Common::StringToArray<int>(key_vals["left_child"], ' ', num_leaves_ - 1); left_child_ = Common::StringToArray<int>(key_vals["left_child"], ' ', num_leaves_ - 1);
right_child_ = Common::StringToArray<int>(key_vals["right_child"], ' ', num_leaves_ - 1); right_child_ = Common::StringToArray<int>(key_vals["right_child"], ' ', num_leaves_ - 1);
split_feature_ = Common::StringToArray<int>(key_vals["split_feature"], ' ', num_leaves_ - 1); split_feature_ = Common::StringToArray<int>(key_vals["split_feature"], ' ', num_leaves_ - 1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment