Commit 82c27d42 authored by Guolin Ke

fix prediction of ova multi-class.

parent 512e0c34
@@ -40,15 +40,15 @@ GBDT::GBDT()
shrinkage_rate_(0.1f),
num_init_iteration_(0),
boost_from_average_(false) {
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
}
GBDT::~GBDT() {
#ifdef TIMETAG
Log::Info("GBDT::boosting costs %f", boosting_time * 1e-3);
Log::Info("GBDT::train_score costs %f", train_score_time * 1e-3);
Log::Info("GBDT::out_of_bag_score costs %f", out_of_bag_score_time * 1e-3);
@@ -57,7 +57,7 @@ GBDT::~GBDT() {
Log::Info("GBDT::bagging costs %f", bagging_time * 1e-3);
Log::Info("GBDT::sub_gradient costs %f", sub_gradient_time * 1e-3);
Log::Info("GBDT::tree costs %f", tree_time * 1e-3);
#endif
}
void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
@@ -85,7 +85,8 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
sigmoid_ = -1.0f;
if (object_function_ != nullptr
-  && std::string(object_function_->GetName()) == std::string("binary")) {
+  && (std::string(object_function_->GetName()) == std::string("binary")
+  || std::string(object_function_->GetName()) == std::string("multiclassova"))) {
// only binary classification need sigmoid transform
sigmoid_ = new_config->sigmoid;
}
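
For context, a minimal standalone sketch (not part of this commit; names are illustrative) of the transform the changed guard above now shares between the binary and multiclassova objectives: in one-vs-all multiclass, each class's raw score is an independent binary margin, so the same sigmoid applies per class.

#include <cmath>
#include <cstdio>

// Sketch of the transform gated by the sigmoid_ member above; here
// `sigmoid` stands in for sigmoid_.
double SigmoidTransform(double raw_score, double sigmoid) {
  return 1.0 / (1.0 + std::exp(-sigmoid * raw_score));
}

int main() {
  const double sigmoid = 1.0;
  // One-vs-all multiclass: each class's raw score is transformed
  // independently, exactly like a per-class binary problem.
  const double raw_scores[3] = {2.0, -1.0, 0.5};
  for (int j = 0; j < 3; ++j) {
    std::printf("class %d: %f\n", j, SigmoidTransform(raw_scores[j], sigmoid));
  }
  return 0;
}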
@@ -174,7 +175,6 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
if (num_class_ > 1) {
for (int i = 0; i < num_class_; ++i) {
if (cnt_per_class[i] == num_data_) {
-  Log::Warning("Only contain one class.");
class_need_train_[i] = false;
class_default_output_[i] = -std::log(kEpsilon);
} else if (cnt_per_class[i] == 0) {
@@ -261,7 +261,7 @@ void GBDT::Bagging(int iter) {
data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_;
if (inner_size < min_inner_size) { inner_size = min_inner_size; }
OMP_INIT_EX();
#pragma omp parallel for schedule(static,1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
left_cnts_buf_[i] = 0;
@@ -287,7 +287,7 @@ void GBDT::Bagging(int iter) {
}
left_cnt = left_write_pos_buf_[num_threads_ - 1] + left_cnts_buf_[num_threads_ - 1];
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
if (left_cnts_buf_[i] > 0) {
@@ -317,22 +317,22 @@ void GBDT::Bagging(int iter) {
}
void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt_ > 0 && !is_use_subset_) {
train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, curr_class);
}
#ifdef TIMETAG
out_of_bag_score_time += std::chrono::steady_clock::now() - start_time;
#endif
}
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting from average prediction. It doesn't work well for classification, remove it for now.
if (models_.empty()
&& gbdt_config_->boost_from_average
&& !train_score_updater_->has_init_score()
&& sigmoid_ < 0.0f
&& num_class_ <= 1) {
@@ -354,28 +354,28 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
// boosting first
if (gradient == nullptr || hessian == nullptr) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
Boosting();
gradient = gradients_.data();
hessian = hessians_.data();
#ifdef TIMETAG
boosting_time += std::chrono::steady_clock::now() - start_time;
#endif
}
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// bagging logic
Bagging(iter_);
#ifdef TIMETAG
bagging_time += std::chrono::steady_clock::now() - start_time;
#endif
if (is_use_subset_ && bag_data_cnt_ < num_data_) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
if (gradients_.empty()) {
size_t total_size = static_cast<size_t>(num_data_) * num_class_;
gradients_.resize(total_size);
@@ -392,23 +392,23 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
gradient = gradients_.data();
hessian = hessians_.data();
#ifdef TIMETAG
sub_gradient_time += std::chrono::steady_clock::now() - start_time;
#endif
}
bool should_continue = false;
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
std::unique_ptr<Tree> new_tree(new Tree(2));
if (class_need_train_[curr_class]) {
new_tree.reset(
tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
}
#ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time;
#endif
if (new_tree->num_leaves() > 1) {
should_continue = true;
@@ -421,7 +421,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
// only add default score one-time
if (!class_need_train_[curr_class] && models_.size() < static_cast<size_t>(num_class_)) {
auto output = class_default_output_[curr_class];
new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0,
output, output, 0, num_data_, 1);
train_score_updater_->AddScore(output, curr_class);
for (auto& score_updater : valid_score_updater_) {
@@ -469,14 +469,14 @@ void GBDT::RollbackOneIter() {
bool GBDT::EvalAndCheckEarlyStopping() {
bool is_met_early_stopping = false;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// print message for metric
auto best_msg = OutputMetric(iter_);
#ifdef TIMETAG
metric_time += std::chrono::steady_clock::now() - start_time;
#endif
is_met_early_stopping = !best_msg.empty();
if (is_met_early_stopping) {
Log::Info("Early stopping at iteration %d, the best iteration round is %d",
@@ -491,28 +491,28 @@ bool GBDT::EvalAndCheckEarlyStopping() {
}
void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// update training score
if (!is_use_subset_) {
train_score_updater_->AddScore(tree_learner_.get(), tree, curr_class);
} else {
train_score_updater_->AddScore(tree, curr_class);
}
#ifdef TIMETAG
train_score_time += std::chrono::steady_clock::now() - start_time;
#endif
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// update validation score
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(tree, curr_class);
}
#ifdef TIMETAG
valid_score_time += std::chrono::steady_clock::now() - start_time;
#endif
}
std::string GBDT::OutputMetric(int iter) {
@@ -617,8 +617,16 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_;
}
-  if (num_class_ > 1) {
-#pragma omp parallel for schedule(static)
+  if (sigmoid_ > 0.0f) {
+#pragma omp parallel for schedule(static)
+    for (data_size_t i = 0; i < num_data; ++i) {
+      for (int j = 0; j < num_class_; ++j) {
+        out_result[i + j * num_data_] = static_cast<double>(
+          1.0f / (1.0f + std::exp(-sigmoid_ * raw_scores[i + j * num_data_])));
+      }
+    }
+  } else if (num_class_ > 1) {
+#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
std::vector<double> tmp_result(num_class_);
for (int j = 0; j < num_class_; ++j) {
@@ -629,13 +637,8 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
}
}
-  } else if (sigmoid_ > 0.0f) {
-#pragma omp parallel for schedule(static)
-    for (data_size_t i = 0; i < num_data; ++i) {
-      out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(-sigmoid_ * raw_scores[i])));
-    }
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<double>(raw_scores[i]);
}
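
The reordered branches above are the core of the fix: a one-vs-all model has both sigmoid_ > 0 and num_class_ > 1, so under the old order it fell through to the softmax path. A standalone sketch of the new dispatch (illustrative names, not the LightGBM API; scores laid out column-major with class j at offset j * num_data, as in the code above):

#include <cmath>
#include <vector>

// Hypothetical stand-in for the transform order in GetPredictAt.
// raw and out both hold num_data * num_class entries, column-major.
void TransformScores(const std::vector<double>& raw, std::vector<double>* out,
                     int num_data, int num_class, double sigmoid) {
  if (sigmoid > 0.0) {
    // binary or one-vs-all multiclass: independent sigmoid per class column
    for (int i = 0; i < num_data; ++i) {
      for (int j = 0; j < num_class; ++j) {
        (*out)[i + j * num_data] =
            1.0 / (1.0 + std::exp(-sigmoid * raw[i + j * num_data]));
      }
    }
  } else if (num_class > 1) {
    // plain multiclass: normalize each row with a softmax
    for (int i = 0; i < num_data; ++i) {
      std::vector<double> tmp(num_class);
      double sum = 0.0;
      for (int j = 0; j < num_class; ++j) {
        tmp[j] = std::exp(raw[i + j * num_data]);
        sum += tmp[j];
      }
      for (int j = 0; j < num_class; ++j) {
        (*out)[i + j * num_data] = tmp[j] / sum;
      }
    }
  } else {
    // regression or ranking: raw scores pass through unchanged
    *out = raw;
  }
}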
@@ -855,8 +858,8 @@ std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const {
}
// sort the importance
std::sort(pairs.begin(), pairs.end(),
-  [] (const std::pair<size_t, std::string>& lhs,
-      const std::pair<size_t, std::string>& rhs) {
+  [](const std::pair<size_t, std::string>& lhs,
+     const std::pair<size_t, std::string>& rhs) {
return lhs.first > rhs.first;
});
return pairs;
@@ -880,8 +883,10 @@ std::vector<double> GBDT::Predict(const double* value) const {
}
}
// if need sigmoid transform
-  if (sigmoid_ > 0 && num_class_ == 1) {
-    ret[0] = 1.0f / (1.0f + std::exp(-sigmoid_ * ret[0]));
+  if (sigmoid_ > 0) {
+    for (int j = 0; j < num_class_; ++j) {
+      ret[j] = 1.0f / (1.0f + std::exp(-sigmoid_ * ret[j]));
+    }
} else if (num_class_ > 1) {
Common::Softmax(&ret);
}
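
The single-row Predict path above gets the matching fix: dropping num_class_ == 1 from the sigmoid guard routes multiclassova models to the per-class sigmoid instead of Common::Softmax. A hypothetical single-row equivalent, for comparison:

#include <cmath>
#include <vector>

// Illustrative single-row transform mirroring the new Predict() branch
// order. Before this commit the guard was sigmoid > 0 && num_class == 1,
// so a one-vs-all model wrongly took the softmax branch.
std::vector<double> TransformRow(std::vector<double> ret, double sigmoid) {
  if (sigmoid > 0.0) {
    for (double& v : ret) {
      v = 1.0 / (1.0 + std::exp(-sigmoid * v));  // per-class sigmoid
    }
  } else if (ret.size() > 1) {
    double sum = 0.0;  // softmax for true multiclass objectives
    for (double& v : ret) { v = std::exp(v); sum += v; }
    for (double& v : ret) { v /= sum; }
  }
  return ret;
}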