Commit 82c27d42 authored by Guolin Ke

fix prediction of ova multi-class.

parent 512e0c34
@@ -40,15 +40,15 @@ GBDT::GBDT()
shrinkage_rate_(0.1f),
num_init_iteration_(0),
boost_from_average_(false) {
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
}
GBDT::~GBDT() {
#ifdef TIMETAG
Log::Info("GBDT::boosting costs %f", boosting_time * 1e-3);
Log::Info("GBDT::train_score costs %f", train_score_time * 1e-3);
Log::Info("GBDT::out_of_bag_score costs %f", out_of_bag_score_time * 1e-3);
@@ -57,7 +57,7 @@ GBDT::~GBDT() {
Log::Info("GBDT::bagging costs %f", bagging_time * 1e-3);
Log::Info("GBDT::sub_gradient costs %f", sub_gradient_time * 1e-3);
Log::Info("GBDT::tree costs %f", tree_time * 1e-3);
#endif
}
void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
@@ -85,7 +85,8 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
sigmoid_ = -1.0f;
if (object_function_ != nullptr
-  && std::string(object_function_->GetName()) == std::string("binary")) {
+  && (std::string(object_function_->GetName()) == std::string("binary")
+  || std::string(object_function_->GetName()) == std::string("multiclassova"))) {
// only binary classification need sigmoid transform
sigmoid_ = new_config->sigmoid;
}
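
For context, a minimal standalone sketch (not part of this commit; names are illustrative) of the transform the changed guard above now shares between the binary and multiclassova objectives: in one-vs-all multiclass, each class's raw score is an independent binary margin, so the same sigmoid applies per class.

#include <cmath>
#include <cstdio>

// Sketch of the transform gated by the sigmoid_ member above; here
// `sigmoid` stands in for sigmoid_.
double SigmoidTransform(double raw_score, double sigmoid) {
  return 1.0 / (1.0 + std::exp(-sigmoid * raw_score));
}

int main() {
  const double sigmoid = 1.0;
  // One-vs-all multiclass: each class's raw score is transformed
  // independently, exactly like a per-class binary problem.
  const double raw_scores[3] = {2.0, -1.0, 0.5};
  for (int j = 0; j < 3; ++j) {
    std::printf("class %d: %f\n", j, SigmoidTransform(raw_scores[j], sigmoid));
  }
  return 0;
}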
@@ -174,7 +175,6 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
if (num_class_ > 1) {
for (int i = 0; i < num_class_; ++i) {
if (cnt_per_class[i] == num_data_) {
-  Log::Warning("Only contain one class.");
class_need_train_[i] = false;
class_default_output_[i] = -std::log(kEpsilon);
} else if (cnt_per_class[i] == 0) {
@@ -261,7 +261,7 @@ void GBDT::Bagging(int iter) {
data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_;
if (inner_size < min_inner_size) { inner_size = min_inner_size; }
OMP_INIT_EX();
#pragma omp parallel for schedule(static,1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
left_cnts_buf_[i] = 0;
@@ -287,7 +287,7 @@ void GBDT::Bagging(int iter) {
}
left_cnt = left_write_pos_buf_[num_threads_ - 1] + left_cnts_buf_[num_threads_ - 1];
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
if (left_cnts_buf_[i] > 0) {
@@ -317,22 +317,22 @@ void GBDT::Bagging(int iter) {
}
void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt_ > 0 && !is_use_subset_) {
train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, curr_class);
}
#ifdef TIMETAG
out_of_bag_score_time += std::chrono::steady_clock::now() - start_time;
#endif
}
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting from average prediction. It doesn't work well for classification, remove it for now.
if (models_.empty()
&& gbdt_config_->boost_from_average
&& !train_score_updater_->has_init_score()
&& sigmoid_ < 0.0f
&& num_class_ <= 1) {
@@ -354,28 +354,28 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
// boosting first
if (gradient == nullptr || hessian == nullptr) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
Boosting();
gradient = gradients_.data();
hessian = hessians_.data();
#ifdef TIMETAG
boosting_time += std::chrono::steady_clock::now() - start_time;
#endif
}
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// bagging logic
Bagging(iter_);
#ifdef TIMETAG
bagging_time += std::chrono::steady_clock::now() - start_time;
#endif
if (is_use_subset_ && bag_data_cnt_ < num_data_) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
if (gradients_.empty()) {
size_t total_size = static_cast<size_t>(num_data_) * num_class_;
gradients_.resize(total_size);
@@ -392,23 +392,23 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
gradient = gradients_.data();
hessian = hessians_.data();
#ifdef TIMETAG
sub_gradient_time += std::chrono::steady_clock::now() - start_time;
#endif
}
bool should_continue = false;
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
std::unique_ptr<Tree> new_tree(new Tree(2));
if (class_need_train_[curr_class]) {
new_tree.reset(
tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
}
#ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time;
#endif
if (new_tree->num_leaves() > 1) {
should_continue = true;
@@ -421,7 +421,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
// only add default score one-time
if (!class_need_train_[curr_class] && models_.size() < static_cast<size_t>(num_class_)) {
auto output = class_default_output_[curr_class];
new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0,
output, output, 0, num_data_, 1);
train_score_updater_->AddScore(output, curr_class);
for (auto& score_updater : valid_score_updater_) {
@@ -469,14 +469,14 @@ void GBDT::RollbackOneIter() {
bool GBDT::EvalAndCheckEarlyStopping() {
bool is_met_early_stopping = false;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// print message for metric
auto best_msg = OutputMetric(iter_);
#ifdef TIMETAG
metric_time += std::chrono::steady_clock::now() - start_time;
#endif
is_met_early_stopping = !best_msg.empty();
if (is_met_early_stopping) {
Log::Info("Early stopping at iteration %d, the best iteration round is %d",
@@ -491,28 +491,28 @@ bool GBDT::EvalAndCheckEarlyStopping() {
}
void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// update training score
if (!is_use_subset_) {
train_score_updater_->AddScore(tree_learner_.get(), tree, curr_class);
} else {
train_score_updater_->AddScore(tree, curr_class);
}
#ifdef TIMETAG
train_score_time += std::chrono::steady_clock::now() - start_time;
#endif
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// update validation score
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(tree, curr_class);
}
#ifdef TIMETAG
valid_score_time += std::chrono::steady_clock::now() - start_time;
#endif
}
std::string GBDT::OutputMetric(int iter) {
@@ -617,8 +617,16 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_;
}
-  if (num_class_ > 1) {
-#pragma omp parallel for schedule(static)
+  if (sigmoid_ > 0.0f) {
+#pragma omp parallel for schedule(static)
+    for (data_size_t i = 0; i < num_data; ++i) {
+      for (int j = 0; j < num_class_; ++j) {
+        out_result[i + j * num_data_] = static_cast<double>(
+          1.0f / (1.0f + std::exp(-sigmoid_ * raw_scores[i + j * num_data_])));
+      }
+    }
+  } else if (num_class_ > 1) {
+#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
std::vector<double> tmp_result(num_class_);
for (int j = 0; j < num_class_; ++j) {
@@ -629,13 +637,8 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
}
}
-  } else if (sigmoid_ > 0.0f) {
-#pragma omp parallel for schedule(static)
-    for (data_size_t i = 0; i < num_data; ++i) {
-      out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(-sigmoid_ * raw_scores[i])));
-    }
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<double>(raw_scores[i]);
}
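
The reordered branches above are the core of the fix: a one-vs-all model has both sigmoid_ > 0 and num_class_ > 1, so under the old order it fell through to the softmax path. A standalone sketch of the new dispatch (illustrative names, not the LightGBM API; scores laid out column-major with class j at offset j * num_data, as in the code above):

#include <cmath>
#include <vector>

// Hypothetical stand-in for the transform order in GetPredictAt.
// raw and out both hold num_data * num_class entries, column-major.
void TransformScores(const std::vector<double>& raw, std::vector<double>* out,
                     int num_data, int num_class, double sigmoid) {
  if (sigmoid > 0.0) {
    // binary or one-vs-all multiclass: independent sigmoid per class column
    for (int i = 0; i < num_data; ++i) {
      for (int j = 0; j < num_class; ++j) {
        (*out)[i + j * num_data] =
            1.0 / (1.0 + std::exp(-sigmoid * raw[i + j * num_data]));
      }
    }
  } else if (num_class > 1) {
    // plain multiclass: normalize each row with a softmax
    for (int i = 0; i < num_data; ++i) {
      std::vector<double> tmp(num_class);
      double sum = 0.0;
      for (int j = 0; j < num_class; ++j) {
        tmp[j] = std::exp(raw[i + j * num_data]);
        sum += tmp[j];
      }
      for (int j = 0; j < num_class; ++j) {
        (*out)[i + j * num_data] = tmp[j] / sum;
      }
    }
  } else {
    // regression or ranking: raw scores pass through unchanged
    *out = raw;
  }
}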
@@ -855,8 +858,8 @@ std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const {
}
// sort the importance
std::sort(pairs.begin(), pairs.end(),
-  [] (const std::pair<size_t, std::string>& lhs,
-      const std::pair<size_t, std::string>& rhs) {
+  [](const std::pair<size_t, std::string>& lhs,
+     const std::pair<size_t, std::string>& rhs) {
return lhs.first > rhs.first;
});
return pairs;
@@ -880,8 +883,10 @@ std::vector<double> GBDT::Predict(const double* value) const {
}
}
// if need sigmoid transform
-  if (sigmoid_ > 0 && num_class_ == 1) {
-    ret[0] = 1.0f / (1.0f + std::exp(-sigmoid_ * ret[0]));
+  if (sigmoid_ > 0) {
+    for (int j = 0; j < num_class_; ++j) {
+      ret[j] = 1.0f / (1.0f + std::exp(-sigmoid_ * ret[j]));
+    }
} else if (num_class_ > 1) {
Common::Softmax(&ret);
}
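
The single-row Predict path above gets the matching fix: dropping num_class_ == 1 from the sigmoid guard routes multiclassova models to the per-class sigmoid instead of Common::Softmax. A hypothetical single-row equivalent, for comparison:

#include <cmath>
#include <vector>

// Illustrative single-row transform mirroring the new Predict() branch
// order. Before this commit the guard was sigmoid > 0 && num_class == 1,
// so a one-vs-all model wrongly took the softmax branch.
std::vector<double> TransformRow(std::vector<double> ret, double sigmoid) {
  if (sigmoid > 0.0) {
    for (double& v : ret) {
      v = 1.0 / (1.0 + std::exp(-sigmoid * v));  // per-class sigmoid
    }
  } else if (ret.size() > 1) {
    double sum = 0.0;  // softmax for true multiclass objectives
    for (double& v : ret) { v = std::exp(v); sum += v; }
    for (double& v : ret) { v /= sum; }
  }
  return ret;
}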