Commit 12a96334 authored by Guolin Ke

change inner prediction score to double type.

parent 5d12a8db
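The commit widens the model's internal prediction score from `score_t` (single-precision float by default) to `double`, while the gradients and hessians handed back to the tree learner stay `score_t`. As a rough illustration of the motivation — a minimal sketch, not code from this commit — a float score buffer can silently drop the small per-tree leaf outputs that boosting accumulates over many iterations:

```cpp
#include <cstdio>

int main() {
  // Hypothetical accumulated prediction around 1000; each boosting
  // iteration adds a tiny leaf output to it.
  float score_f = 1000.0f;
  double score_d = 1000.0;
  for (int iter = 0; iter < 10000; ++iter) {
    score_f += 1e-5f;  // below float's resolution at this magnitude: rounds away
    score_d += 1e-5;   // double keeps ~15-16 significant digits
  }
  std::printf("float:  %f\n", score_f);  // stays 1000.000000
  std::printf("double: %f\n", score_d);  // about 1000.100000
  return 0;
}
```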
@@ -19,12 +19,12 @@ namespace LightGBM {
 class LambdarankNDCG: public ObjectiveFunction {
 public:
   explicit LambdarankNDCG(const ObjectiveConfig& config) {
-    sigmoid_ = static_cast<score_t>(config.sigmoid);
+    sigmoid_ = static_cast<double>(config.sigmoid);
     // initialize DCG calculator
     DCGCalculator::Init(config.label_gain);
     // copy label gain to local
     for (auto gain : config.label_gain) {
-      label_gain_.push_back(static_cast<score_t>(gain));
+      label_gain_.push_back(static_cast<double>(gain));
     }
     label_gain_.shrink_to_fit();
     // will optimize NDCG@optimize_pos_at_
@@ -65,7 +65,7 @@ public:
     ConstructSigmoidTable();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     #pragma omp parallel for schedule(guided)
     for (data_size_t i = 0; i < num_queries_; ++i) {
@@ -73,14 +73,14 @@ public:
     }
   }
-  inline void GetGradientsForOneQuery(const score_t* score,
+  inline void GetGradientsForOneQuery(const double* score,
       score_t* lambdas, score_t* hessians, data_size_t query_id) const {
     // get doc boundary for current query
     const data_size_t start = query_boundaries_[query_id];
     const data_size_t cnt =
         query_boundaries_[query_id + 1] - query_boundaries_[query_id];
     // get max DCG on current query
-    const score_t inverse_max_dcg = inverse_max_dcgs_[query_id];
+    const double inverse_max_dcg = inverse_max_dcgs_[query_id];
     // add pointers with offset
     const float* label = label_ + start;
     score += start;
@@ -99,60 +99,60 @@ public:
     std::sort(sorted_idx.begin(), sorted_idx.end(),
               [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
     // get best and worst score
-    const score_t best_score = score[sorted_idx[0]];
+    const double best_score = score[sorted_idx[0]];
     data_size_t worst_idx = cnt - 1;
     if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
       worst_idx -= 1;
     }
-    const score_t wrost_score = score[sorted_idx[worst_idx]];
+    const double wrost_score = score[sorted_idx[worst_idx]];
     // start accumulating lambdas by pairs
     for (data_size_t i = 0; i < cnt; ++i) {
       const data_size_t high = sorted_idx[i];
       const int high_label = static_cast<int>(label[high]);
-      const score_t high_score = score[high];
+      const double high_score = score[high];
       if (high_score == kMinScore) { continue; }
-      const score_t high_label_gain = label_gain_[high_label];
-      const score_t high_discount = DCGCalculator::GetDiscount(i);
-      score_t high_sum_lambda = 0.0;
-      score_t high_sum_hessian = 0.0;
+      const double high_label_gain = label_gain_[high_label];
+      const double high_discount = DCGCalculator::GetDiscount(i);
+      double high_sum_lambda = 0.0;
+      double high_sum_hessian = 0.0;
       for (data_size_t j = 0; j < cnt; ++j) {
         // skip same data
         if (i == j) { continue; }
         const data_size_t low = sorted_idx[j];
         const int low_label = static_cast<int>(label[low]);
-        const score_t low_score = score[low];
+        const double low_score = score[low];
         // only consider pair with different label
         if (high_label <= low_label || low_score == kMinScore) { continue; }
-        const score_t delta_score = high_score - low_score;
+        const double delta_score = high_score - low_score;
-        const score_t low_label_gain = label_gain_[low_label];
-        const score_t low_discount = DCGCalculator::GetDiscount(j);
+        const double low_label_gain = label_gain_[low_label];
+        const double low_discount = DCGCalculator::GetDiscount(j);
         // get dcg gap
-        const score_t dcg_gap = high_label_gain - low_label_gain;
+        const double dcg_gap = high_label_gain - low_label_gain;
         // get discount of this pair
-        const score_t paired_discount = fabs(high_discount - low_discount);
+        const double paired_discount = fabs(high_discount - low_discount);
         // get delta NDCG
-        score_t delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
+        double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
         // regularize the delta_pair_NDCG by score distance
         if (high_label != low_label && best_score != wrost_score) {
           delta_pair_NDCG /= (0.01f + fabs(delta_score));
         }
         // calculate lambda for this pair
-        score_t p_lambda = GetSigmoid(delta_score);
-        score_t p_hessian = p_lambda * (2.0f - p_lambda);
+        double p_lambda = GetSigmoid(delta_score);
+        double p_hessian = p_lambda * (2.0f - p_lambda);
         // update
         p_lambda *= -delta_pair_NDCG;
         p_hessian *= 2 * delta_pair_NDCG;
         high_sum_lambda += p_lambda;
         high_sum_hessian += p_hessian;
-        lambdas[low] -= p_lambda;
-        hessians[low] += p_hessian;
+        lambdas[low] -= static_cast<score_t>(p_lambda);
+        hessians[low] += static_cast<score_t>(p_hessian);
       }
       // update
-      lambdas[high] += high_sum_lambda;
-      hessians[high] += high_sum_hessian;
+      lambdas[high] += static_cast<score_t>(high_sum_lambda);
+      hessians[high] += static_cast<score_t>(high_sum_hessian);
     }
     // apply weights if needed
     if (weights_ != nullptr) {
@@ -164,7 +164,7 @@ public:
   }
-  inline score_t GetSigmoid(score_t score) const {
+  inline double GetSigmoid(double score) const {
     if (score <= min_sigmoid_input_) {
       // too small, use lower bound
      return sigmoid_table_[0];
@@ -186,7 +186,7 @@ public:
         _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
     // cache
     for (size_t i = 0; i < _sigmoid_bins; ++i) {
-      const score_t score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
+      const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
       sigmoid_table_[i] = 2.0f / (1.0f + std::exp(2.0f * score * sigmoid_));
     }
   }
@@ -197,11 +197,11 @@ public:
 private:
   /*! \brief Gains for labels */
-  std::vector<score_t> label_gain_;
+  std::vector<double> label_gain_;
   /*! \brief Cache inverse max DCG, speed up calculation */
-  std::vector<score_t> inverse_max_dcgs_;
+  std::vector<double> inverse_max_dcgs_;
   /*! \brief Sigmoid param */
-  score_t sigmoid_;
+  double sigmoid_;
   /*! \brief Optimized NDCG@ */
   int optimize_pos_at_;
   /*! \brief Number of queries */
@@ -215,15 +215,15 @@ private:
   /*! \brief Query boundaries */
   const data_size_t* query_boundaries_;
   /*! \brief Cache result for sigmoid transform to speed up */
-  std::vector<score_t> sigmoid_table_;
+  std::vector<double> sigmoid_table_;
   /*! \brief Number of bins in sigmoid table */
   size_t _sigmoid_bins = 1024 * 1024;
   /*! \brief Minimal input of sigmoid table */
-  score_t min_sigmoid_input_ = -50;
+  double min_sigmoid_input_ = -50;
   /*! \brief Maximal input of sigmoid table */
-  score_t max_sigmoid_input_ = 50;
+  double max_sigmoid_input_ = 50;
   /*! \brief Factor that converts score to bin in sigmoid table */
-  score_t sigmoid_table_idx_factor_;
+  double sigmoid_table_idx_factor_;
 };
 }  // namespace LightGBM
......
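The LambdaRank hunks above also migrate the cached sigmoid table (`sigmoid_table_`, `min_sigmoid_input_`, `sigmoid_table_idx_factor_`) to `double`. For readers unfamiliar with the pattern, here is a minimal self-contained sketch of such a binned sigmoid cache — an illustration under assumed names, not the repository's exact class:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Precomputes 2 / (1 + exp(2 * sigmoid * x)) on a uniform grid over
// [min_input, max_input]; lookups clamp out-of-range inputs to the ends.
class SigmoidTable {
 public:
  SigmoidTable(double sigmoid, double min_input, double max_input, size_t bins)
      : min_input_(min_input), max_input_(max_input), table_(bins) {
    idx_factor_ = bins / (max_input - min_input);
    for (size_t i = 0; i < bins; ++i) {
      const double x = i / idx_factor_ + min_input;
      table_[i] = 2.0 / (1.0 + std::exp(2.0 * x * sigmoid));
    }
  }
  double Get(double x) const {
    if (x <= min_input_) return table_.front();  // too small: lower bound
    if (x >= max_input_) return table_.back();   // too large: upper bound
    return table_[static_cast<size_t>((x - min_input_) * idx_factor_)];
  }

 private:
  double min_input_, max_input_, idx_factor_;
  std::vector<double> table_;
};
```

Keeping the table and its index factor in `double` avoids a narrowing step when mapping a (now double) score delta to a bin index.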
@@ -22,18 +22,18 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        gradients[i] = (score[i] - label_[i]);
-        hessians[i] = 1.0;
+        gradients[i] = static_cast<score_t>(score[i] - label_[i]);
+        hessians[i] = 1.0f;
       }
     } else {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        gradients[i] = (score[i] - label_[i]) * weights_[i];
+        gradients[i] = static_cast<score_t>(score[i] - label_[i]) * weights_[i];
         hessians[i] = weights_[i];
       }
     }
@@ -58,7 +58,7 @@ private:
 class RegressionL1loss: public ObjectiveFunction {
 public:
   explicit RegressionL1loss(const ObjectiveConfig& config) {
-    eta_ = static_cast<score_t>(config.gaussian_eta);
+    eta_ = static_cast<double>(config.gaussian_eta);
   }
   ~RegressionL1loss() {}
@@ -69,12 +69,12 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (diff >= 0.0f) {
           gradients[i] = 1.0f;
         } else {
@@ -85,7 +85,7 @@ public:
     } else {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (diff >= 0.0f) {
           gradients[i] = weights_[i];
         } else {
@@ -108,7 +108,7 @@ private:
   /*! \brief Pointer of weights */
   const float* weights_;
   /*! \brief a parameter to control the width of the Gaussian function used to approximate the hessian */
-  score_t eta_;
+  double eta_;
 };
 /*!
@@ -117,8 +117,8 @@ private:
 class RegressionHuberLoss: public ObjectiveFunction {
 public:
   explicit RegressionHuberLoss(const ObjectiveConfig& config) {
-    delta_ = static_cast<score_t>(config.huber_delta);
-    eta_ = static_cast<score_t>(config.gaussian_eta);
+    delta_ = static_cast<double>(config.huber_delta);
+    eta_ = static_cast<double>(config.gaussian_eta);
   }
   ~RegressionHuberLoss() {
@@ -130,21 +130,21 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (std::abs(diff) <= delta_) {
-          gradients[i] = diff;
+          gradients[i] = static_cast<score_t>(diff);
           hessians[i] = 1.0f;
         } else {
           if (diff >= 0.0f) {
-            gradients[i] = delta_;
+            gradients[i] = static_cast<score_t>(delta_);
           } else {
-            gradients[i] = -delta_;
+            gradients[i] = static_cast<score_t>(-delta_);
           }
           hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
         }
@@ -152,16 +152,16 @@ public:
     } else {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (std::abs(diff) <= delta_) {
-          gradients[i] = diff * weights_[i];
+          gradients[i] = static_cast<score_t>(diff * weights_[i]);
          hessians[i] = weights_[i];
         } else {
           if (diff >= 0.0f) {
-            gradients[i] = delta_ * weights_[i];
+            gradients[i] = static_cast<score_t>(delta_ * weights_[i]);
           } else {
-            gradients[i] = -delta_ * weights_[i];
+            gradients[i] = static_cast<score_t>(-delta_ * weights_[i]);
           }
           hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
         }
@@ -181,9 +181,9 @@ private:
   /*! \brief Pointer of weights */
   const float* weights_;
   /*! \brief delta for Huber loss */
-  score_t delta_;
+  double delta_;
   /*! \brief a parameter to control the width of the Gaussian function used to approximate the hessian */
-  score_t eta_;
+  double eta_;
 };
@@ -191,7 +191,7 @@ private:
 class RegressionFairLoss: public ObjectiveFunction {
 public:
   explicit RegressionFairLoss(const ObjectiveConfig& config) {
-    c_ = static_cast<score_t>(config.fair_c);
+    c_ = static_cast<double>(config.fair_c);
   }
   ~RegressionFairLoss() {}
@@ -202,23 +202,21 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t x = score[i] - label_[i];
-        gradients[i] = c_ * x / (std::fabs(x) + c_);
-        hessians[i] = c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_));
+        const double x = score[i] - label_[i];
+        gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_));
+        hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)));
       }
     } else {
       #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t x = score[i] - label_[i];
-        gradients[i] = c_ * x / (std::fabs(x) + c_);
-        gradients[i] *= weights_[i];
-        hessians[i] = c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_));
-        hessians[i] *= weights_[i];
+        const double x = score[i] - label_[i];
+        gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_) * weights_[i]);
+        hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)) * weights_[i]);
       }
     }
   }
@@ -235,7 +233,7 @@ private:
   /*! \brief Pointer of weights */
   const float* weights_;
   /*! \brief c for Fair loss */
-  score_t c_;
+  double c_;
 };
 }  // namespace LightGBM
......
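Across the regression objectives above, the pattern is uniform: differences and loss derivatives are computed in `double` now that scores are `double`, and the result is narrowed to `score_t` exactly once when written into the gradient/hessian buffers. Below is a compact sketch of that pattern for the Fair-loss formulas in the diff; `FairGradOnePoint` is a hypothetical helper name, and `score_t` is assumed to remain `float`:

```cpp
#include <cmath>

typedef float score_t;  // assumption: gradient storage stays single precision

// Fair loss derivatives from the diff, for one data point:
//   gradient = c * x / (|x| + c),  hessian = c^2 / (|x| + c)^2,  x = score - label.
inline void FairGradOnePoint(double score, float label, double c,
                             score_t* grad, score_t* hess) {
  const double x = score - static_cast<double>(label);
  const double denom = std::fabs(x) + c;                  // shared subexpression
  *grad = static_cast<score_t>(c * x / denom);            // single narrowing cast
  *hess = static_cast<score_t>(c * c / (denom * denom));  // single narrowing cast
}
```

Doing the arithmetic in `double` and casting only at the store keeps intermediate precision; casting earlier, as the old `score_t` arithmetic effectively did, would round every intermediate value.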
@@ -40,10 +40,10 @@ public:
     data_partition_->SetUsedDataIndices(used_indices, num_data);
   }
-  void AddPredictionToScore(score_t* out_score) const override {
+  void AddPredictionToScore(double* out_score) const override {
     #pragma omp parallel for schedule(guided)
     for (int i = 0; i < data_partition_->num_leaves(); ++i) {
-      score_t output = static_cast<score_t>(last_trained_tree_->LeafOutput(i));
+      double output = static_cast<double>(last_trained_tree_->LeafOutput(i));
       data_size_t cnt_leaf_data = 0;
       auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
       for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
......