Optimize the computation of the cross-entropy ranking loss (#3080)

* Fix loss computation
* Fix test
* Optimize ranking loss computation

Optimize the computation of the cross-entropy ranking loss (#3080)
* Fix loss computation
* Fix test
* Optimize ranking loss computation
7e81d9e2 · sbruch · GitHub · dea2391b · 7e81d9e2
Unverified Commit 7e81d9e2 authored May 21, 2020 by sbruch Committed by GitHub May 21, 2020
Show whitespace changes
Inline Side-by-side

Showing with 36 additions and 29 deletions

src/objective/rank_objective.hpp (+36 -29)

No files found.
--- a/src/objective/rank_objective.hpp
+++ b/src/objective/rank_objective.hpp
@@ -305,48 +305,55 @@ class RankXENDCG : public RankingObjective {
                                      const label_t* label, const double* score,
                                      score_t* lambdas,
                                      score_t* hessians) const override {
+    // Skip groups with too few items.
+    if (cnt <= 1) {
+      for (data_size_t i = 0; i < cnt; ++i) {
+        lambdas[i] = 0.0f;
+        hessians[i] = 0.0f;
+      }
+      return;
+    }
+
    // Turn scores into a probability distribution using Softmax.
    std::vector<double> rho(cnt, 0.0);
    Common::Softmax(score, rho.data(), cnt);

-    // used for Phi and L1
-    std::vector<double> l1s(cnt);
-    double sum_labels = 0;
+    // An auxiliary buffer of parameters used to form the ground-truth
+    // distribution and compute the loss.
+    std::vector<double> params(cnt);
+
+    double inv_denominator = 0;
    for (data_size_t i = 0; i < cnt; ++i) {
-      l1s[i] = Phi(label[i], rands_[query_id].NextFloat());
-      sum_labels += l1s[i];
+      params[i] = Phi(label[i], rands_[query_id].NextFloat());
+      inv_denominator += params[i];
    }
    // sum_labels will always be positive number
-    sum_labels = std::max<double>(kEpsilon, sum_labels);
+    inv_denominator = 1. / std::max<double>(kEpsilon, inv_denominator);
+
    // Approximate gradients and inverse Hessian.
    // First order terms.
-    double sum_l1 = 0.0f;
+    double sum_l1 = 0.0;
    for (data_size_t i = 0; i < cnt; ++i) {
-      l1s[i] = -l1s[i] / sum_labels + rho[i];
-      sum_l1 += l1s[i] / (1. - rho[i]);
+      double term = -params[i] * inv_denominator + rho[i];
+      lambdas[i] = static_cast<score_t>(term);
+      // Params will now store terms needed to compute second-order terms.
+      params[i] = term / (1. - rho[i]);
+      sum_l1 += params[i];
    }
-    if (cnt <= 1) {
-      // when cnt <= 1, the l2 and l3 are zeros
-      for (data_size_t i = 0; i < cnt; ++i) {
-        lambdas[i] = static_cast<score_t>(l1s[i]);
-        hessians[i] = static_cast<score_t>(rho[i] * (1.0 - rho[i]));
-      }
-    } else {
    // Second order terms.
-      std::vector<double> l2s(cnt, 0.0);
    double sum_l2 = 0.0;
    for (data_size_t i = 0; i < cnt; ++i) {
-        l2s[i] = sum_l1 - (l1s[i] / (1. - rho[i]));
-        sum_l2 += l2s[i] * rho[i] / (1. - rho[i]);
+      double term = rho[i] * (sum_l1 - params[i]);
+      lambdas[i] += static_cast<score_t>(term);
+      // Params will now store terms needed to compute third-order terms.
+      params[i] = term / (1. - rho[i]);
+      sum_l2 += params[i];
    }
    for (data_size_t i = 0; i < cnt; ++i) {
-        auto l3 = sum_l2 - (l2s[i] * rho[i] / (1. - rho[i]));
-        lambdas[i] = static_cast<score_t>(l1s[i] + rho[i] * l2s[i] +
-                                          rho[i] * l3);
+      lambdas[i] += static_cast<score_t>(rho[i] * (sum_l2 - params[i]));
      hessians[i] = static_cast<score_t>(rho[i] * (1.0 - rho[i]));
    }
  }
-  }

  double Phi(const label_t l, double g) const {
    return Common::Pow(2, static_cast<int>(l)) - g;