Unverified Commit ba0a1f8d authored by Pavel Metrikov's avatar Pavel Metrikov Committed by GitHub
Browse files

Add support to optimize for NDCG at a given truncation level (#3425)



* Add support to optimize for NDCG at a given truncation level

In order to correctly optimize for NDCG@_k_, one should exclude pairs containing both documents beyond the top-_k_ (as they don't affect NDCG@_k_ when swapped).

* Update rank_objective.hpp

* Apply suggestions from code review
Co-authored-by: default avatarGuolin Ke <guolin.ke@outlook.com>

* Update rank_objective.hpp

remove the additional branching: get high_rank and low_rank by one "if".

* Update config.h

add description to lambdarank_truncation_level parameter

* Update Parameters.rst

* Update test_sklearn.py

update expected NDCG value for a test, as it was affected by the underlying change in the algorithm

* Update test_sklearn.py

update NDCG@3 reference value

* fix R learning-to-rank tests

* Update rank_objective.hpp

* Update include/LightGBM/config.h
Co-authored-by: default avatarGuolin Ke <guolin.ke@outlook.com>

* Update Parameters.rst
Co-authored-by: default avatarGuolin Ke <guolin.ke@outlook.com>
Co-authored-by: default avatarJames Lamb <jaylamb20@gmail.com>
parent 53977f36
...@@ -45,9 +45,9 @@ test_that("learning-to-rank with lgb.train() works as expected", { ...@@ -45,9 +45,9 @@ test_that("learning-to-rank with lgb.train() works as expected", {
expect_identical(result[["data_name"]], "training") expect_identical(result[["data_name"]], "training")
} }
expect_identical(sapply(eval_results, function(x) {x$name}), eval_names) expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
expect_equal(eval_results[[1L]][["value"]], 0.825) expect_equal(eval_results[[1L]][["value"]], 0.775)
expect_true(abs(eval_results[[2L]][["value"]] - 0.7766434) < TOLERANCE) expect_true(abs(eval_results[[2L]][["value"]] - 0.745986) < TOLERANCE)
expect_true(abs(eval_results[[3L]][["value"]] - 0.7527939) < TOLERANCE) expect_true(abs(eval_results[[3L]][["value"]] - 0.7351959) < TOLERANCE)
}) })
test_that("learning-to-rank with lgb.cv() works as expected", { test_that("learning-to-rank with lgb.cv() works as expected", {
...@@ -93,7 +93,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", { ...@@ -93,7 +93,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
best_score <- cv_bst$best_score best_score <- cv_bst$best_score
expect_true(best_iter > 0L && best_iter <= nrounds) expect_true(best_iter > 0L && best_iter <= nrounds)
expect_true(best_score > 0.0 && best_score < 1.0) expect_true(best_score > 0.0 && best_score < 1.0)
expect_true(abs(best_score - 0.775) < TOLERANCE) expect_true(abs(best_score - 0.75) < TOLERANCE)
# best_score should be set for the first metric # best_score should be set for the first metric
first_metric <- eval_names[[1L]] first_metric <- eval_names[[1L]]
...@@ -115,18 +115,18 @@ test_that("learning-to-rank with lgb.cv() works as expected", { ...@@ -115,18 +115,18 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
} }
# first and last value of each metric should be as expected # first and last value of each metric should be as expected
ndcg1_values <- c(0.725, 0.75, 0.75, 0.775, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75) ndcg1_values <- c(0.675, 0.725, 0.65, 0.725, 0.75, 0.725, 0.75, 0.725, 0.75, 0.75)
expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE)) expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE))
ndcg2_values <- c( ndcg2_values <- c(
0.6863147, 0.720986, 0.7306574, 0.745986, 0.7306574, 0.6556574, 0.6669721, 0.6306574, 0.6476294, 0.6629581,
0.720986, 0.7403287, 0.7403287, 0.7403287, 0.7306574 0.6476294, 0.6629581, 0.6379581, 0.7113147, 0.6823008
) )
expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE)) expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE))
ndcg3_values <- c( ndcg3_values <- c(
0.6777939, 0.6984639, 0.711732, 0.7234639, 0.711732, 0.6484639, 0.6571238, 0.6469279, 0.6540516, 0.6481857,
0.7101959, 0.719134, 0.719134, 0.725, 0.711732 0.6481857, 0.6481857, 0.6466496, 0.7027939, 0.6629898
) )
expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE)) expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE))
......
...@@ -951,11 +951,13 @@ Objective Parameters ...@@ -951,11 +951,13 @@ Objective Parameters
- set this closer to ``1`` to shift towards a **Poisson** distribution - set this closer to ``1`` to shift towards a **Poisson** distribution
- ``lambdarank_truncation_level`` :raw-html:`<a id="lambdarank_truncation_level" title="Permalink to this parameter" href="#lambdarank_truncation_level">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, constraints: ``lambdarank_truncation_level > 0`` - ``lambdarank_truncation_level`` :raw-html:`<a id="lambdarank_truncation_level" title="Permalink to this parameter" href="#lambdarank_truncation_level">&#x1F517;&#xFE0E;</a>`, default = ``30``, type = int, constraints: ``lambdarank_truncation_level > 0``
- used only in ``lambdarank`` application - used only in ``lambdarank`` application
- used for truncating the max DCG, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__ - controls the number of top-results to focus on during training, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
- is closely related to the desirable cutoff k in the metric NDCG@k that we aim at optimizing the ranker for. The optimal setting for this parameter is likely to be slightly higher than k (e.g., k + 3) to include more pairs of documents to train on, but perhaps not too high to avoid deviating too much from the desired target metric NDCG@k
- ``lambdarank_norm`` :raw-html:`<a id="lambdarank_norm" title="Permalink to this parameter" href="#lambdarank_norm">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool - ``lambdarank_norm`` :raw-html:`<a id="lambdarank_norm" title="Permalink to this parameter" href="#lambdarank_norm">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool
......
...@@ -835,8 +835,9 @@ struct Config { ...@@ -835,8 +835,9 @@ struct Config {
// check = >0 // check = >0
// desc = used only in ``lambdarank`` application // desc = used only in ``lambdarank`` application
// desc = used for truncating the max DCG, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__ // desc = controls the number of top-results to focus on during training, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
int lambdarank_truncation_level = 20; // desc = is closely related to the desirable cutoff k in the metric NDCG@k that we aim at optimizing the ranker for. The optimal setting for this parameter is likely to be slightly higher than k (e.g., k + 3) to include more pairs of documents to train on, but perhaps not too high to avoid deviating too much from the desired target metric NDCG@k
int lambdarank_truncation_level = 30;
// desc = used only in ``lambdarank`` application // desc = used only in ``lambdarank`` application
// desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data // desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data
......
...@@ -163,35 +163,34 @@ class LambdarankNDCG : public RankingObjective { ...@@ -163,35 +163,34 @@ class LambdarankNDCG : public RankingObjective {
} }
const double worst_score = score[sorted_idx[worst_idx]]; const double worst_score = score[sorted_idx[worst_idx]];
double sum_lambdas = 0.0; double sum_lambdas = 0.0;
// start accumulating lambdas by pairs // start accumulating lambdas by pairs that contain at least one document above truncation level
for (data_size_t i = 0; i < cnt; ++i) { for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
const data_size_t high = sorted_idx[i]; if (score[sorted_idx[i]] == kMinScore) { continue; }
const int high_label = static_cast<int>(label[high]); for (data_size_t j = i + 1; j < cnt; ++j) {
const double high_score = score[high]; if (score[sorted_idx[j]] == kMinScore) { continue; }
if (high_score == kMinScore) { // skip pairs with the same labels
continue; if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; }
} data_size_t high_rank, low_rank;
const double high_label_gain = label_gain_[high_label]; if (label[sorted_idx[i]] > label[sorted_idx[j]]) {
const double high_discount = DCGCalculator::GetDiscount(i); high_rank = i;
double high_sum_lambda = 0.0; low_rank = j;
double high_sum_hessian = 0.0; } else {
for (data_size_t j = 0; j < cnt; ++j) { high_rank = j;
// skip same data low_rank = i;
if (i == j) {
continue;
} }
const data_size_t low = sorted_idx[j]; const data_size_t high = sorted_idx[high_rank];
const int high_label = static_cast<int>(label[high]);
const double high_score = score[high];
const double high_label_gain = label_gain_[high_label];
const double high_discount = DCGCalculator::GetDiscount(high_rank);
const data_size_t low = sorted_idx[low_rank];
const int low_label = static_cast<int>(label[low]); const int low_label = static_cast<int>(label[low]);
const double low_score = score[low]; const double low_score = score[low];
// only consider pair with different label const double low_label_gain = label_gain_[low_label];
if (high_label <= low_label || low_score == kMinScore) { const double low_discount = DCGCalculator::GetDiscount(low_rank);
continue;
}
const double delta_score = high_score - low_score; const double delta_score = high_score - low_score;
const double low_label_gain = label_gain_[low_label];
const double low_discount = DCGCalculator::GetDiscount(j);
// get dcg gap // get dcg gap
const double dcg_gap = high_label_gain - low_label_gain; const double dcg_gap = high_label_gain - low_label_gain;
// get discount of this pair // get discount of this pair
...@@ -208,16 +207,13 @@ class LambdarankNDCG : public RankingObjective { ...@@ -208,16 +207,13 @@ class LambdarankNDCG : public RankingObjective {
// update // update
p_lambda *= -sigmoid_ * delta_pair_NDCG; p_lambda *= -sigmoid_ * delta_pair_NDCG;
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG; p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
high_sum_lambda += p_lambda;
high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<score_t>(p_lambda); lambdas[low] -= static_cast<score_t>(p_lambda);
hessians[low] += static_cast<score_t>(p_hessian); hessians[low] += static_cast<score_t>(p_hessian);
lambdas[high] += static_cast<score_t>(p_lambda);
hessians[high] += static_cast<score_t>(p_hessian);
// lambda is negative, so use minus to accumulate // lambda is negative, so use minus to accumulate
sum_lambdas -= 2 * p_lambda; sum_lambdas -= 2 * p_lambda;
} }
// update
lambdas[high] += static_cast<score_t>(high_sum_lambda);
hessians[high] += static_cast<score_t>(high_sum_hessian);
} }
if (norm_ && sum_lambdas > 0) { if (norm_ && sum_lambdas > 0) {
double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas; double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas;
......
...@@ -119,8 +119,8 @@ class TestSklearn(unittest.TestCase): ...@@ -119,8 +119,8 @@ class TestSklearn(unittest.TestCase):
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 24) self.assertLessEqual(gbm.best_iteration_, 24)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5769) self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5674)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.5920) self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.578)
def test_xendcg(self): def test_xendcg(self):
dir_path = os.path.dirname(os.path.realpath(__file__)) dir_path = os.path.dirname(os.path.realpath(__file__))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment