Unverified Commit ba0a1f8d authored by Pavel Metrikov's avatar Pavel Metrikov Committed by GitHub
Browse files

Add support to optimize for NDCG at a given truncation level (#3425)



* Add support to optimize for NDCG at a given truncation level

In order to correctly optimize for NDCG@_k_, one should exclude pairs containing both documents beyond the top-_k_ (as they don't affect NDCG@_k_ when swapped).

* Update rank_objective.hpp

* Apply suggestions from code review
Co-authored-by: default avatarGuolin Ke <guolin.ke@outlook.com>

* Update rank_objective.hpp

remove the additional branching: get high_rank and low_rank by one "if".

* Update config.h

add description to lambdarank_truncation_level parameter

* Update Parameters.rst

* Update test_sklearn.py

update expected NDCG value for a test, as it was affected by the underlying change in the algorithm

* Update test_sklearn.py

update NDCG@3 reference value

* fix R learning-to-rank tests

* Update rank_objective.hpp

* Update include/LightGBM/config.h
Co-authored-by: default avatarGuolin Ke <guolin.ke@outlook.com>

* Update Parameters.rst
Co-authored-by: default avatarGuolin Ke <guolin.ke@outlook.com>
Co-authored-by: default avatarJames Lamb <jaylamb20@gmail.com>
parent 53977f36
...@@ -45,9 +45,9 @@ test_that("learning-to-rank with lgb.train() works as expected", { ...@@ -45,9 +45,9 @@ test_that("learning-to-rank with lgb.train() works as expected", {
expect_identical(result[["data_name"]], "training") expect_identical(result[["data_name"]], "training")
} }
expect_identical(sapply(eval_results, function(x) {x$name}), eval_names) expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
expect_equal(eval_results[[1L]][["value"]], 0.825) expect_equal(eval_results[[1L]][["value"]], 0.775)
expect_true(abs(eval_results[[2L]][["value"]] - 0.7766434) < TOLERANCE) expect_true(abs(eval_results[[2L]][["value"]] - 0.745986) < TOLERANCE)
expect_true(abs(eval_results[[3L]][["value"]] - 0.7527939) < TOLERANCE) expect_true(abs(eval_results[[3L]][["value"]] - 0.7351959) < TOLERANCE)
}) })
test_that("learning-to-rank with lgb.cv() works as expected", { test_that("learning-to-rank with lgb.cv() works as expected", {
...@@ -93,7 +93,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", { ...@@ -93,7 +93,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
best_score <- cv_bst$best_score best_score <- cv_bst$best_score
expect_true(best_iter > 0L && best_iter <= nrounds) expect_true(best_iter > 0L && best_iter <= nrounds)
expect_true(best_score > 0.0 && best_score < 1.0) expect_true(best_score > 0.0 && best_score < 1.0)
expect_true(abs(best_score - 0.775) < TOLERANCE) expect_true(abs(best_score - 0.75) < TOLERANCE)
# best_score should be set for the first metric # best_score should be set for the first metric
first_metric <- eval_names[[1L]] first_metric <- eval_names[[1L]]
...@@ -115,18 +115,18 @@ test_that("learning-to-rank with lgb.cv() works as expected", { ...@@ -115,18 +115,18 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
} }
# first and last value of each metric should be as expected # first and last value of each metric should be as expected
ndcg1_values <- c(0.725, 0.75, 0.75, 0.775, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75) ndcg1_values <- c(0.675, 0.725, 0.65, 0.725, 0.75, 0.725, 0.75, 0.725, 0.75, 0.75)
expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE)) expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE))
ndcg2_values <- c( ndcg2_values <- c(
0.6863147, 0.720986, 0.7306574, 0.745986, 0.7306574, 0.6556574, 0.6669721, 0.6306574, 0.6476294, 0.6629581,
0.720986, 0.7403287, 0.7403287, 0.7403287, 0.7306574 0.6476294, 0.6629581, 0.6379581, 0.7113147, 0.6823008
) )
expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE)) expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE))
ndcg3_values <- c( ndcg3_values <- c(
0.6777939, 0.6984639, 0.711732, 0.7234639, 0.711732, 0.6484639, 0.6571238, 0.6469279, 0.6540516, 0.6481857,
0.7101959, 0.719134, 0.719134, 0.725, 0.711732 0.6481857, 0.6481857, 0.6466496, 0.7027939, 0.6629898
) )
expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE)) expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE))
......
...@@ -951,11 +951,13 @@ Objective Parameters ...@@ -951,11 +951,13 @@ Objective Parameters
- set this closer to ``1`` to shift towards a **Poisson** distribution - set this closer to ``1`` to shift towards a **Poisson** distribution
- ``lambdarank_truncation_level`` :raw-html:`<a id="lambdarank_truncation_level" title="Permalink to this parameter" href="#lambdarank_truncation_level">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, constraints: ``lambdarank_truncation_level > 0`` - ``lambdarank_truncation_level`` :raw-html:`<a id="lambdarank_truncation_level" title="Permalink to this parameter" href="#lambdarank_truncation_level">&#x1F517;&#xFE0E;</a>`, default = ``30``, type = int, constraints: ``lambdarank_truncation_level > 0``
- used only in ``lambdarank`` application - used only in ``lambdarank`` application
- used for truncating the max DCG, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__ - controls the number of top-results to focus on during training, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
- is closely related to the desirable cutoff k in the metric NDCG@k that we aim at optimizing the ranker for. The optimal setting for this parameter is likely to be slightly higher than k (e.g., k + 3) to include more pairs of documents to train on, but perhaps not too high to avoid deviating too much from the desired target metric NDCG@k
- ``lambdarank_norm`` :raw-html:`<a id="lambdarank_norm" title="Permalink to this parameter" href="#lambdarank_norm">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool - ``lambdarank_norm`` :raw-html:`<a id="lambdarank_norm" title="Permalink to this parameter" href="#lambdarank_norm">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool
......
...@@ -835,8 +835,9 @@ struct Config { ...@@ -835,8 +835,9 @@ struct Config {
// check = >0 // check = >0
// desc = used only in ``lambdarank`` application // desc = used only in ``lambdarank`` application
// desc = used for truncating the max DCG, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__ // desc = controls the number of top-results to focus on during training, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
int lambdarank_truncation_level = 20; // desc = is closely related to the desirable cutoff k in the metric NDCG@k that we aim at optimizing the ranker for. The optimal setting for this parameter is likely to be slightly higher than k (e.g., k + 3) to include more pairs of documents to train on, but perhaps not too high to avoid deviating too much from the desired target metric NDCG@k
int lambdarank_truncation_level = 30;
// desc = used only in ``lambdarank`` application // desc = used only in ``lambdarank`` application
// desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data // desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data
......
...@@ -163,35 +163,34 @@ class LambdarankNDCG : public RankingObjective { ...@@ -163,35 +163,34 @@ class LambdarankNDCG : public RankingObjective {
} }
const double worst_score = score[sorted_idx[worst_idx]]; const double worst_score = score[sorted_idx[worst_idx]];
double sum_lambdas = 0.0; double sum_lambdas = 0.0;
// start accumulating lambdas by pairs // start accumulating lambdas by pairs that contain at least one document above truncation level
for (data_size_t i = 0; i < cnt; ++i) { for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
const data_size_t high = sorted_idx[i]; if (score[sorted_idx[i]] == kMinScore) { continue; }
const int high_label = static_cast<int>(label[high]); for (data_size_t j = i + 1; j < cnt; ++j) {
const double high_score = score[high]; if (score[sorted_idx[j]] == kMinScore) { continue; }
if (high_score == kMinScore) { // skip pairs with the same labels
continue; if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; }
} data_size_t high_rank, low_rank;
const double high_label_gain = label_gain_[high_label]; if (label[sorted_idx[i]] > label[sorted_idx[j]]) {
const double high_discount = DCGCalculator::GetDiscount(i); high_rank = i;
double high_sum_lambda = 0.0; low_rank = j;
double high_sum_hessian = 0.0; } else {
for (data_size_t j = 0; j < cnt; ++j) { high_rank = j;
// skip same data low_rank = i;
if (i == j) {
continue;
} }
const data_size_t low = sorted_idx[j]; const data_size_t high = sorted_idx[high_rank];
const int high_label = static_cast<int>(label[high]);
const double high_score = score[high];
const double high_label_gain = label_gain_[high_label];
const double high_discount = DCGCalculator::GetDiscount(high_rank);
const data_size_t low = sorted_idx[low_rank];
const int low_label = static_cast<int>(label[low]); const int low_label = static_cast<int>(label[low]);
const double low_score = score[low]; const double low_score = score[low];
// only consider pair with different label const double low_label_gain = label_gain_[low_label];
if (high_label <= low_label || low_score == kMinScore) { const double low_discount = DCGCalculator::GetDiscount(low_rank);
continue;
}
const double delta_score = high_score - low_score; const double delta_score = high_score - low_score;
const double low_label_gain = label_gain_[low_label];
const double low_discount = DCGCalculator::GetDiscount(j);
// get dcg gap // get dcg gap
const double dcg_gap = high_label_gain - low_label_gain; const double dcg_gap = high_label_gain - low_label_gain;
// get discount of this pair // get discount of this pair
...@@ -208,16 +207,13 @@ class LambdarankNDCG : public RankingObjective { ...@@ -208,16 +207,13 @@ class LambdarankNDCG : public RankingObjective {
// update // update
p_lambda *= -sigmoid_ * delta_pair_NDCG; p_lambda *= -sigmoid_ * delta_pair_NDCG;
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG; p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
high_sum_lambda += p_lambda;
high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<score_t>(p_lambda); lambdas[low] -= static_cast<score_t>(p_lambda);
hessians[low] += static_cast<score_t>(p_hessian); hessians[low] += static_cast<score_t>(p_hessian);
lambdas[high] += static_cast<score_t>(p_lambda);
hessians[high] += static_cast<score_t>(p_hessian);
// lambda is negative, so use minus to accumulate // lambda is negative, so use minus to accumulate
sum_lambdas -= 2 * p_lambda; sum_lambdas -= 2 * p_lambda;
} }
// update
lambdas[high] += static_cast<score_t>(high_sum_lambda);
hessians[high] += static_cast<score_t>(high_sum_hessian);
} }
if (norm_ && sum_lambdas > 0) { if (norm_ && sum_lambdas > 0) {
double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas; double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas;
......
...@@ -119,8 +119,8 @@ class TestSklearn(unittest.TestCase): ...@@ -119,8 +119,8 @@ class TestSklearn(unittest.TestCase):
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 24) self.assertLessEqual(gbm.best_iteration_, 24)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5769) self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5674)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.5920) self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.578)
def test_xendcg(self): def test_xendcg(self):
dir_path = os.path.dirname(os.path.realpath(__file__)) dir_path = os.path.dirname(os.path.realpath(__file__))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment