"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "46d29e8ad5ee8e64d59b384eb86b4563ec36d39a"
Commit aee92f63 authored by sbruch, committed by Guolin Ke
Browse files

sigmoid_ in grad and hess for rank objective (#2322)

* Lambdas and hessians need to factor sigmoid_ into the computation. Additionally, the sigmoid function has an arbitrary factor of 2 in the exponent; it is not just non-standard but the gradients are not computed correctly anyway.

* Update unit test

* Also remove a heuristic that normalizes the gradient by the difference in scores.

* Also fix unit test after removing the heuristic
parent c421f898
......@@ -104,13 +104,6 @@ class LambdarankNDCG: public ObjectiveFunction {
}
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
// get best and worst score
const double best_score = score[sorted_idx[0]];
data_size_t worst_idx = cnt - 1;
if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
worst_idx -= 1;
}
const double wrost_score = score[sorted_idx[worst_idx]];
// start accmulate lambdas by pairs
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t high = sorted_idx[i];
......@@ -141,16 +134,12 @@ class LambdarankNDCG: public ObjectiveFunction {
const double paired_discount = fabs(high_discount - low_discount);
// get delta NDCG
double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
// regular the delta_pair_NDCG by score distance
if (high_label != low_label && best_score != wrost_score) {
delta_pair_NDCG /= (0.01f + fabs(delta_score));
}
// calculate lambda for this pair
double p_lambda = GetSigmoid(delta_score);
double p_hessian = p_lambda * (2.0f - p_lambda);
double p_hessian = p_lambda * (1.0f - p_lambda);
// update
p_lambda *= -delta_pair_NDCG;
p_hessian *= 2 * delta_pair_NDCG;
p_lambda *= -sigmoid_ * delta_pair_NDCG;
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
high_sum_lambda += p_lambda;
high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<score_t>(p_lambda);
......@@ -193,7 +182,7 @@ class LambdarankNDCG: public ObjectiveFunction {
// cache
for (size_t i = 0; i < _sigmoid_bins; ++i) {
const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
sigmoid_table_[i] = 2.0f / (1.0f + std::exp(2.0f * score * sigmoid_));
sigmoid_table_[i] = 1.0f / (1.0f + std::exp(score * sigmoid_));
}
}
......
......@@ -82,8 +82,8 @@ class TestSklearn(unittest.TestCase):
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
self.assertLessEqual(gbm.best_iteration_, 12)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.65)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.65)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6173)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6479)
def test_regression_with_custom_objective(self):
def objective_ls(y_true, y_pred):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment