Unverified Commit 0dfda826 authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

normalize the lambdas in lambdamart objective (#2331)

* norm the lambda scores

* change default to false

* update doc

* typo

* Update Parameters.rst

* Update config.h

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_sklearn.py

* Update rank_objective.hpp

* Update Parameters.rst

* Update config.h

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_sklearn.py
parent 5e602244
......@@ -802,6 +802,14 @@ Objective Parameters
- optimizes `NDCG <https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG>`__ at this position
- ``lambdamart_norm`` :raw-html:`<a id="lambdamart_norm" title="Permalink to this parameter" href="#lambdamart_norm">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool
- used only in ``lambdarank`` application
- set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data
- set this to ``false`` to enforce the original lambdamart algorithm
- ``label_gain`` :raw-html:`<a id="label_gain" title="Permalink to this parameter" href="#label_gain">&#x1F517;&#xFE0E;</a>`, default = ``0,1,3,7,15,31,63,...,2^30-1``, type = multi-double
- used only in ``lambdarank`` application
......
......@@ -718,6 +718,11 @@ struct Config {
// desc = optimizes `NDCG <https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG>`__ at this position
int max_position = 20;
// desc = used only in ``lambdarank`` application
// desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data
// desc = set this to ``false`` to enforce the original lambdamart algorithm
bool lambdamart_norm = true;
// type = multi-double
// default = 0,1,3,7,15,31,63,...,2^30-1
// desc = used only in ``lambdarank`` application
......
......@@ -264,6 +264,7 @@ std::unordered_set<std::string> Config::parameter_set({
"poisson_max_delta_step",
"tweedie_variance_power",
"max_position",
"lambdamart_norm",
"label_gain",
"metric",
"metric_freq",
......@@ -530,6 +531,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
GetInt(params, "max_position", &max_position);
CHECK(max_position >0);
GetBool(params, "lambdamart_norm", &lambdamart_norm);
if (GetString(params, "label_gain", &tmp_str)) {
label_gain = Common::StringToArray<double>(tmp_str, ',');
}
......@@ -661,6 +664,7 @@ std::string Config::SaveMembersToString() const {
str_buf << "[poisson_max_delta_step: " << poisson_max_delta_step << "]\n";
str_buf << "[tweedie_variance_power: " << tweedie_variance_power << "]\n";
str_buf << "[max_position: " << max_position << "]\n";
str_buf << "[lambdamart_norm: " << lambdamart_norm << "]\n";
str_buf << "[label_gain: " << Common::Join(label_gain, ",") << "]\n";
str_buf << "[metric_freq: " << metric_freq << "]\n";
str_buf << "[is_provide_training_metric: " << is_provide_training_metric << "]\n";
......
......@@ -24,6 +24,7 @@ class LambdarankNDCG: public ObjectiveFunction {
public:
explicit LambdarankNDCG(const Config& config) {
sigmoid_ = static_cast<double>(config.sigmoid);
norm_ = config.lambdamart_norm;
label_gain_ = config.label_gain;
// initialize DCG calculator
DCGCalculator::DefaultLabelGain(&label_gain_);
......@@ -104,6 +105,14 @@ class LambdarankNDCG: public ObjectiveFunction {
}
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
// get best and worst score
const double best_score = score[sorted_idx[0]];
data_size_t worst_idx = cnt - 1;
if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
worst_idx -= 1;
}
const double worst_score = score[sorted_idx[worst_idx]];
double sum_lambdas = 0.0;
    // start to accumulate lambdas by pairs
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t high = sorted_idx[i];
......@@ -134,6 +143,10 @@ class LambdarankNDCG: public ObjectiveFunction {
const double paired_discount = fabs(high_discount - low_discount);
// get delta NDCG
double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
          // regularize the delta_pair_NDCG by the score distance
if (norm_ && high_label != low_label && best_score != worst_score) {
delta_pair_NDCG /= (0.01f + fabs(delta_score));
}
// calculate lambda for this pair
double p_lambda = GetSigmoid(delta_score);
double p_hessian = p_lambda * (1.0f - p_lambda);
......@@ -144,11 +157,20 @@ class LambdarankNDCG: public ObjectiveFunction {
high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<score_t>(p_lambda);
hessians[low] += static_cast<score_t>(p_hessian);
// lambda is negative, so use minus to accumulate
sum_lambdas -= 2 * p_lambda;
}
// update
lambdas[high] += static_cast<score_t>(high_sum_lambda);
hessians[high] += static_cast<score_t>(high_sum_hessian);
}
if (norm_ && sum_lambdas > 0) {
double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas;
for (data_size_t i = 0; i < cnt; ++i) {
lambdas[i] = static_cast<score_t>(lambdas[i] * norm_factor);
hessians[i] = static_cast<score_t>(hessians[i] * norm_factor);
}
}
// if need weights
if (weights_ != nullptr) {
for (data_size_t i = 0; i < cnt; ++i) {
......@@ -205,6 +227,8 @@ class LambdarankNDCG: public ObjectiveFunction {
std::vector<double> inverse_max_dcgs_;
  /*! \brief Sigmoid param */
double sigmoid_;
/*! \brief Normalize the lambdas or not */
bool norm_;
/*! \brief Optimized NDCG@ */
int optimize_pos_at_;
/*! \brief Number of queries */
......
......@@ -79,11 +79,11 @@ class TestSklearn(unittest.TestCase):
'../../examples/lambdarank/rank.test.query'))
gbm = lgb.LGBMRanker()
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
self.assertLessEqual(gbm.best_iteration_, 12)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6173)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6479)
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 25)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6333)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6048)
def test_regression_with_custom_objective(self):
def objective_ls(y_true, y_pred):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment