rank_objective.hpp 12.4 KB
Newer Older
1
/*!
2
3
4
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for
 * license information.
5
 */
Guolin Ke's avatar
Guolin Ke committed
6
7
8
9
#ifndef LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_

#include <LightGBM/metric.h>
10
#include <LightGBM/objective_function.h>
Guolin Ke's avatar
Guolin Ke committed
11

12
13
#include <algorithm>
#include <cmath>
Guolin Ke's avatar
Guolin Ke committed
14
15
#include <cstdio>
#include <cstring>
16
17
#include <limits>
#include <string>
Guolin Ke's avatar
Guolin Ke committed
18
19
20
#include <vector>

namespace LightGBM {
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

/*!
 * \brief Objective function for Ranking
 */
class RankingObjective : public ObjectiveFunction {
 public:
  explicit RankingObjective(const Config& config)
      : seed_(config.objective_seed) {}

  explicit RankingObjective(const std::vector<std::string>&) : seed_(0) {}

  ~RankingObjective() {}

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();
    // get boundries
    query_boundaries_ = metadata.query_boundaries();
    if (query_boundaries_ == nullptr) {
      Log::Fatal("Ranking tasks require query information");
    }
    num_queries_ = metadata.num_queries();
  }

  void GetGradients(const double* score, score_t* gradients,
                    score_t* hessians) const override {
#pragma omp parallel for schedule(guided)
    for (data_size_t i = 0; i < num_queries_; ++i) {
      const data_size_t start = query_boundaries_[i];
      const data_size_t cnt = query_boundaries_[i + 1] - query_boundaries_[i];
      GetGradientsForOneQuery(i, cnt, label_ + start, score + start,
                              gradients + start, hessians + start);
      if (weights_ != nullptr) {
        for (data_size_t j = 0; j < cnt; ++j) {
          gradients[start + j] =
              static_cast<score_t>(gradients[start + j] * weights_[start + j]);
          hessians[start + j] =
              static_cast<score_t>(hessians[start + j] * weights_[start + j]);
        }
      }
    }
  }

  virtual void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt,
                                       const label_t* label,
                                       const double* score, score_t* lambdas,
                                       score_t* hessians) const = 0;

72
  const char* GetName() const override = 0;
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

  std::string ToString() const override {
    std::stringstream str_buf;
    str_buf << GetName();
    return str_buf.str();
  }

  bool NeedAccuratePrediction() const override { return false; }

 protected:
  int seed_;
  data_size_t num_queries_;
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const label_t* label_;
  /*! \brief Pointer of weights */
  const label_t* weights_;
  /*! \brief Query boundries */
  const data_size_t* query_boundaries_;
};
94

Guolin Ke's avatar
Guolin Ke committed
95
/*!
96
97
98
 * \brief Objective function for Lambdrank with NDCG
 */
class LambdarankNDCG : public RankingObjective {
Nikita Titov's avatar
Nikita Titov committed
99
 public:
100
101
102
103
104
  explicit LambdarankNDCG(const Config& config)
      : RankingObjective(config),
        sigmoid_(config.sigmoid),
        norm_(config.lambdarank_norm),
        truncation_level_(config.lambdarank_truncation_level) {
Guolin Ke's avatar
Guolin Ke committed
105
    label_gain_ = config.label_gain;
Guolin Ke's avatar
Guolin Ke committed
106
    // initialize DCG calculator
Guolin Ke's avatar
Guolin Ke committed
107
108
    DCGCalculator::DefaultLabelGain(&label_gain_);
    DCGCalculator::Init(label_gain_);
Guolin Ke's avatar
Guolin Ke committed
109
110
    sigmoid_table_.clear();
    inverse_max_dcgs_.clear();
Guolin Ke's avatar
Guolin Ke committed
111
    if (sigmoid_ <= 0.0) {
112
      Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_);
Guolin Ke's avatar
Guolin Ke committed
113
114
    }
  }
115

116
117
118
119
  explicit LambdarankNDCG(const std::vector<std::string>& strs)
      : RankingObjective(strs) {}

  ~LambdarankNDCG() {}
120

Guolin Ke's avatar
Guolin Ke committed
121
  void Init(const Metadata& metadata, data_size_t num_data) override {
122
    RankingObjective::Init(metadata, num_data);
123
    DCGCalculator::CheckLabel(label_, num_data_);
Guolin Ke's avatar
Guolin Ke committed
124
    inverse_max_dcgs_.resize(num_queries_);
Guolin Ke's avatar
Guolin Ke committed
125
#pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
126
    for (data_size_t i = 0; i < num_queries_; ++i) {
127
128
129
      inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK(
          truncation_level_, label_ + query_boundaries_[i],
          query_boundaries_[i + 1] - query_boundaries_[i]);
Guolin Ke's avatar
Guolin Ke committed
130
131
132
133
134
135
136
137
138

      if (inverse_max_dcgs_[i] > 0.0) {
        inverse_max_dcgs_[i] = 1.0f / inverse_max_dcgs_[i];
      }
    }
    // construct sigmoid table to speed up sigmoid transform
    ConstructSigmoidTable();
  }

139
140
141
142
  inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt,
                                      const label_t* label, const double* score,
                                      score_t* lambdas,
                                      score_t* hessians) const override {
Guolin Ke's avatar
Guolin Ke committed
143
    // get max DCG on current query
144
    const double inverse_max_dcg = inverse_max_dcgs_[query_id];
Guolin Ke's avatar
Guolin Ke committed
145
146
147
148
149
150
    // initialize with zero
    for (data_size_t i = 0; i < cnt; ++i) {
      lambdas[i] = 0.0f;
      hessians[i] = 0.0f;
    }
    // get sorted indices for scores
151
    std::vector<data_size_t> sorted_idx(cnt);
Guolin Ke's avatar
Guolin Ke committed
152
    for (data_size_t i = 0; i < cnt; ++i) {
153
      sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
154
    }
155
156
157
    std::stable_sort(
        sorted_idx.begin(), sorted_idx.end(),
        [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
158
    // get best and worst score
159
160
161
162
163
164
165
    const double best_score = score[sorted_idx[0]];
    data_size_t worst_idx = cnt - 1;
    if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
      worst_idx -= 1;
    }
    const double worst_score = score[sorted_idx[worst_idx]];
    double sum_lambdas = 0.0;
Guolin Ke's avatar
Guolin Ke committed
166
167
168
169
    // start accmulate lambdas by pairs
    for (data_size_t i = 0; i < cnt; ++i) {
      const data_size_t high = sorted_idx[i];
      const int high_label = static_cast<int>(label[high]);
170
      const double high_score = score[high];
171
172
173
      if (high_score == kMinScore) {
        continue;
      }
174
175
176
177
      const double high_label_gain = label_gain_[high_label];
      const double high_discount = DCGCalculator::GetDiscount(i);
      double high_sum_lambda = 0.0;
      double high_sum_hessian = 0.0;
Guolin Ke's avatar
Guolin Ke committed
178
179
      for (data_size_t j = 0; j < cnt; ++j) {
        // skip same data
180
181
182
        if (i == j) {
          continue;
        }
Guolin Ke's avatar
Guolin Ke committed
183
184
        const data_size_t low = sorted_idx[j];
        const int low_label = static_cast<int>(label[low]);
185
        const double low_score = score[low];
Guolin Ke's avatar
Guolin Ke committed
186
        // only consider pair with different label
187
188
189
        if (high_label <= low_label || low_score == kMinScore) {
          continue;
        }
Guolin Ke's avatar
Guolin Ke committed
190

191
        const double delta_score = high_score - low_score;
Guolin Ke's avatar
Guolin Ke committed
192

193
194
        const double low_label_gain = label_gain_[low_label];
        const double low_discount = DCGCalculator::GetDiscount(j);
Guolin Ke's avatar
Guolin Ke committed
195
        // get dcg gap
196
        const double dcg_gap = high_label_gain - low_label_gain;
Guolin Ke's avatar
Guolin Ke committed
197
        // get discount of this pair
198
        const double paired_discount = fabs(high_discount - low_discount);
Guolin Ke's avatar
Guolin Ke committed
199
        // get delta NDCG
200
        double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
Guolin Ke's avatar
Guolin Ke committed
201
        // regular the delta_pair_NDCG by score distance
202
        if (norm_ && best_score != worst_score) {
203
204
          delta_pair_NDCG /= (0.01f + fabs(delta_score));
        }
Guolin Ke's avatar
Guolin Ke committed
205
        // calculate lambda for this pair
206
        double p_lambda = GetSigmoid(delta_score);
207
        double p_hessian = p_lambda * (1.0f - p_lambda);
Guolin Ke's avatar
Guolin Ke committed
208
        // update
209
210
        p_lambda *= -sigmoid_ * delta_pair_NDCG;
        p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
Guolin Ke's avatar
Guolin Ke committed
211
212
        high_sum_lambda += p_lambda;
        high_sum_hessian += p_hessian;
213
214
        lambdas[low] -= static_cast<score_t>(p_lambda);
        hessians[low] += static_cast<score_t>(p_hessian);
215
216
        // lambda is negative, so use minus to accumulate
        sum_lambdas -= 2 * p_lambda;
Guolin Ke's avatar
Guolin Ke committed
217
218
      }
      // update
219
220
      lambdas[high] += static_cast<score_t>(high_sum_lambda);
      hessians[high] += static_cast<score_t>(high_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
221
    }
222
223
224
225
226
227
228
    if (norm_ && sum_lambdas > 0) {
      double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas;
      for (data_size_t i = 0; i < cnt; ++i) {
        lambdas[i] = static_cast<score_t>(lambdas[i] * norm_factor);
        hessians[i] = static_cast<score_t>(hessians[i] * norm_factor);
      }
    }
Guolin Ke's avatar
Guolin Ke committed
229
230
  }

231
  inline double GetSigmoid(double score) const {
Guolin Ke's avatar
Guolin Ke committed
232
233
234
235
    if (score <= min_sigmoid_input_) {
      // too small, use lower bound
      return sigmoid_table_[0];
    } else if (score >= max_sigmoid_input_) {
236
      // too large, use upper bound
Guolin Ke's avatar
Guolin Ke committed
237
238
      return sigmoid_table_[_sigmoid_bins - 1];
    } else {
239
240
      return sigmoid_table_[static_cast<size_t>((score - min_sigmoid_input_) *
                                                sigmoid_table_idx_factor_)];
Guolin Ke's avatar
Guolin Ke committed
241
242
243
244
245
246
247
    }
  }

  void ConstructSigmoidTable() {
    // get boundary
    min_sigmoid_input_ = min_sigmoid_input_ / sigmoid_ / 2;
    max_sigmoid_input_ = -min_sigmoid_input_;
Guolin Ke's avatar
Guolin Ke committed
248
    sigmoid_table_.resize(_sigmoid_bins);
Guolin Ke's avatar
Guolin Ke committed
249
250
    // get score to bin factor
    sigmoid_table_idx_factor_ =
251
        _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
Guolin Ke's avatar
Guolin Ke committed
252
253
    // cache
    for (size_t i = 0; i < _sigmoid_bins; ++i) {
254
      const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
255
      sigmoid_table_[i] = 1.0f / (1.0f + std::exp(score * sigmoid_));
Guolin Ke's avatar
Guolin Ke committed
256
257
258
    }
  }

259
  const char* GetName() const override { return "lambdarank"; }
260

Nikita Titov's avatar
Nikita Titov committed
261
 private:
Guolin Ke's avatar
Guolin Ke committed
262
  /*! \brief Simgoid param */
263
  double sigmoid_;
264
265
  /*! \brief Normalize the lambdas or not */
  bool norm_;
266
  /*! \brief Truncation position for max DCG */
267
268
269
  int truncation_level_;
  /*! \brief Cache inverse max DCG, speed up calculation */
  std::vector<double> inverse_max_dcgs_;
Guolin Ke's avatar
Guolin Ke committed
270
  /*! \brief Cache result for sigmoid transform to speed up */
271
  std::vector<double> sigmoid_table_;
272
  /*! \brief Gains for labels */
273
  std::vector<double> label_gain_;
Guolin Ke's avatar
Guolin Ke committed
274
275
276
  /*! \brief Number of bins in simoid table */
  size_t _sigmoid_bins = 1024 * 1024;
  /*! \brief Minimal input of sigmoid table */
277
  double min_sigmoid_input_ = -50;
Guolin Ke's avatar
Guolin Ke committed
278
  /*! \brief Maximal input of sigmoid table */
279
  double max_sigmoid_input_ = 50;
Guolin Ke's avatar
Guolin Ke committed
280
  /*! \brief Factor that covert score to bin in sigmoid table */
281
  double sigmoid_table_idx_factor_;
Guolin Ke's avatar
Guolin Ke committed
282
283
};

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
/*!
 * \brief Implementation of the learning-to-rank objective function, XE_NDCG
 * [arxiv.org/abs/1911.09798].
 */
class RankXENDCG : public RankingObjective {
 public:
  explicit RankXENDCG(const Config& config) : RankingObjective(config) {}

  explicit RankXENDCG(const std::vector<std::string>& strs)
      : RankingObjective(strs) {}

  ~RankXENDCG() {}

  void Init(const Metadata& metadata, data_size_t num_data) override {
    RankingObjective::Init(metadata, num_data);
    for (data_size_t i = 0; i < num_queries_; ++i) {
      rands_.emplace_back(seed_ + i);
    }
  }

  inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt,
                                      const label_t* label, const double* score,
                                      score_t* lambdas,
                                      score_t* hessians) const override {
    // Turn scores into a probability distribution using Softmax.
    std::vector<double> rho(cnt, 0.0);
    Common::Softmax(score, rho.data(), cnt);

    // used for Phi and L1
    std::vector<double> l1s(cnt);
    double sum_labels = 0;
    for (data_size_t i = 0; i < cnt; ++i) {
      l1s[i] = Phi(label[i], rands_[query_id].NextFloat());
      sum_labels += l1s[i];
    }
    // sum_labels will always be positive number
    sum_labels = std::max<double>(kEpsilon, sum_labels);
    // Approximate gradients and inverse Hessian.
    // First order terms.
    double sum_l1 = 0.0f;
    for (data_size_t i = 0; i < cnt; ++i) {
      l1s[i] = -l1s[i] / sum_labels + rho[i];
326
      sum_l1 += l1s[i] / (1. - rho[i]);
327
328
329
330
331
332
333
334
335
336
337
338
    }
    if (cnt <= 1) {
      // when cnt <= 1, the l2 and l3 are zeros
      for (data_size_t i = 0; i < cnt; ++i) {
        lambdas[i] = static_cast<score_t>(l1s[i]);
        hessians[i] = static_cast<score_t>(rho[i] * (1.0 - rho[i]));
      }
    } else {
      // Second order terms.
      std::vector<double> l2s(cnt, 0.0);
      double sum_l2 = 0.0;
      for (data_size_t i = 0; i < cnt; ++i) {
339
340
        l2s[i] = sum_l1 - (l1s[i] / (1. - rho[i]));
        sum_l2 += l2s[i] * rho[i] / (1. - rho[i]);
341
342
      }
      for (data_size_t i = 0; i < cnt; ++i) {
343
        auto l3 = sum_l2 - (l2s[i] * rho[i] / (1. - rho[i]));
344
        lambdas[i] = static_cast<score_t>(l1s[i] + rho[i] * l2s[i] +
345
                                          rho[i] * l3);
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
        hessians[i] = static_cast<score_t>(rho[i] * (1.0 - rho[i]));
      }
    }
  }

  double Phi(const label_t l, double g) const {
    return Common::Pow(2, static_cast<int>(l)) - g;
  }

  const char* GetName() const override { return "rank_xendcg"; }

 private:
  mutable std::vector<Random> rands_;
};

Guolin Ke's avatar
Guolin Ke committed
361
}  // namespace LightGBM
362
#endif  // LightGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_