rank_objective.hpp 12.4 KB
Newer Older
1
/*!
2
3
4
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for
 * license information.
5
 */
Guolin Ke's avatar
Guolin Ke committed
6
7
8
9
#ifndef LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_

#include <LightGBM/metric.h>
10
#include <LightGBM/objective_function.h>
Guolin Ke's avatar
Guolin Ke committed
11

12
13
#include <algorithm>
#include <cmath>
Guolin Ke's avatar
Guolin Ke committed
14
15
#include <cstdio>
#include <cstring>
16
17
#include <limits>
#include <string>
Guolin Ke's avatar
Guolin Ke committed
18
19
20
#include <vector>

namespace LightGBM {
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

/*!
 * \brief Objective function for Ranking
 */
class RankingObjective : public ObjectiveFunction {
 public:
  explicit RankingObjective(const Config& config)
      : seed_(config.objective_seed) {}

  explicit RankingObjective(const std::vector<std::string>&) : seed_(0) {}

  ~RankingObjective() {}

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();
    // get boundries
    query_boundaries_ = metadata.query_boundaries();
    if (query_boundaries_ == nullptr) {
      Log::Fatal("Ranking tasks require query information");
    }
    num_queries_ = metadata.num_queries();
  }

  void GetGradients(const double* score, score_t* gradients,
                    score_t* hessians) const override {
#pragma omp parallel for schedule(guided)
    for (data_size_t i = 0; i < num_queries_; ++i) {
      const data_size_t start = query_boundaries_[i];
      const data_size_t cnt = query_boundaries_[i + 1] - query_boundaries_[i];
      GetGradientsForOneQuery(i, cnt, label_ + start, score + start,
                              gradients + start, hessians + start);
      if (weights_ != nullptr) {
        for (data_size_t j = 0; j < cnt; ++j) {
          gradients[start + j] =
              static_cast<score_t>(gradients[start + j] * weights_[start + j]);
          hessians[start + j] =
              static_cast<score_t>(hessians[start + j] * weights_[start + j]);
        }
      }
    }
  }

  virtual void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt,
                                       const label_t* label,
                                       const double* score, score_t* lambdas,
                                       score_t* hessians) const = 0;

  virtual const char* GetName() const override = 0;

  std::string ToString() const override {
    std::stringstream str_buf;
    str_buf << GetName();
    return str_buf.str();
  }

  bool NeedAccuratePrediction() const override { return false; }

 protected:
  int seed_;
  data_size_t num_queries_;
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const label_t* label_;
  /*! \brief Pointer of weights */
  const label_t* weights_;
  /*! \brief Query boundries */
  const data_size_t* query_boundaries_;
};
Guolin Ke's avatar
Guolin Ke committed
94
/*!
95
96
97
 * \brief Objective function for Lambdrank with NDCG
 */
class LambdarankNDCG : public RankingObjective {
Nikita Titov's avatar
Nikita Titov committed
98
 public:
99
100
101
102
103
  explicit LambdarankNDCG(const Config& config)
      : RankingObjective(config),
        sigmoid_(config.sigmoid),
        norm_(config.lambdarank_norm),
        truncation_level_(config.lambdarank_truncation_level) {
Guolin Ke's avatar
Guolin Ke committed
104
    label_gain_ = config.label_gain;
Guolin Ke's avatar
Guolin Ke committed
105
    // initialize DCG calculator
Guolin Ke's avatar
Guolin Ke committed
106
107
    DCGCalculator::DefaultLabelGain(&label_gain_);
    DCGCalculator::Init(label_gain_);
Guolin Ke's avatar
Guolin Ke committed
108
109
    sigmoid_table_.clear();
    inverse_max_dcgs_.clear();
Guolin Ke's avatar
Guolin Ke committed
110
    if (sigmoid_ <= 0.0) {
111
      Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_);
Guolin Ke's avatar
Guolin Ke committed
112
113
    }
  }
114

115
116
117
118
  explicit LambdarankNDCG(const std::vector<std::string>& strs)
      : RankingObjective(strs) {}

  ~LambdarankNDCG() {}
119

Guolin Ke's avatar
Guolin Ke committed
120
  void Init(const Metadata& metadata, data_size_t num_data) override {
121
    RankingObjective::Init(metadata, num_data);
122
    DCGCalculator::CheckLabel(label_, num_data_);
Guolin Ke's avatar
Guolin Ke committed
123
    inverse_max_dcgs_.resize(num_queries_);
Guolin Ke's avatar
Guolin Ke committed
124
#pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
125
    for (data_size_t i = 0; i < num_queries_; ++i) {
126
127
128
      inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK(
          truncation_level_, label_ + query_boundaries_[i],
          query_boundaries_[i + 1] - query_boundaries_[i]);
Guolin Ke's avatar
Guolin Ke committed
129
130
131
132
133
134
135
136
137

      if (inverse_max_dcgs_[i] > 0.0) {
        inverse_max_dcgs_[i] = 1.0f / inverse_max_dcgs_[i];
      }
    }
    // construct sigmoid table to speed up sigmoid transform
    ConstructSigmoidTable();
  }

138
139
140
141
  inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt,
                                      const label_t* label, const double* score,
                                      score_t* lambdas,
                                      score_t* hessians) const override {
Guolin Ke's avatar
Guolin Ke committed
142
    // get max DCG on current query
143
    const double inverse_max_dcg = inverse_max_dcgs_[query_id];
Guolin Ke's avatar
Guolin Ke committed
144
145
146
147
148
149
    // initialize with zero
    for (data_size_t i = 0; i < cnt; ++i) {
      lambdas[i] = 0.0f;
      hessians[i] = 0.0f;
    }
    // get sorted indices for scores
150
    std::vector<data_size_t> sorted_idx(cnt);
Guolin Ke's avatar
Guolin Ke committed
151
    for (data_size_t i = 0; i < cnt; ++i) {
152
      sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
153
    }
154
155
156
    std::stable_sort(
        sorted_idx.begin(), sorted_idx.end(),
        [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
157
    // get best and worst score
158
159
160
161
162
163
164
    const double best_score = score[sorted_idx[0]];
    data_size_t worst_idx = cnt - 1;
    if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
      worst_idx -= 1;
    }
    const double worst_score = score[sorted_idx[worst_idx]];
    double sum_lambdas = 0.0;
Guolin Ke's avatar
Guolin Ke committed
165
166
167
168
    // start accmulate lambdas by pairs
    for (data_size_t i = 0; i < cnt; ++i) {
      const data_size_t high = sorted_idx[i];
      const int high_label = static_cast<int>(label[high]);
169
      const double high_score = score[high];
170
171
172
      if (high_score == kMinScore) {
        continue;
      }
173
174
175
176
      const double high_label_gain = label_gain_[high_label];
      const double high_discount = DCGCalculator::GetDiscount(i);
      double high_sum_lambda = 0.0;
      double high_sum_hessian = 0.0;
Guolin Ke's avatar
Guolin Ke committed
177
178
      for (data_size_t j = 0; j < cnt; ++j) {
        // skip same data
179
180
181
        if (i == j) {
          continue;
        }
Guolin Ke's avatar
Guolin Ke committed
182
183
        const data_size_t low = sorted_idx[j];
        const int low_label = static_cast<int>(label[low]);
184
        const double low_score = score[low];
Guolin Ke's avatar
Guolin Ke committed
185
        // only consider pair with different label
186
187
188
        if (high_label <= low_label || low_score == kMinScore) {
          continue;
        }
Guolin Ke's avatar
Guolin Ke committed
189

190
        const double delta_score = high_score - low_score;
Guolin Ke's avatar
Guolin Ke committed
191

192
193
        const double low_label_gain = label_gain_[low_label];
        const double low_discount = DCGCalculator::GetDiscount(j);
Guolin Ke's avatar
Guolin Ke committed
194
        // get dcg gap
195
        const double dcg_gap = high_label_gain - low_label_gain;
Guolin Ke's avatar
Guolin Ke committed
196
        // get discount of this pair
197
        const double paired_discount = fabs(high_discount - low_discount);
Guolin Ke's avatar
Guolin Ke committed
198
        // get delta NDCG
199
        double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
Guolin Ke's avatar
Guolin Ke committed
200
        // regular the delta_pair_NDCG by score distance
201
        if (norm_ && best_score != worst_score) {
202
203
          delta_pair_NDCG /= (0.01f + fabs(delta_score));
        }
Guolin Ke's avatar
Guolin Ke committed
204
        // calculate lambda for this pair
205
        double p_lambda = GetSigmoid(delta_score);
206
        double p_hessian = p_lambda * (1.0f - p_lambda);
Guolin Ke's avatar
Guolin Ke committed
207
        // update
208
209
        p_lambda *= -sigmoid_ * delta_pair_NDCG;
        p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
Guolin Ke's avatar
Guolin Ke committed
210
211
        high_sum_lambda += p_lambda;
        high_sum_hessian += p_hessian;
212
213
        lambdas[low] -= static_cast<score_t>(p_lambda);
        hessians[low] += static_cast<score_t>(p_hessian);
214
215
        // lambda is negative, so use minus to accumulate
        sum_lambdas -= 2 * p_lambda;
Guolin Ke's avatar
Guolin Ke committed
216
217
      }
      // update
218
219
      lambdas[high] += static_cast<score_t>(high_sum_lambda);
      hessians[high] += static_cast<score_t>(high_sum_hessian);
Guolin Ke's avatar
Guolin Ke committed
220
    }
221
222
223
224
225
226
227
    if (norm_ && sum_lambdas > 0) {
      double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas;
      for (data_size_t i = 0; i < cnt; ++i) {
        lambdas[i] = static_cast<score_t>(lambdas[i] * norm_factor);
        hessians[i] = static_cast<score_t>(hessians[i] * norm_factor);
      }
    }
Guolin Ke's avatar
Guolin Ke committed
228
229
  }

230
  inline double GetSigmoid(double score) const {
Guolin Ke's avatar
Guolin Ke committed
231
232
233
234
    if (score <= min_sigmoid_input_) {
      // too small, use lower bound
      return sigmoid_table_[0];
    } else if (score >= max_sigmoid_input_) {
235
      // too large, use upper bound
Guolin Ke's avatar
Guolin Ke committed
236
237
      return sigmoid_table_[_sigmoid_bins - 1];
    } else {
238
239
      return sigmoid_table_[static_cast<size_t>((score - min_sigmoid_input_) *
                                                sigmoid_table_idx_factor_)];
Guolin Ke's avatar
Guolin Ke committed
240
241
242
243
244
245
246
    }
  }

  void ConstructSigmoidTable() {
    // get boundary
    min_sigmoid_input_ = min_sigmoid_input_ / sigmoid_ / 2;
    max_sigmoid_input_ = -min_sigmoid_input_;
Guolin Ke's avatar
Guolin Ke committed
247
    sigmoid_table_.resize(_sigmoid_bins);
Guolin Ke's avatar
Guolin Ke committed
248
249
    // get score to bin factor
    sigmoid_table_idx_factor_ =
250
        _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
Guolin Ke's avatar
Guolin Ke committed
251
252
    // cache
    for (size_t i = 0; i < _sigmoid_bins; ++i) {
253
      const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
254
      sigmoid_table_[i] = 1.0f / (1.0f + std::exp(score * sigmoid_));
Guolin Ke's avatar
Guolin Ke committed
255
256
257
    }
  }

258
  const char* GetName() const override { return "lambdarank"; }
259

Nikita Titov's avatar
Nikita Titov committed
260
 private:
Guolin Ke's avatar
Guolin Ke committed
261
  /*! \brief Simgoid param */
262
  double sigmoid_;
263
264
  /*! \brief Normalize the lambdas or not */
  bool norm_;
265
266
267
268
  /*! \brief truncation position for max ndcg */
  int truncation_level_;
  /*! \brief Cache inverse max DCG, speed up calculation */
  std::vector<double> inverse_max_dcgs_;
Guolin Ke's avatar
Guolin Ke committed
269
  /*! \brief Cache result for sigmoid transform to speed up */
270
  std::vector<double> sigmoid_table_;
271
  std::vector<double> label_gain_;
Guolin Ke's avatar
Guolin Ke committed
272
273
274
  /*! \brief Number of bins in simoid table */
  size_t _sigmoid_bins = 1024 * 1024;
  /*! \brief Minimal input of sigmoid table */
275
  double min_sigmoid_input_ = -50;
Guolin Ke's avatar
Guolin Ke committed
276
  /*! \brief Maximal input of sigmoid table */
277
  double max_sigmoid_input_ = 50;
Guolin Ke's avatar
Guolin Ke committed
278
  /*! \brief Factor that covert score to bin in sigmoid table */
279
  double sigmoid_table_idx_factor_;
Guolin Ke's avatar
Guolin Ke committed
280
281
};

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
/*!
 * \brief Implementation of the learning-to-rank objective function, XE_NDCG
 * [arxiv.org/abs/1911.09798].
 */
class RankXENDCG : public RankingObjective {
 public:
  explicit RankXENDCG(const Config& config) : RankingObjective(config) {}

  explicit RankXENDCG(const std::vector<std::string>& strs)
      : RankingObjective(strs) {}

  ~RankXENDCG() {}

  void Init(const Metadata& metadata, data_size_t num_data) override {
    RankingObjective::Init(metadata, num_data);
    for (data_size_t i = 0; i < num_queries_; ++i) {
      rands_.emplace_back(seed_ + i);
    }
  }

  inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt,
                                      const label_t* label, const double* score,
                                      score_t* lambdas,
                                      score_t* hessians) const override {
    // Turn scores into a probability distribution using Softmax.
    std::vector<double> rho(cnt, 0.0);
    Common::Softmax(score, rho.data(), cnt);

    // used for Phi and L1
    std::vector<double> l1s(cnt);
    double sum_labels = 0;
    for (data_size_t i = 0; i < cnt; ++i) {
      l1s[i] = Phi(label[i], rands_[query_id].NextFloat());
      sum_labels += l1s[i];
    }
    // sum_labels will always be positive number
    sum_labels = std::max<double>(kEpsilon, sum_labels);
    // Approximate gradients and inverse Hessian.
    // First order terms.
    double sum_l1 = 0.0f;
    for (data_size_t i = 0; i < cnt; ++i) {
      l1s[i] = -l1s[i] / sum_labels + rho[i];
      sum_l1 += l1s[i];
    }
    if (cnt <= 1) {
      // when cnt <= 1, the l2 and l3 are zeros
      for (data_size_t i = 0; i < cnt; ++i) {
        lambdas[i] = static_cast<score_t>(l1s[i]);
        hessians[i] = static_cast<score_t>(rho[i] * (1.0 - rho[i]));
      }
    } else {
      // Second order terms.
      std::vector<double> l2s(cnt, 0.0);
      double sum_l2 = 0.0;
      for (data_size_t i = 0; i < cnt; ++i) {
        l2s[i] = (sum_l1 - l1s[i]) / (1 - rho[i]);
        sum_l2 += l2s[i];
      }
      for (data_size_t i = 0; i < cnt; ++i) {
        auto l3 = (sum_l2 - l2s[i]) / (1 - rho[i]);
        lambdas[i] = static_cast<score_t>(l1s[i] + rho[i] * l2s[i] +
                                          rho[i] * rho[i] * l3);
        hessians[i] = static_cast<score_t>(rho[i] * (1.0 - rho[i]));
      }
    }
  }

  double Phi(const label_t l, double g) const {
    return Common::Pow(2, static_cast<int>(l)) - g;
  }

  const char* GetName() const override { return "rank_xendcg"; }

 private:
  mutable std::vector<Random> rands_;
};

Guolin Ke's avatar
Guolin Ke committed
359
}  // namespace LightGBM
360
#endif  // LightGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_