rank_metric.hpp 5.84 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
8
#ifndef LIGHTGBM_METRIC_RANK_METRIC_HPP_
#define LIGHTGBM_METRIC_RANK_METRIC_HPP_

#include <LightGBM/metric.h>
Guolin Ke's avatar
Guolin Ke committed
9
10
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
11
#include <LightGBM/utils/openmp_wrapper.h>
Guolin Ke's avatar
Guolin Ke committed
12

13
#include <string>
Guolin Ke's avatar
Guolin Ke committed
14
15
16
17
18
19
#include <sstream>
#include <vector>

namespace LightGBM {

class NDCGMetric:public Metric {
Nikita Titov's avatar
Nikita Titov committed
20
 public:
Guolin Ke's avatar
Guolin Ke committed
21
  explicit NDCGMetric(const Config& config) {
Guolin Ke's avatar
Guolin Ke committed
22
    // get eval position
Guolin Ke's avatar
Guolin Ke committed
23
24
25
26
    eval_at_ = config.eval_at;
    auto label_gain = config.label_gain;
    DCGCalculator::DefaultEvalAt(&eval_at_);
    DCGCalculator::DefaultLabelGain(&label_gain);
Guolin Ke's avatar
Guolin Ke committed
27
    // initialize DCG calculator
Guolin Ke's avatar
Guolin Ke committed
28
    DCGCalculator::Init(label_gain);
Guolin Ke's avatar
Guolin Ke committed
29
30
31
32
  }

  ~NDCGMetric() {
  }
Guolin Ke's avatar
Guolin Ke committed
33
  void Init(const Metadata& metadata, data_size_t num_data) override {
34
    for (auto k : eval_at_) {
35
      name_.emplace_back(std::string("ndcg@") + std::to_string(k));
36
    }
Guolin Ke's avatar
Guolin Ke committed
37
38
39
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
40
    DCGCalculator::CheckLabel(label_, num_data_);
Guolin Ke's avatar
Guolin Ke committed
41
42
43
    // get query boundaries
    query_boundaries_ = metadata.query_boundaries();
    if (query_boundaries_ == nullptr) {
44
      Log::Fatal("The NDCG metric requires query information");
Guolin Ke's avatar
Guolin Ke committed
45
46
47
48
49
    }
    num_queries_ = metadata.num_queries();
    // get query weights
    query_weights_ = metadata.query_weights();
    if (query_weights_ == nullptr) {
50
      sum_query_weights_ = static_cast<double>(num_queries_);
Guolin Ke's avatar
Guolin Ke committed
51
52
53
54
55
56
    } else {
      sum_query_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_queries_; ++i) {
        sum_query_weights_ += query_weights_[i];
      }
    }
57
    inverse_max_dcgs_.resize(num_queries_);
Guolin Ke's avatar
Guolin Ke committed
58
    // cache the inverse max DCG for all querys, used to calculate NDCG
59
    #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
60
    for (data_size_t i = 0; i < num_queries_; ++i) {
61
      inverse_max_dcgs_[i].resize(eval_at_.size(), 0.0f);
Guolin Ke's avatar
Guolin Ke committed
62
      DCGCalculator::CalMaxDCG(eval_at_, label_ + query_boundaries_[i],
63
64
                               query_boundaries_[i + 1] - query_boundaries_[i],
                               &inverse_max_dcgs_[i]);
Guolin Ke's avatar
Guolin Ke committed
65
      for (size_t j = 0; j < inverse_max_dcgs_[i].size(); ++j) {
66
67
68
        if (inverse_max_dcgs_[i][j] > 0.0f) {
          inverse_max_dcgs_[i][j] = 1.0f / inverse_max_dcgs_[i][j];
        } else {
Guolin Ke's avatar
Guolin Ke committed
69
70
          // marking negative for all negative querys.
          // if one meet this query, it's ndcg will be set as -1.
71
          inverse_max_dcgs_[i][j] = -1.0f;
Guolin Ke's avatar
Guolin Ke committed
72
73
74
75
76
        }
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
77
  const std::vector<std::string>& GetName() const override {
78
    return name_;
79
80
  }

81
  double factor_to_bigger_better() const override {
82
    return 1.0f;
83
84
  }

Guolin Ke's avatar
Guolin Ke committed
85
  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
86
    int num_threads = OMP_NUM_THREADS();
87
    // some buffers for multi-threading sum up
88
    std::vector<std::vector<double>> result_buffer_;
89
    for (int i = 0; i < num_threads; ++i) {
90
91
      result_buffer_.emplace_back(eval_at_.size(), 0.0f);
    }
92
    std::vector<double> tmp_dcg(eval_at_.size(), 0.0f);
93
    if (query_weights_ == nullptr) {
94
      #pragma omp parallel for schedule(static) firstprivate(tmp_dcg)
95
96
97
98
99
100
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
        // if all doc in this query are all negative, let its NDCG=1
        if (inverse_max_dcgs_[i][0] <= 0.0f) {
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += 1.0f;
Guolin Ke's avatar
Guolin Ke committed
101
          }
102
103
104
        } else {
          // calculate DCG
          DCGCalculator::CalDCG(eval_at_, label_ + query_boundaries_[i],
105
106
                                score + query_boundaries_[i],
                                query_boundaries_[i + 1] - query_boundaries_[i], &tmp_dcg);
107
108
109
          // calculate NDCG
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += tmp_dcg[j] * inverse_max_dcgs_[i][j];
Guolin Ke's avatar
Guolin Ke committed
110
111
112
          }
        }
      }
113
    } else {
114
      #pragma omp parallel for schedule(static) firstprivate(tmp_dcg)
115
116
117
118
119
120
121
122
123
124
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
        // if all doc in this query are all negative, let its NDCG=1
        if (inverse_max_dcgs_[i][0] <= 0.0f) {
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += 1.0f;
          }
        } else {
          // calculate DCG
          DCGCalculator::CalDCG(eval_at_, label_ + query_boundaries_[i],
125
126
                                score + query_boundaries_[i],
                                query_boundaries_[i + 1] - query_boundaries_[i], &tmp_dcg);
127
128
129
130
          // calculate NDCG
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += tmp_dcg[j] * inverse_max_dcgs_[i][j] * query_weights_[i];
          }
Guolin Ke's avatar
Guolin Ke committed
131
132
        }
      }
133
134
    }
    // Get final average NDCG
135
    std::vector<double> result(eval_at_.size(), 0.0f);
136
    for (size_t j = 0; j < result.size(); ++j) {
137
      for (int i = 0; i < num_threads; ++i) {
138
        result[j] += result_buffer_[i][j];
wxchan's avatar
wxchan committed
139
      }
140
      result[j] /= sum_query_weights_;
Guolin Ke's avatar
Guolin Ke committed
141
    }
142
    return result;
Guolin Ke's avatar
Guolin Ke committed
143
144
  }

Nikita Titov's avatar
Nikita Titov committed
145
 private:
Guolin Ke's avatar
Guolin Ke committed
146
147
148
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
149
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
150
  /*! \brief Name of test set */
151
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
152
153
154
155
156
  /*! \brief Query boundaries information */
  const data_size_t* query_boundaries_;
  /*! \brief Number of queries */
  data_size_t num_queries_;
  /*! \brief Weights of queries */
157
  const label_t* query_weights_;
Guolin Ke's avatar
Guolin Ke committed
158
  /*! \brief Sum weights of queries */
159
  double sum_query_weights_;
Guolin Ke's avatar
Guolin Ke committed
160
161
162
  /*! \brief Evaluate position of NDCG */
  std::vector<data_size_t> eval_at_;
  /*! \brief Cache the inverse max dcg for all queries */
163
  std::vector<std::vector<double>> inverse_max_dcgs_;
Guolin Ke's avatar
Guolin Ke committed
164
165
166
167
};

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
168
#endif   // LightGBM_METRIC_RANK_METRIC_HPP_