rank_metric.hpp 5.53 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#ifndef LIGHTGBM_METRIC_RANK_METRIC_HPP_
#define LIGHTGBM_METRIC_RANK_METRIC_HPP_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

#include <LightGBM/metric.h>

#include <omp.h>

#include <sstream>
#include <vector>

namespace LightGBM {

class NDCGMetric:public Metric {
public:
  explicit NDCGMetric(const MetricConfig& config) {
    // get eval position
    for (auto k : config.eval_at) {
      eval_at_.push_back(static_cast<data_size_t>(k));
    }
    // initialize DCG calculator
    DCGCalculator::Init(config.label_gain);
    // get number of threads
26
27
#pragma omp parallel
#pragma omp master
Guolin Ke's avatar
Guolin Ke committed
28
29
30
31
32
33
34
35
    {
      num_threads_ = omp_get_num_threads();
    }
  }

  ~NDCGMetric() {
  }
  void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
36
    for (auto k : eval_at_) {
37
38
      std::stringstream str_buf;
      str_buf << test_name << "'s : ";
39
      str_buf << "NDCG@" + std::to_string(k) + " ";
40
      name_.emplace_back(str_buf.str());
41
    }
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
46
47
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get query boundaries
    query_boundaries_ = metadata.query_boundaries();
    if (query_boundaries_ == nullptr) {
48
      Log::Fatal("The NDCG metric requires query information");
Guolin Ke's avatar
Guolin Ke committed
49
50
51
52
53
    }
    num_queries_ = metadata.num_queries();
    // get query weights
    query_weights_ = metadata.query_weights();
    if (query_weights_ == nullptr) {
54
      sum_query_weights_ = static_cast<double>(num_queries_);
Guolin Ke's avatar
Guolin Ke committed
55
56
57
58
59
60
61
62
    } else {
      sum_query_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_queries_; ++i) {
        sum_query_weights_ += query_weights_[i];
      }
    }
    // cache the inverse max DCG for all querys, used to calculate NDCG
    for (data_size_t i = 0; i < num_queries_; ++i) {
63
      inverse_max_dcgs_.emplace_back(eval_at_.size(), 0.0f);
Guolin Ke's avatar
Guolin Ke committed
64
      DCGCalculator::CalMaxDCG(eval_at_, label_ + query_boundaries_[i],
65
66
        query_boundaries_[i + 1] - query_boundaries_[i],
        &inverse_max_dcgs_[i]);
Guolin Ke's avatar
Guolin Ke committed
67
      for (size_t j = 0; j < inverse_max_dcgs_[i].size(); ++j) {
68
69
70
        if (inverse_max_dcgs_[i][j] > 0.0f) {
          inverse_max_dcgs_[i][j] = 1.0f / inverse_max_dcgs_[i][j];
        } else {
Guolin Ke's avatar
Guolin Ke committed
71
72
          // marking negative for all negative querys.
          // if one meet this query, it's ndcg will be set as -1.
73
          inverse_max_dcgs_[i][j] = -1.0f;
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
78
        }
      }
    }
  }

79
80
  std::vector<std::string> GetName() const override {
    return name_;
81
82
  }

83
84
  score_t factor_to_bigger_better() const override {
    return 1.0f;
85
86
  }

87
  std::vector<double> Eval(const score_t* score) const override {
88
    // some buffers for multi-threading sum up
89
    std::vector<std::vector<double>> result_buffer_;
90
91
92
    for (int i = 0; i < num_threads_; ++i) {
      result_buffer_.emplace_back(eval_at_.size(), 0.0f);
    }
93
    std::vector<score_t> tmp_dcg(eval_at_.size(), 0.0f);
94
95
96
97
98
99
100
101
    if (query_weights_ == nullptr) {
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
        // if all doc in this query are all negative, let its NDCG=1
        if (inverse_max_dcgs_[i][0] <= 0.0f) {
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += 1.0f;
Guolin Ke's avatar
Guolin Ke committed
102
          }
103
104
105
106
107
108
109
110
        } else {
          // calculate DCG
          DCGCalculator::CalDCG(eval_at_, label_ + query_boundaries_[i],
            score + query_boundaries_[i],
            query_boundaries_[i + 1] - query_boundaries_[i], &tmp_dcg);
          // calculate NDCG
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += tmp_dcg[j] * inverse_max_dcgs_[i][j];
Guolin Ke's avatar
Guolin Ke committed
111
112
113
          }
        }
      }
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
    } else {
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
        // if all doc in this query are all negative, let its NDCG=1
        if (inverse_max_dcgs_[i][0] <= 0.0f) {
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += 1.0f;
          }
        } else {
          // calculate DCG
          DCGCalculator::CalDCG(eval_at_, label_ + query_boundaries_[i],
            score + query_boundaries_[i],
            query_boundaries_[i + 1] - query_boundaries_[i], &tmp_dcg);
          // calculate NDCG
          for (size_t j = 0; j < eval_at_.size(); ++j) {
            result_buffer_[tid][j] += tmp_dcg[j] * inverse_max_dcgs_[i][j] * query_weights_[i];
          }
Guolin Ke's avatar
Guolin Ke committed
132
133
        }
      }
134
135
    }
    // Get final average NDCG
136
    std::vector<double> result(eval_at_.size(), 0.0f);
137
138
139
    for (size_t j = 0; j < result.size(); ++j) {
      for (int i = 0; i < num_threads_; ++i) {
        result[j] += result_buffer_[i][j];
wxchan's avatar
wxchan committed
140
      }
141
      result[j] /= sum_query_weights_;
Guolin Ke's avatar
Guolin Ke committed
142
    }
143
    return result;
Guolin Ke's avatar
Guolin Ke committed
144
145
146
147
148
149
150
151
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Name of test set */
152
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
153
154
155
156
157
158
159
  /*! \brief Query boundaries information */
  const data_size_t* query_boundaries_;
  /*! \brief Number of queries */
  data_size_t num_queries_;
  /*! \brief Weights of queries */
  const float* query_weights_;
  /*! \brief Sum weights of queries */
160
  double sum_query_weights_;
Guolin Ke's avatar
Guolin Ke committed
161
162
163
  /*! \brief Evaluate position of NDCG */
  std::vector<data_size_t> eval_at_;
  /*! \brief Cache the inverse max dcg for all queries */
164
  std::vector<std::vector<score_t>> inverse_max_dcgs_;
Guolin Ke's avatar
Guolin Ke committed
165
166
167
168
169
170
  /*! \brief Number of threads */
  int num_threads_;
};

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
171
#endif   // LightGBM_METRIC_RANK_METRIC_HPP_