map_metric.hpp 5.36 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
#ifndef LIGHTGBM_METRIC_MAP_METRIC_HPP_
#define LIGHTGBM_METRIC_MAP_METRIC_HPP_

4
#include <LightGBM/metric.h>
Guolin Ke's avatar
Guolin Ke committed
5
6
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
7
#include <LightGBM/utils/openmp_wrapper.h>
Guolin Ke's avatar
Guolin Ke committed
8

9
10
#include <string>
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
15
16
#include <sstream>
#include <vector>

namespace LightGBM {

class MapMetric:public Metric {
Nikita Titov's avatar
Nikita Titov committed
17
 public:
Guolin Ke's avatar
Guolin Ke committed
18
  explicit MapMetric(const Config& config) {
Guolin Ke's avatar
Guolin Ke committed
19
    // get eval position
Guolin Ke's avatar
Guolin Ke committed
20
21
    eval_at_ = config.eval_at;
    DCGCalculator::DefaultEvalAt(&eval_at_);
Guolin Ke's avatar
Guolin Ke committed
22
    // get number of threads
23
24
    #pragma omp parallel
    #pragma omp master
Guolin Ke's avatar
Guolin Ke committed
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
    {
      num_threads_ = omp_get_num_threads();
    }
  }

  ~MapMetric() {
  }

  void Init(const Metadata& metadata, data_size_t num_data) override {
    for (auto k : eval_at_) {
      name_.emplace_back(std::string("map@") + std::to_string(k));
    }
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get query boundaries
    query_boundaries_ = metadata.query_boundaries();
    if (query_boundaries_ == nullptr) {
      Log::Fatal("For MAP metric, there should be query information");
    }
    num_queries_ = metadata.num_queries();
46
    Log::Info("Total groups: %d, total data: %d", num_queries_, num_data_);
Guolin Ke's avatar
Guolin Ke committed
47
48
49
50
51
52
53
54
55
56
    // get query weights
    query_weights_ = metadata.query_weights();
    if (query_weights_ == nullptr) {
      sum_query_weights_ = static_cast<double>(num_queries_);
    } else {
      sum_query_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_queries_; ++i) {
        sum_query_weights_ += query_weights_[i];
      }
    }
Guolin Ke's avatar
Guolin Ke committed
57
58
59
60
61
62
63
64
65

    npos_per_query_.resize(num_queries_, 0);
    for (data_size_t i = 0; i < num_queries_; ++i) {
      for (data_size_t j = query_boundaries_[i]; j < query_boundaries_[i + 1]; ++j) {
        if (label_[j] > 0.5f) {
          ++npos_per_query_[i];
        }
      }
    }
Guolin Ke's avatar
Guolin Ke committed
66
67
68
69
70
71
72
73
74
75
  }

  const std::vector<std::string>& GetName() const override {
    return name_;
  }

  double factor_to_bigger_better() const override {
    return 1.0f;
  }

76
  void CalMapAtK(std::vector<int> ks, data_size_t npos, const label_t* label,
77
                 const double* score, data_size_t num_data, std::vector<double>* out) const {
Guolin Ke's avatar
Guolin Ke committed
78
79
80
81
82
    // get sorted indices by score
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data; ++i) {
      sorted_idx.emplace_back(i);
    }
83
84
    std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                     [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
85
86
87
88
89

    int num_hit = 0;
    double sum_ap = 0.0f;
    data_size_t cur_left = 0;
    for (size_t i = 0; i < ks.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
90
      data_size_t cur_k = static_cast<data_size_t>(ks[i]);
Guolin Ke's avatar
Guolin Ke committed
91
92
93
94
95
      if (cur_k > num_data) { cur_k = num_data; }
      for (data_size_t j = cur_left; j < cur_k; ++j) {
        data_size_t idx = sorted_idx[j];
        if (label[idx] > 0.5f) {
          ++num_hit;
Guolin Ke's avatar
Guolin Ke committed
96
          sum_ap += static_cast<double>(num_hit) / (j + 1.0f);
Guolin Ke's avatar
Guolin Ke committed
97
98
        }
      }
Guolin Ke's avatar
Guolin Ke committed
99
100
101
102
103
      if (npos > 0) {
        (*out)[i] = sum_ap / std::min(npos, cur_k);
      } else {
        (*out)[i] = 1.0f;
      }
Guolin Ke's avatar
Guolin Ke committed
104
105
106
      cur_left = cur_k;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
107
  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
Guolin Ke's avatar
Guolin Ke committed
108
109
110
111
112
113
114
    // some buffers for multi-threading sum up
    std::vector<std::vector<double>> result_buffer_;
    for (int i = 0; i < num_threads_; ++i) {
      result_buffer_.emplace_back(eval_at_.size(), 0.0f);
    }
    std::vector<double> tmp_map(eval_at_.size(), 0.0f);
    if (query_weights_ == nullptr) {
115
      #pragma omp parallel for schedule(guided) firstprivate(tmp_map)
Guolin Ke's avatar
Guolin Ke committed
116
117
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
118
        CalMapAtK(eval_at_, npos_per_query_[i], label_ + query_boundaries_[i],
119
                  score + query_boundaries_[i], query_boundaries_[i + 1] - query_boundaries_[i], &tmp_map);
Guolin Ke's avatar
Guolin Ke committed
120
121
122
123
124
        for (size_t j = 0; j < eval_at_.size(); ++j) {
          result_buffer_[tid][j] += tmp_map[j];
        }
      }
    } else {
125
      #pragma omp parallel for schedule(guided) firstprivate(tmp_map)
Guolin Ke's avatar
Guolin Ke committed
126
127
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
Guolin Ke's avatar
Guolin Ke committed
128
        CalMapAtK(eval_at_, npos_per_query_[i], label_ + query_boundaries_[i],
129
                  score + query_boundaries_[i], query_boundaries_[i + 1] - query_boundaries_[i], &tmp_map);
Guolin Ke's avatar
Guolin Ke committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
        for (size_t j = 0; j < eval_at_.size(); ++j) {
          result_buffer_[tid][j] += tmp_map[j] * query_weights_[i];
        }
      }
    }
    // Get final average MAP
    std::vector<double> result(eval_at_.size(), 0.0f);
    for (size_t j = 0; j < result.size(); ++j) {
      for (int i = 0; i < num_threads_; ++i) {
        result[j] += result_buffer_[i][j];
      }
      result[j] /= sum_query_weights_;
    }
    return result;
  }

Nikita Titov's avatar
Nikita Titov committed
146
 private:
Guolin Ke's avatar
Guolin Ke committed
147
148
149
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
150
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
151
152
153
154
155
  /*! \brief Query boundaries information */
  const data_size_t* query_boundaries_;
  /*! \brief Number of queries */
  data_size_t num_queries_;
  /*! \brief Weights of queries */
156
  const label_t* query_weights_;
Guolin Ke's avatar
Guolin Ke committed
157
158
159
160
161
162
163
  /*! \brief Sum weights of queries */
  double sum_query_weights_;
  /*! \brief Evaluate position of Nmap */
  std::vector<data_size_t> eval_at_;
  /*! \brief Number of threads */
  int num_threads_;
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
164
  std::vector<data_size_t> npos_per_query_;
Guolin Ke's avatar
Guolin Ke committed
165
166
167
168
169
};

}  // namespace LightGBM

#endif   // LIGHTGBM_METRIC_MAP_METRIC_HPP_