dcg_calculator.cpp 5.18 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
#include <LightGBM/metric.h>

#include <LightGBM/utils/log.h>

#include <cmath>

#include <vector>
#include <algorithm>

namespace LightGBM {

/*! \brief Declaration for some static members */
13
14
std::vector<double> DCGCalculator::label_gain_;
std::vector<double> DCGCalculator::discount_;
Guolin Ke's avatar
Guolin Ke committed
15
16
const data_size_t DCGCalculator::kMaxPosition = 10000;

Guolin Ke's avatar
Guolin Ke committed
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
  if (eval_at->empty()) {
    for (int i = 1; i <= 5; ++i) {
      eval_at->push_back(i);
    }
  } else {
    for (size_t i = 0; i < eval_at->size(); ++i) {
      CHECK(eval_at->at(i) > 0);
    }
  }
}

void DCGCalculator::DefaultLabelGain(std::vector<double>* label_gain) {
  if (!label_gain->empty()) { return; }
  // label_gain = 2^i - 1, may overflow, so we use 31 here
  const int max_label = 31;
  label_gain->push_back(0.0f);
  for (int i = 1; i < max_label; ++i) {
    label_gain->push_back(static_cast<double>((1 << i) - 1));
  }
}

void DCGCalculator::Init(const std::vector<double>& input_label_gain) {
Guolin Ke's avatar
Guolin Ke committed
41
  label_gain_.resize(input_label_gain.size());
42
  for(size_t i = 0;i < input_label_gain.size();++i){
Guolin Ke's avatar
Guolin Ke committed
43
    label_gain_[i] = static_cast<double>(input_label_gain[i]);
44
  }
Guolin Ke's avatar
Guolin Ke committed
45
  discount_.resize(kMaxPosition);
Guolin Ke's avatar
Guolin Ke committed
46
  for (data_size_t i = 0; i < kMaxPosition; ++i) {
47
    discount_[i] = 1.0 / std::log2(2.0 + i);
Guolin Ke's avatar
Guolin Ke committed
48
49
50
  }
}

51
double DCGCalculator::CalMaxDCGAtK(data_size_t k, const label_t* label, data_size_t num_data) {
52
  double ret = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
53
54
55
56
57
  // counts for all labels
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
58
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

  if (k > num_data) { k = num_data; }
  //  start from top label, and accumulate DCG
  for (data_size_t j = 0; j < k; ++j) {
    while (top_label > 0 && label_cnt[top_label] <= 0) {
      top_label -= 1;
    }
    if (top_label < 0) {
      break;
    }
    ret += discount_[j] * label_gain_[top_label];
    label_cnt[top_label] -= 1;
  }
  return ret;
}

void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
76
                              const label_t* label,
Guolin Ke's avatar
Guolin Ke committed
77
                              data_size_t num_data,
78
                              std::vector<double>* out) {
Guolin Ke's avatar
Guolin Ke committed
79
80
81
82
83
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  // counts for all labels
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
84
  double cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
85
  data_size_t cur_left = 0;
86
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  // calculate k Max DCG by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      while (top_label > 0 && label_cnt[top_label] <= 0) {
        top_label -= 1;
      }
      if (top_label < 0) {
        break;
      }
      cur_result += discount_[j] * label_gain_[top_label];
      label_cnt[top_label] -= 1;
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}


107
double DCGCalculator::CalDCGAtK(data_size_t k, const label_t* label,
108
                                const double* score, data_size_t num_data) {
Guolin Ke's avatar
Guolin Ke committed
109
  // get sorted indices by score
110
  std::vector<data_size_t> sorted_idx(num_data);
Guolin Ke's avatar
Guolin Ke committed
111
  for (data_size_t i = 0; i < num_data; ++i) {
112
    sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
113
  }
114
115
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
116
117

  if (k > num_data) { k = num_data; }
118
  double dcg = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
119
120
121
122
123
124
125
126
  // calculate dcg
  for (data_size_t i = 0; i < k; ++i) {
    data_size_t idx = sorted_idx[i];
    dcg += label_gain_[static_cast<int>(label[idx])] * discount_[i];
  }
  return dcg;
}

127
void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const label_t* label,
128
                           const double * score, data_size_t num_data, std::vector<double>* out) {
Guolin Ke's avatar
Guolin Ke committed
129
  // get sorted indices by score
130
  std::vector<data_size_t> sorted_idx(num_data);
Guolin Ke's avatar
Guolin Ke committed
131
  for (data_size_t i = 0; i < num_data; ++i) {
132
    sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
133
  }
134
135
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
136

137
  double cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
138
139
140
141
142
143
144
145
146
147
148
149
150
151
  data_size_t cur_left = 0;
  // calculate multi dcg by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      data_size_t idx = sorted_idx[j];
      cur_result += label_gain_[static_cast<int>(label[idx])] * discount_[j];
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}

152
void DCGCalculator::CheckLabel(const label_t* label, data_size_t num_data) {
153
  for (data_size_t i = 0; i < num_data; ++i) {
154
    label_t delta = std::fabs(label[i] - static_cast<int>(label[i]));
155
    if (delta > kEpsilon) {
156
157
      Log::Fatal("label should be int type (met %f) for ranking task,\n"
                 "for the gain of label, please set the label_gain parameter", label[i]);
158
159
160
161
162
163
164
    }
    if (static_cast<size_t>(label[i]) >= label_gain_.size() || label[i] < 0) {
      Log::Fatal("label (%d) excel the max range %d", label[i], label_gain_.size());
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
165
}  // namespace LightGBM