dcg_calculator.cpp 5.18 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
#include <LightGBM/metric.h>
#include <LightGBM/utils/log.h>

4
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
5
6
7
8
9
10
#include <cmath>
#include <vector>

namespace LightGBM {

/*! \brief Declaration for some static members */
11
12
std::vector<double> DCGCalculator::label_gain_;
std::vector<double> DCGCalculator::discount_;
Guolin Ke's avatar
Guolin Ke committed
13
14
const data_size_t DCGCalculator::kMaxPosition = 10000;

Guolin Ke's avatar
Guolin Ke committed
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
  if (eval_at->empty()) {
    for (int i = 1; i <= 5; ++i) {
      eval_at->push_back(i);
    }
  } else {
    for (size_t i = 0; i < eval_at->size(); ++i) {
      CHECK(eval_at->at(i) > 0);
    }
  }
}

void DCGCalculator::DefaultLabelGain(std::vector<double>* label_gain) {
  if (!label_gain->empty()) { return; }
  // label_gain = 2^i - 1, may overflow, so we use 31 here
  const int max_label = 31;
  label_gain->push_back(0.0f);
  for (int i = 1; i < max_label; ++i) {
    label_gain->push_back(static_cast<double>((1 << i) - 1));
  }
}

void DCGCalculator::Init(const std::vector<double>& input_label_gain) {
Guolin Ke's avatar
Guolin Ke committed
39
  label_gain_.resize(input_label_gain.size());
40
  for (size_t i = 0; i < input_label_gain.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
41
    label_gain_[i] = static_cast<double>(input_label_gain[i]);
42
  }
Guolin Ke's avatar
Guolin Ke committed
43
  discount_.resize(kMaxPosition);
Guolin Ke's avatar
Guolin Ke committed
44
  for (data_size_t i = 0; i < kMaxPosition; ++i) {
45
    discount_[i] = 1.0 / std::log2(2.0 + i);
Guolin Ke's avatar
Guolin Ke committed
46
47
48
  }
}

49
double DCGCalculator::CalMaxDCGAtK(data_size_t k, const label_t* label, data_size_t num_data) {
50
  double ret = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
51
52
53
54
55
  // counts for all labels
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
56
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

  if (k > num_data) { k = num_data; }
  //  start from top label, and accumulate DCG
  for (data_size_t j = 0; j < k; ++j) {
    while (top_label > 0 && label_cnt[top_label] <= 0) {
      top_label -= 1;
    }
    if (top_label < 0) {
      break;
    }
    ret += discount_[j] * label_gain_[top_label];
    label_cnt[top_label] -= 1;
  }
  return ret;
}

void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
74
                              const label_t* label,
Guolin Ke's avatar
Guolin Ke committed
75
                              data_size_t num_data,
76
                              std::vector<double>* out) {
Guolin Ke's avatar
Guolin Ke committed
77
78
79
80
81
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  // counts for all labels
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
82
  double cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
83
  data_size_t cur_left = 0;
84
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
  // calculate k Max DCG by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      while (top_label > 0 && label_cnt[top_label] <= 0) {
        top_label -= 1;
      }
      if (top_label < 0) {
        break;
      }
      cur_result += discount_[j] * label_gain_[top_label];
      label_cnt[top_label] -= 1;
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}


105
double DCGCalculator::CalDCGAtK(data_size_t k, const label_t* label,
106
                                const double* score, data_size_t num_data) {
Guolin Ke's avatar
Guolin Ke committed
107
  // get sorted indices by score
108
  std::vector<data_size_t> sorted_idx(num_data);
Guolin Ke's avatar
Guolin Ke committed
109
  for (data_size_t i = 0; i < num_data; ++i) {
110
    sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
111
  }
112
113
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
114
115

  if (k > num_data) { k = num_data; }
116
  double dcg = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
117
118
119
120
121
122
123
124
  // calculate dcg
  for (data_size_t i = 0; i < k; ++i) {
    data_size_t idx = sorted_idx[i];
    dcg += label_gain_[static_cast<int>(label[idx])] * discount_[i];
  }
  return dcg;
}

125
void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const label_t* label,
126
                           const double * score, data_size_t num_data, std::vector<double>* out) {
Guolin Ke's avatar
Guolin Ke committed
127
  // get sorted indices by score
128
  std::vector<data_size_t> sorted_idx(num_data);
Guolin Ke's avatar
Guolin Ke committed
129
  for (data_size_t i = 0; i < num_data; ++i) {
130
    sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
131
  }
132
133
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
134

135
  double cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
  data_size_t cur_left = 0;
  // calculate multi dcg by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      data_size_t idx = sorted_idx[j];
      cur_result += label_gain_[static_cast<int>(label[idx])] * discount_[j];
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}

150
void DCGCalculator::CheckLabel(const label_t* label, data_size_t num_data) {
151
  for (data_size_t i = 0; i < num_data; ++i) {
152
    label_t delta = std::fabs(label[i] - static_cast<int>(label[i]));
153
    if (delta > kEpsilon) {
154
155
      Log::Fatal("label should be int type (met %f) for ranking task,\n"
                 "for the gain of label, please set the label_gain parameter", label[i]);
156
157
158
159
160
161
162
    }
    if (static_cast<size_t>(label[i]) >= label_gain_.size() || label[i] < 0) {
      Log::Fatal("label (%d) excel the max range %d", label[i], label_gain_.size());
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
163
}  // namespace LightGBM