dcg_calculator.cpp 5.5 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#include <LightGBM/metric.h>
#include <LightGBM/utils/log.h>

8
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
9
10
11
12
13
14
#include <cmath>
#include <vector>

namespace LightGBM {

/*! \brief Declaration for some static members */
15
16
std::vector<double> DCGCalculator::label_gain_;
std::vector<double> DCGCalculator::discount_;
Guolin Ke's avatar
Guolin Ke committed
17
18
const data_size_t DCGCalculator::kMaxPosition = 10000;

Guolin Ke's avatar
Guolin Ke committed
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
  if (eval_at->empty()) {
    for (int i = 1; i <= 5; ++i) {
      eval_at->push_back(i);
    }
  } else {
    for (size_t i = 0; i < eval_at->size(); ++i) {
      CHECK(eval_at->at(i) > 0);
    }
  }
}

void DCGCalculator::DefaultLabelGain(std::vector<double>* label_gain) {
  if (!label_gain->empty()) { return; }
  // label_gain = 2^i - 1, may overflow, so we use 31 here
  const int max_label = 31;
  label_gain->push_back(0.0f);
  for (int i = 1; i < max_label; ++i) {
    label_gain->push_back(static_cast<double>((1 << i) - 1));
  }
}

void DCGCalculator::Init(const std::vector<double>& input_label_gain) {
Guolin Ke's avatar
Guolin Ke committed
43
  label_gain_.resize(input_label_gain.size());
44
  for (size_t i = 0; i < input_label_gain.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
45
    label_gain_[i] = static_cast<double>(input_label_gain[i]);
46
  }
Guolin Ke's avatar
Guolin Ke committed
47
  discount_.resize(kMaxPosition);
Guolin Ke's avatar
Guolin Ke committed
48
  for (data_size_t i = 0; i < kMaxPosition; ++i) {
49
    discount_[i] = 1.0 / std::log2(2.0 + i);
Guolin Ke's avatar
Guolin Ke committed
50
51
52
  }
}

53
double DCGCalculator::CalMaxDCGAtK(data_size_t k, const label_t* label, data_size_t num_data) {
54
  double ret = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
55
56
57
58
59
  // counts for all labels
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
60
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

  if (k > num_data) { k = num_data; }
  //  start from top label, and accumulate DCG
  for (data_size_t j = 0; j < k; ++j) {
    while (top_label > 0 && label_cnt[top_label] <= 0) {
      top_label -= 1;
    }
    if (top_label < 0) {
      break;
    }
    ret += discount_[j] * label_gain_[top_label];
    label_cnt[top_label] -= 1;
  }
  return ret;
}

void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
78
                              const label_t* label,
Guolin Ke's avatar
Guolin Ke committed
79
                              data_size_t num_data,
80
                              std::vector<double>* out) {
Guolin Ke's avatar
Guolin Ke committed
81
82
83
84
85
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  // counts for all labels
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
86
  double cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
87
  data_size_t cur_left = 0;
88
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
  // calculate k Max DCG by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      while (top_label > 0 && label_cnt[top_label] <= 0) {
        top_label -= 1;
      }
      if (top_label < 0) {
        break;
      }
      cur_result += discount_[j] * label_gain_[top_label];
      label_cnt[top_label] -= 1;
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}


109
double DCGCalculator::CalDCGAtK(data_size_t k, const label_t* label,
110
                                const double* score, data_size_t num_data) {
Guolin Ke's avatar
Guolin Ke committed
111
  // get sorted indices by score
112
  std::vector<data_size_t> sorted_idx(num_data);
Guolin Ke's avatar
Guolin Ke committed
113
  for (data_size_t i = 0; i < num_data; ++i) {
114
    sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
115
  }
116
117
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
118
119

  if (k > num_data) { k = num_data; }
120
  double dcg = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
121
122
123
124
125
126
127
128
  // calculate dcg
  for (data_size_t i = 0; i < k; ++i) {
    data_size_t idx = sorted_idx[i];
    dcg += label_gain_[static_cast<int>(label[idx])] * discount_[i];
  }
  return dcg;
}

129
void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const label_t* label,
130
                           const double * score, data_size_t num_data, std::vector<double>* out) {
Guolin Ke's avatar
Guolin Ke committed
131
  // get sorted indices by score
132
  std::vector<data_size_t> sorted_idx(num_data);
Guolin Ke's avatar
Guolin Ke committed
133
  for (data_size_t i = 0; i < num_data; ++i) {
134
    sorted_idx[i] = i;
Guolin Ke's avatar
Guolin Ke committed
135
  }
136
137
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Guolin Ke's avatar
Guolin Ke committed
138

139
  double cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
140
141
142
143
144
145
146
147
148
149
150
151
152
153
  data_size_t cur_left = 0;
  // calculate multi dcg by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      data_size_t idx = sorted_idx[j];
      cur_result += label_gain_[static_cast<int>(label[idx])] * discount_[j];
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}

154
void DCGCalculator::CheckLabel(const label_t* label, data_size_t num_data) {
155
  for (data_size_t i = 0; i < num_data; ++i) {
156
    label_t delta = std::fabs(label[i] - static_cast<int>(label[i]));
157
    if (delta > kEpsilon) {
158
159
      Log::Fatal("label should be int type (met %f) for ranking task,\n"
                 "for the gain of label, please set the label_gain parameter", label[i]);
160
    }
161
162
163
164
165
166
167

    if (label[i] < 0) {
      Log::Fatal("Label should be non-negative (met %f) for ranking task", label[i]);
    }

    if (static_cast<size_t>(label[i]) >= label_gain_.size()) {
      Log::Fatal("Label %zu is not less than the number of label mappings (%zu)", static_cast<size_t>(label[i]), label_gain_.size());
168
169
170
171
    }
  }
}

Guolin Ke's avatar
Guolin Ke committed
172
}  // namespace LightGBM