"src/vscode:/vscode.git/clone" did not exist on "f7e64a8f5df7769de9a266b2257cea688e3c4925"
dcg_calculator.cpp 4.26 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
#include <LightGBM/metric.h>

#include <LightGBM/utils/log.h>

#include <cmath>

#include <vector>
#include <algorithm>

namespace LightGBM {

/*! \brief Declaration for some static members */
bool DCGCalculator::is_inited_ = false;
14
15
std::vector<score_t> DCGCalculator::label_gain_;
std::vector<score_t> DCGCalculator::discount_;
Guolin Ke's avatar
Guolin Ke committed
16
17
const data_size_t DCGCalculator::kMaxPosition = 10000;

18
void DCGCalculator::Init(std::vector<double> input_label_gain) {
Guolin Ke's avatar
Guolin Ke committed
19
20
  //  only inited one time
  if (is_inited_) { return; }
21
22
23
24
  label_gain_.clear();
  for(size_t i = 0;i < input_label_gain.size();++i){
    label_gain_.push_back(static_cast<score_t>(input_label_gain[i]));
  }
Guolin Ke's avatar
Guolin Ke committed
25
  label_gain_.shrink_to_fit();
Guolin Ke's avatar
Guolin Ke committed
26
27
  discount_.clear();
  for (data_size_t i = 0; i < kMaxPosition; ++i) {
28
    discount_.emplace_back(1.0f / std::log2(2.0f + i));
Guolin Ke's avatar
Guolin Ke committed
29
  }
Guolin Ke's avatar
Guolin Ke committed
30
  discount_.shrink_to_fit();
Guolin Ke's avatar
Guolin Ke committed
31
32
33
  is_inited_ = true;
}

34
35
score_t DCGCalculator::CalMaxDCGAtK(data_size_t k, const float* label, data_size_t num_data) {
  score_t ret = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
36
37
38
39
40
  // counts for all labels
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  for (data_size_t i = 0; i < num_data; ++i) {
    ++label_cnt[static_cast<int>(label[i])];
  }
41
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

  if (k > num_data) { k = num_data; }
  //  start from top label, and accumulate DCG
  for (data_size_t j = 0; j < k; ++j) {
    while (top_label > 0 && label_cnt[top_label] <= 0) {
      top_label -= 1;
    }
    if (top_label < 0) {
      break;
    }
    ret += discount_[j] * label_gain_[top_label];
    label_cnt[top_label] -= 1;
  }
  return ret;
}

void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
                              const float* label,
                              data_size_t num_data,
61
                              std::vector<score_t>* out) {
Guolin Ke's avatar
Guolin Ke committed
62
63
64
  std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
  // counts for all labels
  for (data_size_t i = 0; i < num_data; ++i) {
65
    if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Fatal("Label excel %d", label[i]); }
Guolin Ke's avatar
Guolin Ke committed
66
67
    ++label_cnt[static_cast<int>(label[i])];
  }
68
  score_t cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
69
  data_size_t cur_left = 0;
70
  int top_label = static_cast<int>(label_gain_.size()) - 1;
Guolin Ke's avatar
Guolin Ke committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  // calculate k Max DCG by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      while (top_label > 0 && label_cnt[top_label] <= 0) {
        top_label -= 1;
      }
      if (top_label < 0) {
        break;
      }
      cur_result += discount_[j] * label_gain_[top_label];
      label_cnt[top_label] -= 1;
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}


91
score_t DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
Guolin Ke's avatar
Guolin Ke committed
92
93
94
95
96
97
98
99
100
101
                                const score_t* score, data_size_t num_data) {
  // get sorted indices by score
  std::vector<data_size_t> sorted_idx;
  for (data_size_t i = 0; i < num_data; ++i) {
    sorted_idx.emplace_back(i);
  }
  std::sort(sorted_idx.begin(), sorted_idx.end(),
           [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });

  if (k > num_data) { k = num_data; }
102
  score_t dcg = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
103
104
105
106
107
108
109
110
111
  // calculate dcg
  for (data_size_t i = 0; i < k; ++i) {
    data_size_t idx = sorted_idx[i];
    dcg += label_gain_[static_cast<int>(label[idx])] * discount_[i];
  }
  return dcg;
}

void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const float* label,
112
                           const score_t * score, data_size_t num_data, std::vector<score_t>* out) {
Guolin Ke's avatar
Guolin Ke committed
113
114
115
116
117
118
119
120
  // get sorted indices by score
  std::vector<data_size_t> sorted_idx;
  for (data_size_t i = 0; i < num_data; ++i) {
    sorted_idx.emplace_back(i);
  }
  std::sort(sorted_idx.begin(), sorted_idx.end(),
            [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });

121
  score_t cur_result = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  data_size_t cur_left = 0;
  // calculate multi dcg by one pass
  for (size_t i = 0; i < ks.size(); ++i) {
    data_size_t cur_k = ks[i];
    if (cur_k > num_data) { cur_k = num_data; }
    for (data_size_t j = cur_left; j < cur_k; ++j) {
      data_size_t idx = sorted_idx[j];
      cur_result += label_gain_[static_cast<int>(label[idx])] * discount_[j];
    }
    (*out)[i] = cur_result;
    cur_left = cur_k;
  }
}

}  // namespace LightGBM