binary_metric.hpp 7.24 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
#ifndef LIGHTGBM_METRIC_BINARY_METRIC_HPP_
#define LIGHTGBM_METRIC_BINARY_METRIC_HPP_

Guolin Ke's avatar
Guolin Ke committed
4
5
#include <LightGBM/metric.h>

Guolin Ke's avatar
Guolin Ke committed
6
#include <LightGBM/utils/log.h>
7
#include <LightGBM/utils/common.h>
Guolin Ke's avatar
Guolin Ke committed
8
9
10

#include <algorithm>
#include <vector>
11
#include <sstream>
Guolin Ke's avatar
Guolin Ke committed
12
13
14
15
16
17
18
19
20
21

namespace LightGBM {

/*!
* \brief Metric for binary classification task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
*/
template<typename PointWiseLossCalculator>
class BinaryMetric: public Metric {
public:
Guolin Ke's avatar
Guolin Ke committed
22
  explicit BinaryMetric(const Config&) {
Guolin Ke's avatar
Guolin Ke committed
23
24
25
26
27
  }

  virtual ~BinaryMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
28
29
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back(PointWiseLossCalculator::Name());
30

Guolin Ke's avatar
Guolin Ke committed
31
32
33
34
35
36
37
38
    num_data_ = num_data;
    // get label
    label_ = metadata.label();

    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
39
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
40
41
42
43
44
45
46
47
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
48
  const std::vector<std::string>& GetName() const override {
49
    return name_;
50
51
  }

52
  double factor_to_bigger_better() const override {
53
    return -1.0f;
54
55
  }

Guolin Ke's avatar
Guolin Ke committed
56
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
57
    double sum_loss = 0.0f;
58
59
60
61
62
63
64
65
66
67
68
69
70
    if (objective == nullptr) {
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]);
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
        }
Guolin Ke's avatar
Guolin Ke committed
71
      }
72
    } else {
73
74
75
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
76
77
          double prob = 0;
          objective->ConvertOutput(&score[i], &prob);
78
79
80
81
82
83
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
84
85
          double prob = 0;
          objective->ConvertOutput(&score[i], &prob);
86
87
88
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
        }
wxchan's avatar
wxchan committed
89
      }
Guolin Ke's avatar
Guolin Ke committed
90
    }
91
92
    double loss = sum_loss / sum_weights_;
    return std::vector<double>(1, loss);
Guolin Ke's avatar
Guolin Ke committed
93
94
95
96
97
98
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
99
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
100
  /*! \brief Pointer of weighs */
101
  const label_t* weights_;
Guolin Ke's avatar
Guolin Ke committed
102
  /*! \brief Sum weights */
103
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
104
  /*! \brief Name of test set */
105
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
106
107
108
109
110
111
112
};

/*!
* \brief Log loss metric for binary classification task.
*/
class BinaryLoglossMetric: public BinaryMetric<BinaryLoglossMetric> {
public:
  explicit BinaryLoglossMetric(const Config& config)
    : BinaryMetric<BinaryLoglossMetric>(config) {}

  /*!
  * \brief Negative log of the value assigned to the observed class.
  * \param label Record label; values <= 0 are the negative class
  * \param prob Value for the positive class
  * \return -log(prob) for a positive label, -log(1 - prob) for a negative one;
  *         -log(kEpsilon) whenever that argument is not greater than kEpsilon
  */
  inline static double LossOnPoint(label_t label, double prob) {
    // value the model gives to the class that was actually observed
    const double likelihood = (label <= 0) ? (1.0f - prob) : prob;
    if (likelihood > kEpsilon) {
      return -std::log(likelihood);
    }
    // too small (or not comparable): fall back to the loss at kEpsilon
    return -std::log(kEpsilon);
  }

  /*! \brief Name under which the metric is reported */
  inline static const char* Name() {
    return "binary_logloss";
  }
};
/*!
* \brief Error rate metric for binary classification task.
*/
class BinaryErrorMetric: public BinaryMetric<BinaryErrorMetric> {
public:
  explicit BinaryErrorMetric(const Config& config)
    : BinaryMetric<BinaryErrorMetric>(config) {}

  /*!
  * \brief 0/1 loss of a single record, using 0.5 as decision threshold.
  * \param label Record label; values > 0 are the positive class
  * \param prob Value for the positive class
  * \return 1 if the thresholded prediction disagrees with the label, else 0
  */
  inline static double LossOnPoint(label_t label, double prob) {
    // prob <= 0.5 is read as "predicted negative"
    const bool predicted_negative = (prob <= 0.5f);
    // a negative prediction is wrong for a positive label and vice versa
    const bool misclassified = predicted_negative ? (label > 0) : (label <= 0);
    return misclassified;
  }

  /*! \brief Name under which the metric is reported */
  inline static const char* Name() {
    return "binary_error";
  }
};

/*!
* \brief Auc Metric for binary classification task.
*/
class AUCMetric: public Metric {
public:
Guolin Ke's avatar
Guolin Ke committed
157
  explicit AUCMetric(const Config&) {
Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
162
  }

  virtual ~AUCMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
163
  const std::vector<std::string>& GetName() const override {
164
    return name_;
165
166
  }

167
  double factor_to_bigger_better() const override {
168
    return 1.0f;
169
170
  }

Guolin Ke's avatar
Guolin Ke committed
171
  void Init(const Metadata& metadata, data_size_t num_data) override {
172
    name_.emplace_back("auc");
173

Guolin Ke's avatar
Guolin Ke committed
174
175
176
177
178
179
180
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
181
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
182
183
184
185
186
187
188
189
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
190
  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
191
192
193
194
195
    // get indices sorted by score, descent order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_idx.emplace_back(i);
    }
196
    Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
197
    // temp sum of postive label
198
    double cur_pos = 0.0f;
199
    // total sum of postive label
200
    double sum_pos = 0.0f;
201
    // accumlate of auc
202
    double accum = 0.0f;
203
    // temp sum of negative label
204
    double cur_neg = 0.0f;
205
    double threshold = score[sorted_idx[0]];
206
    if (weights_ == nullptr) {  // no weights
Guolin Ke's avatar
Guolin Ke committed
207
      for (data_size_t i = 0; i < num_data_; ++i) {
208
        const label_t cur_label = label_[sorted_idx[i]];
209
        const double cur_score = score[sorted_idx[i]];
210
211
212
213
214
215
216
217
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
218
        }
Guolin Ke's avatar
Guolin Ke committed
219
220
        cur_neg += (cur_label <= 0);
        cur_pos += (cur_label > 0);
Guolin Ke's avatar
Guolin Ke committed
221
      }
222
223
    } else {  // has weights
      for (data_size_t i = 0; i < num_data_; ++i) {
224
        const label_t cur_label = label_[sorted_idx[i]];
225
        const double cur_score = score[sorted_idx[i]];
226
        const label_t cur_weight = weights_[sorted_idx[i]];
227
228
229
230
231
232
233
234
235
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
        }
Guolin Ke's avatar
Guolin Ke committed
236
237
        cur_neg += (cur_label <= 0)*cur_weight;
        cur_pos += (cur_label > 0)*cur_weight;
wxchan's avatar
wxchan committed
238
      }
Guolin Ke's avatar
Guolin Ke committed
239
    }
240
241
    accum += cur_neg*(cur_pos * 0.5f + sum_pos);
    sum_pos += cur_pos;
242
    double auc = 1.0f;
243
244
245
    if (sum_pos > 0.0f && sum_pos != sum_weights_) {
      auc = accum / (sum_pos *(sum_weights_ - sum_pos));
    }
246
    return std::vector<double>(1, auc);
Guolin Ke's avatar
Guolin Ke committed
247
248
249
250
251
252
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
253
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
254
  /*! \brief Pointer of weighs */
255
  const label_t* weights_;
Guolin Ke's avatar
Guolin Ke committed
256
  /*! \brief Sum weights */
257
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
258
  /*! \brief Name of test set */
259
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
260
261
262
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
263
#endif   // LIGHTGBM_METRIC_BINARY_METRIC_HPP_