binary_metric.hpp 7.26 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
#ifndef LIGHTGBM_METRIC_BINARY_METRIC_HPP_
#define LIGHTGBM_METRIC_BINARY_METRIC_HPP_

#include <LightGBM/utils/log.h>

#include <LightGBM/metric.h>

#include <algorithm>
#include <vector>
10
#include <sstream>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
15
16
17
18
19
20

namespace LightGBM {

/*!
* \brief Metric for binary classification task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
*/
template<typename PointWiseLossCalculator>
class BinaryMetric: public Metric {
public:
21
22
  explicit BinaryMetric(const MetricConfig&) {

Guolin Ke's avatar
Guolin Ke committed
23
24
25
26
27
28
  }

  virtual ~BinaryMetric() {

  }

Guolin Ke's avatar
Guolin Ke committed
29
30
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back(PointWiseLossCalculator::Name());
31

Guolin Ke's avatar
Guolin Ke committed
32
33
34
35
36
37
38
39
    num_data_ = num_data;
    // get label
    label_ = metadata.label();

    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
40
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
41
42
43
44
45
46
47
48
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
49
  const std::vector<std::string>& GetName() const override {
50
    return name_;
51
52
  }

53
  double factor_to_bigger_better() const override {
54
    return -1.0f;
55
56
  }

57
58
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective,
                           int) const override {
59
    double sum_loss = 0.0f;
60
61
62
63
64
65
66
67
68
69
70
71
72
    if (objective == nullptr) {
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]);
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
        }
Guolin Ke's avatar
Guolin Ke committed
73
      }
74
    } else {
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          double prob = objective->ConvertOutput(score[i]);
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // sigmoid transform
          double prob = objective->ConvertOutput(score[i]);
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
        }
wxchan's avatar
wxchan committed
90
      }
Guolin Ke's avatar
Guolin Ke committed
91
    }
92
93
    double loss = sum_loss / sum_weights_;
    return std::vector<double>(1, loss);
Guolin Ke's avatar
Guolin Ke committed
94
95
96
97
98
99
100
101
102
103
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weighs */
  const float* weights_;
  /*! \brief Sum weights */
104
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
105
  /*! \brief Name of test set */
106
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
107
108
109
110
111
112
113
114
115
};

/*!
* \brief Log loss metric for binary classification task.
*/
class BinaryLoglossMetric: public BinaryMetric<BinaryLoglossMetric> {
public:
  explicit BinaryLoglossMetric(const MetricConfig& config)
    : BinaryMetric<BinaryLoglossMetric>(config) {}

  /*!
  * \brief Negative log-likelihood of a single sample.
  * \param label Sample label; a value <= 0 is treated as the negative class
  * \param prob Predicted value used as probability of the positive class
  * \return -log(p), where p is the probability given to the labelled class;
  *         when p is not greater than kEpsilon, -log(kEpsilon) is returned
  */
  inline static double LossOnPoint(float label, double prob) {
    // probability the prediction assigns to the class the label belongs to
    const double likelihood = (label <= 0) ? (1.0f - prob) : prob;
    if (likelihood > kEpsilon) {
      return -std::log(likelihood);
    }
    // too small (or not comparable): fall back to the capped loss
    return -std::log(kEpsilon);
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_logloss";
  }
};
/*!
* \brief Error rate metric for binary classification task.
*/
class BinaryErrorMetric: public BinaryMetric<BinaryErrorMetric> {
public:
  explicit BinaryErrorMetric(const MetricConfig& config)
    : BinaryMetric<BinaryErrorMetric>(config) {}

  /*!
  * \brief 0/1 loss of a single sample.
  * \param label Sample label; a value > 0 is the positive class
  * \param prob Predicted value; a value <= 0.5 predicts the negative class
  * \return 1 if the predicted class disagrees with the label, otherwise 0
  */
  inline static double LossOnPoint(float label, double prob) {
    const bool predicts_negative = (prob <= 0.5f);
    // a negative prediction is wrong for a positive label, and vice versa
    const bool is_error = predicts_negative ? (label > 0) : (label <= 0);
    return is_error ? 1.0 : 0.0;
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_error";
  }
};

/*!
* \brief Auc Metric for binary classification task.
*/
class AUCMetric: public Metric {
public:
158
159
  explicit AUCMetric(const MetricConfig&) {

Guolin Ke's avatar
Guolin Ke committed
160
161
162
163
164
  }

  virtual ~AUCMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
165
  const std::vector<std::string>& GetName() const override {
166
    return name_;
167
168
  }

169
  double factor_to_bigger_better() const override {
170
    return 1.0f;
171
172
  }

Guolin Ke's avatar
Guolin Ke committed
173
  void Init(const Metadata& metadata, data_size_t num_data) override {
174
    name_.emplace_back("auc");
175

Guolin Ke's avatar
Guolin Ke committed
176
177
178
179
180
181
182
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
183
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
184
185
186
187
188
189
190
191
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

192
193
  std::vector<double> Eval(const double* score, const ObjectiveFunction*,
                           int) const override {
194
195
196
197
198
199
200
    // get indices sorted by score, descent order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_idx.emplace_back(i);
    }
    std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
    // temp sum of postive label
201
    double cur_pos = 0.0f;
202
    // total sum of postive label
203
    double sum_pos = 0.0f;
204
    // accumlate of auc
205
    double accum = 0.0f;
206
    // temp sum of negative label
207
    double cur_neg = 0.0f;
208
    double threshold = score[sorted_idx[0]];
209
    if (weights_ == nullptr) {  // no weights
Guolin Ke's avatar
Guolin Ke committed
210
      for (data_size_t i = 0; i < num_data_; ++i) {
211
        const float cur_label = label_[sorted_idx[i]];
212
        const double cur_score = score[sorted_idx[i]];
213
214
215
216
217
218
219
220
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
221
        }
Guolin Ke's avatar
Guolin Ke committed
222
223
        cur_neg += (cur_label <= 0);
        cur_pos += (cur_label > 0);
Guolin Ke's avatar
Guolin Ke committed
224
      }
225
226
227
    } else {  // has weights
      for (data_size_t i = 0; i < num_data_; ++i) {
        const float cur_label = label_[sorted_idx[i]];
228
        const double cur_score = score[sorted_idx[i]];
229
230
231
232
233
234
235
236
237
238
        const float cur_weight = weights_[sorted_idx[i]];
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
        }
Guolin Ke's avatar
Guolin Ke committed
239
240
        cur_neg += (cur_label <= 0)*cur_weight;
        cur_pos += (cur_label > 0)*cur_weight;
wxchan's avatar
wxchan committed
241
      }
Guolin Ke's avatar
Guolin Ke committed
242
    }
243
244
    accum += cur_neg*(cur_pos * 0.5f + sum_pos);
    sum_pos += cur_pos;
245
    double auc = 1.0f;
246
247
248
    if (sum_pos > 0.0f && sum_pos != sum_weights_) {
      auc = accum / (sum_pos *(sum_weights_ - sum_pos));
    }
249
    return std::vector<double>(1, auc);
Guolin Ke's avatar
Guolin Ke committed
250
251
252
253
254
255
256
257
258
259
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weighs */
  const float* weights_;
  /*! \brief Sum weights */
260
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
261
  /*! \brief Name of test set */
262
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
263
264
265
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
266
#endif   // LIGHTGBM_METRIC_BINARY_METRIC_HPP_