binary_metric.hpp 6.79 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
#ifndef LIGHTGBM_METRIC_BINARY_METRIC_HPP_
#define LIGHTGBM_METRIC_BINARY_METRIC_HPP_

#include <LightGBM/utils/log.h>

#include <LightGBM/metric.h>

#include <algorithm>
#include <vector>
10
#include <sstream>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
15
16
17
18
19
20
21

namespace LightGBM {

/*!
* \brief Metric for binary classification task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
*/
template<typename PointWiseLossCalculator>
class BinaryMetric: public Metric {
public:
  explicit BinaryMetric(const MetricConfig& config) {
22
    sigmoid_ = static_cast<double>(config.sigmoid);
Guolin Ke's avatar
Guolin Ke committed
23
    if (sigmoid_ <= 0.0f) {
24
      Log::Fatal("Sigmoid parameter %f should greater than zero", sigmoid_);
Guolin Ke's avatar
Guolin Ke committed
25
26
27
28
29
30
31
    }
  }

  virtual ~BinaryMetric() {

  }

Guolin Ke's avatar
Guolin Ke committed
32
33
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back(PointWiseLossCalculator::Name());
34

Guolin Ke's avatar
Guolin Ke committed
35
36
37
38
39
40
41
42
    num_data_ = num_data;
    // get label
    label_ = metadata.label();

    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
43
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
49
50
51
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
52
  const std::vector<std::string>& GetName() const override {
53
    return name_;
54
55
  }

56
  double factor_to_bigger_better() const override {
57
    return -1.0f;
58
59
  }

60
  std::vector<double> Eval(const double* score) const override {
61
    double sum_loss = 0.0f;
62
    if (weights_ == nullptr) {
63
      #pragma omp parallel for schedule(static) reduction(+:sum_loss)
64
65
      for (data_size_t i = 0; i < num_data_; ++i) {
        // sigmoid transform
66
        double prob = 1.0f / (1.0f + std::exp(-sigmoid_ * score[i]));
67
68
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
Guolin Ke's avatar
Guolin Ke committed
69
      }
70
    } else {
71
      #pragma omp parallel for schedule(static) reduction(+:sum_loss)
72
73
      for (data_size_t i = 0; i < num_data_; ++i) {
        // sigmoid transform
74
        double prob = 1.0f / (1.0f + std::exp(-sigmoid_ * score[i]));
75
76
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
wxchan's avatar
wxchan committed
77
      }
Guolin Ke's avatar
Guolin Ke committed
78
    }
79
80
    double loss = sum_loss / sum_weights_;
    return std::vector<double>(1, loss);
Guolin Ke's avatar
Guolin Ke committed
81
82
83
84
85
86
87
88
89
90
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weighs */
  const float* weights_;
  /*! \brief Sum weights */
91
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
92
  /*! \brief Name of test set */
93
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
94
  /*! \brief Sigmoid parameter */
95
  double sigmoid_;
Guolin Ke's avatar
Guolin Ke committed
96
97
98
99
100
101
102
103
104
};

/*!
* \brief Binary log loss (negative log-likelihood) metric.
* Acts as its own point-wise loss calculator for BinaryMetric.
*/
class BinaryLoglossMetric: public BinaryMetric<BinaryLoglossMetric> {
public:
  explicit BinaryLoglossMetric(const MetricConfig& config)
    : BinaryMetric<BinaryLoglossMetric>(config) {}

  /*!
  * \brief Negative log of the probability given to the observed class.
  * \param label Label of the record; values <= 0 are the negative class
  * \param prob Predicted probability of the positive class
  * \return -log(p), where p is prob for a positive label and 1 - prob
  *         otherwise; -log(kEpsilon) when p is not greater than kEpsilon
  */
  inline static double LossOnPoint(float label, double prob) {
    // probability the prediction assigns to the class that was observed
    const double observed_class_prob = (label <= 0) ? (1.0f - prob) : prob;
    if (observed_class_prob > kEpsilon) {
      return -std::log(observed_class_prob);
    }
    // too small (or not comparable): cap the loss at the kEpsilon level
    return -std::log(kEpsilon);
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_logloss";
  }
};
/*!
* \brief Classification error rate metric for binary classification task.
* Acts as its own point-wise loss calculator for BinaryMetric.
*/
class BinaryErrorMetric: public BinaryMetric<BinaryErrorMetric> {
public:
  explicit BinaryErrorMetric(const MetricConfig& config)
    : BinaryMetric<BinaryErrorMetric>(config) {}

  /*!
  * \brief 0/1 loss of one record, with the decision threshold at 0.5.
  * \param label Label of the record; values > 0 are the positive class
  * \param prob Predicted probability of the positive class
  * \return 1 when the predicted class differs from the label, 0 otherwise
  */
  inline static double LossOnPoint(float label, double prob) {
    // prob <= 0.5 predicts the negative class, anything else the positive one
    const bool predicts_negative = (prob <= 0.5f);
    const bool is_error = predicts_negative ? (label > 0) : (label <= 0);
    return is_error ? 1.0 : 0.0;
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_error";
  }
};

/*!
* \brief Auc Metric for binary classification task.
*/
class AUCMetric: public Metric {
public:
147
148
  explicit AUCMetric(const MetricConfig&) {

Guolin Ke's avatar
Guolin Ke committed
149
150
151
152
153
  }

  virtual ~AUCMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
154
  const std::vector<std::string>& GetName() const override {
155
    return name_;
156
157
  }

158
  double factor_to_bigger_better() const override {
159
    return 1.0f;
160
161
  }

Guolin Ke's avatar
Guolin Ke committed
162
  void Init(const Metadata& metadata, data_size_t num_data) override {
163
    name_.emplace_back("auc");
164

Guolin Ke's avatar
Guolin Ke committed
165
166
167
168
169
170
171
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
172
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
173
174
175
176
177
178
179
180
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

181
  std::vector<double> Eval(const double* score) const override {
182
183
184
185
186
187
188
    // get indices sorted by score, descent order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_idx.emplace_back(i);
    }
    std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
    // temp sum of postive label
189
    double cur_pos = 0.0f;
190
    // total sum of postive label
191
    double sum_pos = 0.0f;
192
    // accumlate of auc
193
    double accum = 0.0f;
194
    // temp sum of negative label
195
    double cur_neg = 0.0f;
196
    double threshold = score[sorted_idx[0]];
197
    if (weights_ == nullptr) {  // no weights
Guolin Ke's avatar
Guolin Ke committed
198
      for (data_size_t i = 0; i < num_data_; ++i) {
199
        const float cur_label = label_[sorted_idx[i]];
200
        const double cur_score = score[sorted_idx[i]];
201
202
203
204
205
206
207
208
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
209
        }
Guolin Ke's avatar
Guolin Ke committed
210
211
        cur_neg += (cur_label <= 0);
        cur_pos += (cur_label > 0);
Guolin Ke's avatar
Guolin Ke committed
212
      }
213
214
215
    } else {  // has weights
      for (data_size_t i = 0; i < num_data_; ++i) {
        const float cur_label = label_[sorted_idx[i]];
216
        const double cur_score = score[sorted_idx[i]];
217
218
219
220
221
222
223
224
225
226
        const float cur_weight = weights_[sorted_idx[i]];
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
        }
Guolin Ke's avatar
Guolin Ke committed
227
228
        cur_neg += (cur_label <= 0)*cur_weight;
        cur_pos += (cur_label > 0)*cur_weight;
wxchan's avatar
wxchan committed
229
      }
Guolin Ke's avatar
Guolin Ke committed
230
    }
231
232
    accum += cur_neg*(cur_pos * 0.5f + sum_pos);
    sum_pos += cur_pos;
233
    double auc = 1.0f;
234
235
236
    if (sum_pos > 0.0f && sum_pos != sum_weights_) {
      auc = accum / (sum_pos *(sum_weights_ - sum_pos));
    }
237
    return std::vector<double>(1, auc);
Guolin Ke's avatar
Guolin Ke committed
238
239
240
241
242
243
244
245
246
247
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weighs */
  const float* weights_;
  /*! \brief Sum weights */
248
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
249
  /*! \brief Name of test set */
250
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
251
252
253
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
254
#endif   // LIGHTGBM_METRIC_BINARY_METRIC_HPP_