binary_metric.hpp 11.3 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
5
6
#ifndef LIGHTGBM_SRC_METRIC_BINARY_METRIC_HPP_
#define LIGHTGBM_SRC_METRIC_BINARY_METRIC_HPP_
Guolin Ke's avatar
Guolin Ke committed
7

8
9
10
11
#include <LightGBM/metric.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

12
#include <string>
Guolin Ke's avatar
Guolin Ke committed
13
#include <algorithm>
14
#include <sstream>
15
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
16
17
18
19
20
21
22
23
24

namespace LightGBM {

/*!
* \brief Metric for binary classification task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
*/
template<typename PointWiseLossCalculator>
class BinaryMetric: public Metric {
Nikita Titov's avatar
Nikita Titov committed
25
 public:
Guolin Ke's avatar
Guolin Ke committed
26
  explicit BinaryMetric(const Config&) {
Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
31
  }

  virtual ~BinaryMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
32
33
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back(PointWiseLossCalculator::Name());
34

Guolin Ke's avatar
Guolin Ke committed
35
36
37
38
39
40
41
42
    num_data_ = num_data;
    // get label
    label_ = metadata.label();

    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
43
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
49
50
51
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
52
  const std::vector<std::string>& GetName() const override {
53
    return name_;
54
55
  }

56
  double factor_to_bigger_better() const override {
57
    return -1.0f;
58
59
  }

Guolin Ke's avatar
Guolin Ke committed
60
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
61
    double sum_loss = 0.0f;
62
63
    if (objective == nullptr) {
      if (weights_ == nullptr) {
64
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
65
66
67
68
69
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]);
        }
      } else {
70
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
71
72
73
74
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
        }
Guolin Ke's avatar
Guolin Ke committed
75
      }
76
    } else {
77
      if (weights_ == nullptr) {
78
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
79
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
80
81
          double prob = 0;
          objective->ConvertOutput(&score[i], &prob);
82
83
84
85
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
        }
      } else {
86
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
87
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
88
89
          double prob = 0;
          objective->ConvertOutput(&score[i], &prob);
90
91
92
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
        }
wxchan's avatar
wxchan committed
93
      }
Guolin Ke's avatar
Guolin Ke committed
94
    }
95
96
    double loss = sum_loss / sum_weights_;
    return std::vector<double>(1, loss);
Guolin Ke's avatar
Guolin Ke committed
97
98
  }

99
 protected:
Guolin Ke's avatar
Guolin Ke committed
100
101
102
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
103
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
104
  /*! \brief Pointer of weighs */
105
  const label_t* weights_;
Guolin Ke's avatar
Guolin Ke committed
106
  /*! \brief Sum weights */
107
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
108
  /*! \brief Name of test set */
109
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
};

/*!
* \brief Log loss metric for binary classification task.
*/
class BinaryLoglossMetric: public BinaryMetric<BinaryLoglossMetric> {
 public:
  explicit BinaryLoglossMetric(const Config& config)
    : BinaryMetric<BinaryLoglossMetric>(config) {}

  /*!
  * \brief Negative log of the probability assigned to the label's own class
  * \param label Values <= 0 select the "1 - prob" branch, everything else selects "prob"
  * \param prob Probability paired with the non-negative-class branch
  * \return -log(p) for the selected p, or -log(kEpsilon) when p is not above kEpsilon
  */
  inline static double LossOnPoint(label_t label, double prob) {
    // probability mass on the side the label belongs to
    const double true_class_prob = (label <= 0) ? 1.0f - prob : prob;
    // anything not strictly above kEpsilon is clamped to the kEpsilon loss
    if (true_class_prob > kEpsilon) {
      return -std::log(true_class_prob);
    }
    return -std::log(kEpsilon);
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_logloss";
  }
};
/*!
* \brief Error rate metric for binary classification task.
*/
class BinaryErrorMetric: public BinaryMetric<BinaryErrorMetric> {
 public:
  explicit BinaryErrorMetric(const Config& config)
    : BinaryMetric<BinaryErrorMetric>(config) {}

  /*!
  * \brief 0/1 loss of one data point
  * \param label Values > 0 count as positive, values <= 0 as negative
  * \param prob Prediction; values <= 0.5 are read as a negative prediction
  * \return 1.0 when prediction and label disagree, 0.0 otherwise
  */
  inline static double LossOnPoint(label_t label, double prob) {
    // exactly 0.5 falls on the negative side
    const bool predicted_negative = (prob <= 0.5f);
    // a negative prediction is wrong for positive labels and vice versa
    const bool misclassified = predicted_negative ? (label > 0) : (label <= 0);
    return misclassified ? 1.0 : 0.0;
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_error";
  }
};

/*!
* \brief Auc Metric for binary classification task.
*/
class AUCMetric: public Metric {
Nikita Titov's avatar
Nikita Titov committed
160
 public:
Guolin Ke's avatar
Guolin Ke committed
161
  explicit AUCMetric(const Config&) {
Guolin Ke's avatar
Guolin Ke committed
162
163
164
165
166
  }

  virtual ~AUCMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
167
  const std::vector<std::string>& GetName() const override {
168
    return name_;
169
170
  }

171
  double factor_to_bigger_better() const override {
172
    return 1.0f;
173
174
  }

Guolin Ke's avatar
Guolin Ke committed
175
  void Init(const Metadata& metadata, data_size_t num_data) override {
176
    name_.emplace_back("auc");
177

Guolin Ke's avatar
Guolin Ke committed
178
179
180
181
182
183
184
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
185
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
186
187
188
189
190
191
192
193
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
194
  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
195
196
197
198
199
    // get indices sorted by score, descent order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_idx.emplace_back(i);
    }
200
    Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Andrew Ziem's avatar
Andrew Ziem committed
201
    // temp sum of positive label
202
    double cur_pos = 0.0f;
Andrew Ziem's avatar
Andrew Ziem committed
203
    // total sum of positive label
204
    double sum_pos = 0.0f;
Andrew Ziem's avatar
Andrew Ziem committed
205
    // accumulate of AUC
206
    double accum = 0.0f;
207
    // temp sum of negative label
208
    double cur_neg = 0.0f;
209
    double threshold = score[sorted_idx[0]];
210
    if (weights_ == nullptr) {  // no weights
Guolin Ke's avatar
Guolin Ke committed
211
      for (data_size_t i = 0; i < num_data_; ++i) {
212
        const label_t cur_label = label_[sorted_idx[i]];
213
        const double cur_score = score[sorted_idx[i]];
214
215
216
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
Andrew Ziem's avatar
Andrew Ziem committed
217
          // accumulate
218
219
220
221
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
222
        }
Guolin Ke's avatar
Guolin Ke committed
223
224
        cur_neg += (cur_label <= 0);
        cur_pos += (cur_label > 0);
Guolin Ke's avatar
Guolin Ke committed
225
      }
226
227
    } else {  // has weights
      for (data_size_t i = 0; i < num_data_; ++i) {
228
        const label_t cur_label = label_[sorted_idx[i]];
229
        const double cur_score = score[sorted_idx[i]];
230
        const label_t cur_weight = weights_[sorted_idx[i]];
231
232
233
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
Andrew Ziem's avatar
Andrew Ziem committed
234
          // accumulate
235
236
237
238
239
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
        }
Guolin Ke's avatar
Guolin Ke committed
240
241
        cur_neg += (cur_label <= 0)*cur_weight;
        cur_pos += (cur_label > 0)*cur_weight;
wxchan's avatar
wxchan committed
242
      }
Guolin Ke's avatar
Guolin Ke committed
243
    }
244
245
    accum += cur_neg*(cur_pos * 0.5f + sum_pos);
    sum_pos += cur_pos;
246
    double auc = 1.0f;
247
248
249
    if (sum_pos > 0.0f && sum_pos != sum_weights_) {
      auc = accum / (sum_pos *(sum_weights_ - sum_pos));
    }
250
    return std::vector<double>(1, auc);
Guolin Ke's avatar
Guolin Ke committed
251
252
  }

Nikita Titov's avatar
Nikita Titov committed
253
 private:
Guolin Ke's avatar
Guolin Ke committed
254
255
256
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
257
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
258
  /*! \brief Pointer of weighs */
259
  const label_t* weights_;
Guolin Ke's avatar
Guolin Ke committed
260
  /*! \brief Sum weights */
261
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
262
  /*! \brief Name of test set */
263
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
264
265
};

266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311

/*!
* \brief Average Precision Metric for binary classification task.
*/
class AveragePrecisionMetric: public Metric {
 public:
  explicit AveragePrecisionMetric(const Config&) {
  }

  virtual ~AveragePrecisionMetric() {
  }

  const std::vector<std::string>& GetName() const override {
    return name_;
  }

  double factor_to_bigger_better() const override {
    return 1.0f;
  }

  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back("average_precision");

    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
      sum_weights_ = static_cast<double>(num_data_);
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
    // get indices sorted by score, descending order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_idx.emplace_back(i);
    }
    Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
Andrew Ziem's avatar
Andrew Ziem committed
312
    // temp sum of positive label
313
    double cur_actual_pos = 0.0f;
Andrew Ziem's avatar
Andrew Ziem committed
314
    // total sum of positive label
315
316
317
318
319
    double sum_actual_pos = 0.0f;
    // total sum of predicted positive
    double sum_pred_pos = 0.0f;
    // accumulated precision
    double accum_prec = 1.0f;
Andrew Ziem's avatar
Andrew Ziem committed
320
    // accumulated pr-auc
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
    double accum = 0.0f;
    // temp sum of negative label
    double cur_neg = 0.0f;
    double threshold = score[sorted_idx[0]];
    if (weights_ == nullptr) {  // no weights
      for (data_size_t i = 0; i < num_data_; ++i) {
        const label_t cur_label = label_[sorted_idx[i]];
        const double cur_score = score[sorted_idx[i]];
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accumulate
          sum_actual_pos += cur_actual_pos;
          sum_pred_pos += cur_actual_pos + cur_neg;
          accum_prec = sum_actual_pos / sum_pred_pos;
          accum += cur_actual_pos * accum_prec;
          // reset
          cur_neg = cur_actual_pos = 0.0f;
        }
        cur_neg += (cur_label <= 0);
        cur_actual_pos += (cur_label > 0);
      }
    } else {  // has weights
      for (data_size_t i = 0; i < num_data_; ++i) {
        const label_t cur_label = label_[sorted_idx[i]];
        const double cur_score = score[sorted_idx[i]];
        const label_t cur_weight = weights_[sorted_idx[i]];
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
Andrew Ziem's avatar
Andrew Ziem committed
351
          // accumulate
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
          sum_actual_pos += cur_actual_pos;
          sum_pred_pos += cur_actual_pos + cur_neg;
          accum_prec = sum_actual_pos / sum_pred_pos;
          accum += cur_actual_pos * accum_prec;
          // reset
          cur_neg = cur_actual_pos = 0.0f;
        }
        cur_neg += (cur_label <= 0) * cur_weight;
        cur_actual_pos += (cur_label > 0) * cur_weight;
      }
    }
    sum_actual_pos += cur_actual_pos;
    sum_pred_pos += cur_actual_pos + cur_neg;
    accum_prec = sum_actual_pos / sum_pred_pos;
    accum += cur_actual_pos * accum_prec;
    double ap = 1.0f;
    if (sum_actual_pos > 0.0f && sum_actual_pos != sum_weights_) {
      ap = accum / sum_actual_pos;
    }
    return std::vector<double>(1, ap);
  }

 private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const label_t* label_;
  /*! \brief Pointer of weighs */
  const label_t* weights_;
  /*! \brief Sum weights */
  double sum_weights_;
  /*! \brief Name of test set */
  std::vector<std::string> name_;
};

Guolin Ke's avatar
Guolin Ke committed
387
}  // namespace LightGBM
388
#endif   // LIGHTGBM_SRC_METRIC_BINARY_METRIC_HPP_