binary_metric.hpp 7.43 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_METRIC_BINARY_METRIC_HPP_
#define LIGHTGBM_METRIC_BINARY_METRIC_HPP_

8
#include <string>
Guolin Ke's avatar
Guolin Ke committed
9
#include <algorithm>
10
#include <sstream>
11
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
12

13
14
15
16
#include <LightGBM/metric.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

Guolin Ke's avatar
Guolin Ke committed
17
18
19
20
21
22
23
24
namespace LightGBM {

/*!
* \brief Metric for binary classification task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
*/
template<typename PointWiseLossCalculator>
class BinaryMetric: public Metric {
Nikita Titov's avatar
Nikita Titov committed
25
 public:
Guolin Ke's avatar
Guolin Ke committed
26
  explicit BinaryMetric(const Config&) {
Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
31
  }

  virtual ~BinaryMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
32
33
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back(PointWiseLossCalculator::Name());
34

Guolin Ke's avatar
Guolin Ke committed
35
36
37
38
39
40
41
42
    num_data_ = num_data;
    // get label
    label_ = metadata.label();

    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
43
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
49
50
51
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
52
  const std::vector<std::string>& GetName() const override {
53
    return name_;
54
55
  }

56
  double factor_to_bigger_better() const override {
57
    return -1.0f;
58
59
  }

Guolin Ke's avatar
Guolin Ke committed
60
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
61
    double sum_loss = 0.0f;
62
63
64
65
66
67
68
69
70
71
72
73
74
    if (objective == nullptr) {
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]);
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
        }
Guolin Ke's avatar
Guolin Ke committed
75
      }
76
    } else {
77
78
79
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
80
81
          double prob = 0;
          objective->ConvertOutput(&score[i], &prob);
82
83
84
85
86
87
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
88
89
          double prob = 0;
          objective->ConvertOutput(&score[i], &prob);
90
91
92
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
        }
wxchan's avatar
wxchan committed
93
      }
Guolin Ke's avatar
Guolin Ke committed
94
    }
95
96
    double loss = sum_loss / sum_weights_;
    return std::vector<double>(1, loss);
Guolin Ke's avatar
Guolin Ke committed
97
98
  }

Nikita Titov's avatar
Nikita Titov committed
99
 private:
Guolin Ke's avatar
Guolin Ke committed
100
101
102
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
103
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
104
  /*! \brief Pointer of weighs */
105
  const label_t* weights_;
Guolin Ke's avatar
Guolin Ke committed
106
  /*! \brief Sum weights */
107
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
108
  /*! \brief Name of test set */
109
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
};

/*!
* \brief Binary log loss (negative log-likelihood) metric.
*/
class BinaryLoglossMetric: public BinaryMetric<BinaryLoglossMetric> {
 public:
  explicit BinaryLoglossMetric(const Config& config) :BinaryMetric<BinaryLoglossMetric>(config) {}

  /*!
  * \brief Negative log of the probability assigned to the true class.
  * \param label Record label; values <= 0 are treated as the negative class
  * \param prob Predicted probability of the positive class
  * \return -log(likelihood); a likelihood that is not greater than kEpsilon
  *         is replaced by kEpsilon so the result stays finite
  */
  inline static double LossOnPoint(label_t label, double prob) {
    // probability the model gave to the class the record actually belongs to
    const double likelihood = (label <= 0) ? (1.0f - prob) : prob;
    if (likelihood > kEpsilon) {
      return -std::log(likelihood);
    }
    return -std::log(kEpsilon);
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_logloss";
  }
};
/*!
* \brief Misclassification rate metric for binary classification task.
*/
class BinaryErrorMetric: public BinaryMetric<BinaryErrorMetric> {
 public:
  explicit BinaryErrorMetric(const Config& config) :BinaryMetric<BinaryErrorMetric>(config) {}

  /*!
  * \brief 0/1 loss of a single prediction with a decision threshold of 0.5.
  * \param label Record label; values > 0 are treated as the positive class
  * \param prob Predicted probability of the positive class
  * \return 1 if the predicted class disagrees with the label, otherwise 0
  */
  inline static double LossOnPoint(label_t label, double prob) {
    // prob <= 0.5 predicts negative, so a positive label is the error case;
    // otherwise positive is predicted and a non-positive label is the error
    return (prob <= 0.5f) ? static_cast<double>(label > 0)
                          : static_cast<double>(label <= 0);
  }

  /*! \brief Name under which this metric is reported */
  inline static const char* Name() {
    return "binary_error";
  }
};

/*!
* \brief Auc Metric for binary classification task.
*/
class AUCMetric: public Metric {
Nikita Titov's avatar
Nikita Titov committed
160
 public:
Guolin Ke's avatar
Guolin Ke committed
161
  explicit AUCMetric(const Config&) {
Guolin Ke's avatar
Guolin Ke committed
162
163
164
165
166
  }

  virtual ~AUCMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
167
  const std::vector<std::string>& GetName() const override {
168
    return name_;
169
170
  }

171
  double factor_to_bigger_better() const override {
172
    return 1.0f;
173
174
  }

Guolin Ke's avatar
Guolin Ke committed
175
  void Init(const Metadata& metadata, data_size_t num_data) override {
176
    name_.emplace_back("auc");
177

Guolin Ke's avatar
Guolin Ke committed
178
179
180
181
182
183
184
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();

    if (weights_ == nullptr) {
185
      sum_weights_ = static_cast<double>(num_data_);
Guolin Ke's avatar
Guolin Ke committed
186
187
188
189
190
191
192
193
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
194
  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
195
196
197
198
199
    // get indices sorted by score, descent order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_idx.emplace_back(i);
    }
200
    Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
201
    // temp sum of postive label
202
    double cur_pos = 0.0f;
203
    // total sum of postive label
204
    double sum_pos = 0.0f;
205
    // accumlate of auc
206
    double accum = 0.0f;
207
    // temp sum of negative label
208
    double cur_neg = 0.0f;
209
    double threshold = score[sorted_idx[0]];
210
    if (weights_ == nullptr) {  // no weights
Guolin Ke's avatar
Guolin Ke committed
211
      for (data_size_t i = 0; i < num_data_; ++i) {
212
        const label_t cur_label = label_[sorted_idx[i]];
213
        const double cur_score = score[sorted_idx[i]];
214
215
216
217
218
219
220
221
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
222
        }
Guolin Ke's avatar
Guolin Ke committed
223
224
        cur_neg += (cur_label <= 0);
        cur_pos += (cur_label > 0);
Guolin Ke's avatar
Guolin Ke committed
225
      }
226
227
    } else {  // has weights
      for (data_size_t i = 0; i < num_data_; ++i) {
228
        const label_t cur_label = label_[sorted_idx[i]];
229
        const double cur_score = score[sorted_idx[i]];
230
        const label_t cur_weight = weights_[sorted_idx[i]];
231
232
233
234
235
236
237
238
239
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
          // accmulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
          cur_neg = cur_pos = 0.0f;
        }
Guolin Ke's avatar
Guolin Ke committed
240
241
        cur_neg += (cur_label <= 0)*cur_weight;
        cur_pos += (cur_label > 0)*cur_weight;
wxchan's avatar
wxchan committed
242
      }
Guolin Ke's avatar
Guolin Ke committed
243
    }
244
245
    accum += cur_neg*(cur_pos * 0.5f + sum_pos);
    sum_pos += cur_pos;
246
    double auc = 1.0f;
247
248
249
    if (sum_pos > 0.0f && sum_pos != sum_weights_) {
      auc = accum / (sum_pos *(sum_weights_ - sum_pos));
    }
250
    return std::vector<double>(1, auc);
Guolin Ke's avatar
Guolin Ke committed
251
252
  }

Nikita Titov's avatar
Nikita Titov committed
253
 private:
Guolin Ke's avatar
Guolin Ke committed
254
255
256
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
257
  const label_t* label_;
Guolin Ke's avatar
Guolin Ke committed
258
  /*! \brief Pointer of weighs */
259
  const label_t* weights_;
Guolin Ke's avatar
Guolin Ke committed
260
  /*! \brief Sum weights */
261
  double sum_weights_;
Guolin Ke's avatar
Guolin Ke committed
262
  /*! \brief Name of test set */
263
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
264
265
266
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
267
#endif   // LIGHTGBM_METRIC_BINARY_METRIC_HPP_