regression_metric.hpp 13.4 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
5
6
#ifndef LIGHTGBM_SRC_METRIC_REGRESSION_METRIC_HPP_
#define LIGHTGBM_SRC_METRIC_REGRESSION_METRIC_HPP_
Guolin Ke's avatar
Guolin Ke committed
7

8
9
10
#include <LightGBM/metric.h>
#include <LightGBM/utils/log.h>

11
12
#include <string>
#include <algorithm>
Guolin Ke's avatar
Guolin Ke committed
13
#include <cmath>
14
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
15
16
17
18
19
20
21
22

namespace LightGBM {
/*!
* \brief Metric for regression task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
* via the CRTP-style template parameter: derived metrics supply static
* Name(), LossOnPoint(), and optionally override AverageLoss()/CheckLabel().
*/
template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
 public:
  explicit RegressionMetric(const Config& config) :config_(config) {
  }

  virtual ~RegressionMetric() {
  }

  const std::vector<std::string>& GetName() const override {
    return name_;
  }

  // Loss metrics: smaller is better, hence the negative factor.
  double factor_to_bigger_better() const override {
    return -1.0f;
  }

  /*!
  * \brief Caches label/weight pointers from metadata, precomputes the
  *        weight normalizer, and validates every label.
  * \param metadata Dataset metadata providing labels and (optional) weights
  * \param num_data Number of data points
  */
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back(PointWiseLossCalculator::Name());
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();
    if (weights_ == nullptr) {
      // unweighted: every point counts 1, so the normalizer is just N
      sum_weights_ = static_cast<double>(num_data_);
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data_; ++i) {
        sum_weights_ += weights_[i];
      }
    }
    // let the concrete metric reject invalid labels (e.g. gamma requires > 0)
    for (data_size_t i = 0; i < num_data_; ++i) {
      PointWiseLossCalculator::CheckLabel(label_[i]);
    }
  }

  /*!
  * \brief Computes the (weighted) average point-wise loss over all data.
  * \param score Raw model scores, one per data point
  * \param objective If non-null, used to transform raw scores into the
  *        output space (e.g. exp for log-link objectives) before the loss
  * \return Single-element vector with the aggregated loss
  */
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
    double sum_loss = 0.0f;
    // Four explicit branches (objective x weights) keep the per-point work
    // branch-free inside each OpenMP reduction loop.
    if (objective == nullptr) {
      if (weights_ == nullptr) {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], config_);
        }
      } else {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], config_) * weights_[i];
        }
      }
    } else {
      if (weights_ == nullptr) {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          double t = 0;
          objective->ConvertOutput(&score[i], &t);
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], t, config_);
        }
      } else {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          // add loss
          double t = 0;
          objective->ConvertOutput(&score[i], &t);
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], t, config_) * weights_[i];
        }
      }
    }
    double loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
    return std::vector<double>(1, loss);
  }

  /*! \brief Default aggregation: weighted mean. Derived metrics may shadow
  *          this (e.g. RMSE takes a sqrt, gamma deviance scales by 2). */
  inline static double AverageLoss(double sum_loss, double sum_weights) {
    return sum_loss / sum_weights;
  }

  /*! \brief Default label check: accept anything. Derived metrics shadow
  *          this to enforce domain constraints. */
  inline static void CheckLabel(label_t) {
  }

 protected:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const label_t* label_;
  /*! \brief Pointer of weighs */
  const label_t* weights_;
  /*! \brief Sum weights */
  double sum_weights_;
  /*! \brief Config forwarded to the point-wise loss (alpha, fair_c, ...) */
  Config config_;
  /*! \brief Name of this metric */
  std::vector<std::string> name_;
};

118
119
/*! \brief RMSE loss for regression task */
class RMSEMetric: public RegressionMetric<RMSEMetric> {
 public:
  explicit RMSEMetric(const Config& config) :RegressionMetric<RMSEMetric>(config) {}

  /*! \brief Per-point squared error; the square root is applied once in AverageLoss. */
  inline static double LossOnPoint(label_t label, double score, const Config&) {
    const double residual = score - label;
    return residual * residual;
  }

  inline static double AverageLoss(double sum_loss, double sum_weights) {
    // need sqrt the result for RMSE loss
    return std::sqrt(sum_loss / sum_weights);
  }

  inline static const char* Name() {
    return "rmse";
  }
};

Guolin Ke's avatar
Guolin Ke committed
137
138
/*! \brief L2 loss for regression task */
class L2Metric: public RegressionMetric<L2Metric> {
 public:
  explicit L2Metric(const Config& config) :RegressionMetric<L2Metric>(config) {}

  /*! \brief Squared error of a single prediction. */
  inline static double LossOnPoint(label_t label, double score, const Config&) {
    const double delta = score - label;
    return delta * delta;
  }

  inline static const char* Name() {
    return "l2";
  }
};

151
/*! \brief Quantile loss for regression task */
152
class QuantileMetric : public RegressionMetric<QuantileMetric> {
Nikita Titov's avatar
Nikita Titov committed
153
 public:
Guolin Ke's avatar
Guolin Ke committed
154
  explicit QuantileMetric(const Config& config) :RegressionMetric<QuantileMetric>(config) {
155
156
  }

Guolin Ke's avatar
Guolin Ke committed
157
  inline static double LossOnPoint(label_t label, double score, const Config& config) {
158
159
160
161
162
163
    double delta = label - score;
    if (delta < 0) {
      return (config.alpha - 1.0f) * delta;
    } else {
      return config.alpha * delta;
    }
Guolin Ke's avatar
Guolin Ke committed
164
165
166
  }

  inline static const char* Name() {
167
    return "quantile";
Guolin Ke's avatar
Guolin Ke committed
168
169
170
  }
};

171

Guolin Ke's avatar
Guolin Ke committed
172
173
/*! \brief L1 loss for regression task */
class L1Metric: public RegressionMetric<L1Metric> {
 public:
  explicit L1Metric(const Config& config) :RegressionMetric<L1Metric>(config) {}

  /*! \brief Absolute error of a single prediction. */
  inline static double LossOnPoint(label_t label, double score, const Config&) {
    return std::fabs(label - score);
  }

  inline static const char* Name() {
    return "l1";
  }
};

Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
185
186
/*! \brief Huber loss for regression task */
class HuberLossMetric: public RegressionMetric<HuberLossMetric> {
 public:
  explicit HuberLossMetric(const Config& config) :RegressionMetric<HuberLossMetric>(config) {
  }

  /*!
  * \brief Quadratic within config.alpha of the label, linear beyond it.
  */
  inline static double LossOnPoint(label_t label, double score, const Config& config) {
    const double abs_err = std::fabs(score - label);
    if (abs_err > config.alpha) {
      // linear tail, shifted so the two pieces join smoothly
      return config.alpha * (abs_err - 0.5f * config.alpha);
    }
    return 0.5f * abs_err * abs_err;
  }

  inline static const char* Name() {
    return "huber";
  }
};

Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
205
206
207
/*! \brief Fair loss for regression task */
// http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html
class FairLossMetric: public RegressionMetric<FairLossMetric> {
 public:
  explicit FairLossMetric(const Config& config) :RegressionMetric<FairLossMetric>(config) {
  }

  /*! \brief Fair loss: c*|e| - c^2*log(1 + |e|/c), controlled by config.fair_c. */
  inline static double LossOnPoint(label_t label, double score, const Config& config) {
    const double abs_err = std::fabs(score - label);
    const double fair_c = config.fair_c;
    return fair_c * abs_err - fair_c * fair_c * std::log1p(abs_err / fair_c);
  }

  inline static const char* Name() {
    return "fair";
  }
};

223
224
/*! \brief Poisson regression loss for regression task */
class PoissonMetric: public RegressionMetric<PoissonMetric> {
 public:
  explicit PoissonMetric(const Config& config) :RegressionMetric<PoissonMetric>(config) {
  }

  /*! \brief Poisson negative log-likelihood (up to a label-only constant);
  *          the score is clamped away from zero before taking the log. */
  inline static double LossOnPoint(label_t label, double score, const Config&) {
    const double eps = 1e-10f;
    const double safe_score = score < eps ? eps : score;
    return safe_score - label * std::log(safe_score);
  }

  inline static const char* Name() {
    return "poisson";
  }
};

241

Andrew Ziem's avatar
Andrew Ziem committed
242
/*! \brief MAPE regression loss for regression task */
243
class MAPEMetric : public RegressionMetric<MAPEMetric> {
Nikita Titov's avatar
Nikita Titov committed
244
 public:
Guolin Ke's avatar
Guolin Ke committed
245
  explicit MAPEMetric(const Config& config) :RegressionMetric<MAPEMetric>(config) {
246
247
  }

Guolin Ke's avatar
Guolin Ke committed
248
  inline static double LossOnPoint(label_t label, double score, const Config&) {
249
250
251
252
253
254
255
    return std::fabs((label - score)) / std::max(1.0f, std::fabs(label));
  }
  inline static const char* Name() {
    return "mape";
  }
};

Guolin Ke's avatar
Guolin Ke committed
256
/*! \brief Gamma negative log-likelihood for regression task (unit dispersion). */
class GammaMetric : public RegressionMetric<GammaMetric> {
 public:
  explicit GammaMetric(const Config& config) :RegressionMetric<GammaMetric>(config) {
  }

  /*!
  * \brief Gamma NLL in exponential-family form with natural parameter
  *        theta = -1/score and fixed dispersion psi = 1.
  */
  inline static double LossOnPoint(label_t label, double score, const Config&) {
    const double dispersion = 1.0;            // psi
    const double natural_param = -1.0 / score;  // theta
    const double a = dispersion;
    const double b = -Common::SafeLog(-natural_param);
    const double c = 1. / dispersion * Common::SafeLog(label / dispersion) - Common::SafeLog(label) - 0;  // 0 = std::lgamma(1.0 / psi) = std::lgamma(1.0);
    return -((label * natural_param - b) / a + c);
  }

  inline static const char* Name() {
    return "gamma";
  }

  /*! \brief Gamma is only defined for strictly positive labels. */
  inline static void CheckLabel(label_t label) {
    CHECK_GT(label, 0);
  }
};


/*! \brief Gamma deviance metric for regression task. */
class GammaDevianceMetric : public RegressionMetric<GammaDevianceMetric> {
 public:
  explicit GammaDevianceMetric(const Config& config) :RegressionMetric<GammaDevianceMetric>(config) {
  }

  /*! \brief Half unit deviance of the gamma distribution; epsilon keeps
  *          the ratio finite when the score approaches zero. */
  inline static double LossOnPoint(label_t label, double score, const Config&) {
    const double epsilon = 1.0e-9;
    const double ratio = label / (score + epsilon);
    return ratio - Common::SafeLog(ratio) - 1;
  }

  inline static const char* Name() {
    return "gamma_deviance";
  }

  /*! \brief Scale by 2 to report the full (not half) deviance. */
  inline static double AverageLoss(double sum_loss, double) {
    return sum_loss * 2;
  }

  /*! \brief Gamma deviance requires strictly positive labels. */
  inline static void CheckLabel(label_t label) {
    CHECK_GT(label, 0);
  }
};

/*! \brief Tweedie negative log-likelihood (up to constants) for regression task. */
class TweedieMetric : public RegressionMetric<TweedieMetric> {
 public:
  explicit TweedieMetric(const Config& config) :RegressionMetric<TweedieMetric>(config) {
  }

  /*!
  * \brief Tweedie loss with variance power rho from the config; the score
  *        is clamped away from zero so the log stays finite.
  */
  inline static double LossOnPoint(label_t label, double score, const Config& config) {
    const double rho = config.tweedie_variance_power;
    const double eps = 1e-10f;
    const double safe_score = score < eps ? eps : score;
    // score^(1-rho) and score^(2-rho) via exp/log
    const double term_a = label * std::exp((1 - rho) * std::log(safe_score)) / (1 - rho);
    const double term_b = std::exp((2 - rho) * std::log(safe_score)) / (2 - rho);
    return -term_a + term_b;
  }

  inline static const char* Name() {
    return "tweedie";
  }
};


321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
/*!
* \brief R^2 (coefficient of determination) metric for regression task.
* r2 = 1 - RSS / TSS. The (weighted) total sum of squares TSS is
* precomputed once in Init() against the weighted label mean; the residual
* sum of squares RSS is recomputed on every Eval() call.
*/
class R2Metric: public Metric {
 public:
  explicit R2Metric(const Config& config) :config_(config) {}

  const std::vector<std::string>& GetName() const override {
    return name_;
  }

  // Larger R^2 is better.
  double factor_to_bigger_better() const override {
    return 1.0f;
  }

  /*!
  * \brief Caches label/weight pointers and precomputes the weighted label
  *        mean and the total sum of squares.
  * \param metadata Dataset metadata providing labels and (optional) weights
  * \param num_data Number of data points
  */
  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back("r2");
    num_data_ = num_data;
    label_ = metadata.label();
    weights_ = metadata.weights();

    double sum_label = 0.0f;
    if (weights_ == nullptr) {
      sum_weights_ = static_cast<double>(num_data_);
      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum_label)
      for (data_size_t i = 0; i < num_data_; ++i) {
        sum_label += label_[i];
      }
    } else {
      // OpenMP reductions cannot target a class member, so accumulate locally.
      double local_sum_weights = 0.0f;
      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:local_sum_weights, sum_label)
      for (data_size_t i = 0; i < num_data_; ++i) {
        local_sum_weights += weights_[i];
        sum_label += label_[i] * weights_[i];
      }
      sum_weights_ = local_sum_weights;
    }
    label_mean_ = sum_label / sum_weights_;

    double local_total_sum_squares = 0.0f;
    if (weights_ == nullptr) {
      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:local_total_sum_squares)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double diff = label_[i] - label_mean_;
        local_total_sum_squares += diff * diff;
      }
    } else {
      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:local_total_sum_squares)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double diff = label_[i] - label_mean_;
        local_total_sum_squares += diff * diff * weights_[i];
      }
    }
    total_sum_squares_ = local_total_sum_squares;
  }

  /*!
  * \brief Computes R^2 for the given scores.
  * \param score Raw model scores, one per data point
  * \param objective If non-null, used to transform raw scores into the
  *        output space before computing residuals
  * \return Single-element vector with the R^2 value
  */
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
    double residual_sum_squares = 0.0f;
    if (objective == nullptr) {
      if (weights_ == nullptr) {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:residual_sum_squares)
        for (data_size_t i = 0; i < num_data_; ++i) {
          const double diff = label_[i] - score[i];
          residual_sum_squares += diff * diff;
        }
      } else {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:residual_sum_squares)
        for (data_size_t i = 0; i < num_data_; ++i) {
          const double diff = label_[i] - score[i];
          residual_sum_squares += diff * diff * weights_[i];
        }
      }
    } else {
      if (weights_ == nullptr) {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:residual_sum_squares)
        for (data_size_t i = 0; i < num_data_; ++i) {
          double t = 0;
          objective->ConvertOutput(&score[i], &t);
          const double diff = label_[i] - t;
          residual_sum_squares += diff * diff;
        }
      } else {
        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:residual_sum_squares)
        for (data_size_t i = 0; i < num_data_; ++i) {
          double t = 0;
          objective->ConvertOutput(&score[i], &t);
          const double diff = label_[i] - t;
          residual_sum_squares += diff * diff * weights_[i];
        }
      }
    }

    // Degenerate case first: constant labels give TSS ~= 0 and the ratio is
    // undefined, so report a perfect fit only when the residuals are also ~0.
    if (std::fabs(total_sum_squares_) < kZeroThreshold) {
      return std::vector<double>(1, std::fabs(residual_sum_squares) < kZeroThreshold ? 1.0 : 0.0);
    }
    return std::vector<double>(1, 1.0 - residual_sum_squares / total_sum_squares_);
  }

 protected:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const label_t* label_;
  /*! \brief Pointer of weighs */
  const label_t* weights_;
  /*! \brief Sum weights */
  double sum_weights_;
  /*! \brief Config (kept for interface parity with other metrics) */
  Config config_;
  /*! \brief Name of this metric */
  std::vector<std::string> name_;

  // Custom members for R2 calculation
  /*! \brief Weighted mean of the labels, fixed at Init() */
  double label_mean_;
  /*! \brief Weighted total sum of squares around label_mean_, fixed at Init() */
  double total_sum_squares_;
};


Guolin Ke's avatar
Guolin Ke committed
431
}  // namespace LightGBM
432
#endif   // LIGHTGBM_SRC_METRIC_REGRESSION_METRIC_HPP_