/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
5
6
7
8
#ifndef LIGHTGBM_METRIC_MULTICLASS_METRIC_HPP_
#define LIGHTGBM_METRIC_MULTICLASS_METRIC_HPP_

#include <LightGBM/metric.h>
#include <LightGBM/utils/log.h>

#include <string>
#include <cmath>
#include <vector>

namespace LightGBM {
/*!
* \brief Metric for multiclass task.
* Use static class "PointWiseLossCalculator" to calculate loss point-wise
*/
template<typename PointWiseLossCalculator>
class MulticlassMetric: public Metric {
Nikita Titov's avatar
Nikita Titov committed
22
 public:
23
  explicit MulticlassMetric(const Config& config) :config_(config) {
Guolin Ke's avatar
Guolin Ke committed
24
    num_class_ = config.num_class;
25
26
27
28
29
  }

  virtual ~MulticlassMetric() {
  }

Guolin Ke's avatar
Guolin Ke committed
30
  void Init(const Metadata& metadata, data_size_t num_data) override {
Belinda Trotta's avatar
Belinda Trotta committed
31
    name_.emplace_back(PointWiseLossCalculator::Name(config_));
32
33
34
35
36
37
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get weights
    weights_ = metadata.weights();
    if (weights_ == nullptr) {
38
      sum_weights_ = static_cast<double>(num_data_);
39
40
41
42
43
44
45
    } else {
      sum_weights_ = 0.0f;
      for (data_size_t i = 0; i < num_data_; ++i) {
        sum_weights_ += weights_[i];
      }
    }
  }
46

Guolin Ke's avatar
Guolin Ke committed
47
  const std::vector<std::string>& GetName() const override {
48
    return name_;
49
50
  }

51
  double factor_to_bigger_better() const override {
52
    return -1.0f;
53
  }
54

Guolin Ke's avatar
Guolin Ke committed
55
  std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
56
    double sum_loss = 0.0;
Guolin Ke's avatar
Guolin Ke committed
57
58
59
    int num_tree_per_iteration = num_class_;
    int num_pred_per_row = num_class_;
    if (objective != nullptr) {
Guolin Ke's avatar
Guolin Ke committed
60
      num_tree_per_iteration = objective->NumModelPerIteration();
Guolin Ke's avatar
Guolin Ke committed
61
62
      num_pred_per_row = objective->NumPredictOneRow();
    }
63
64
65
66
    if (objective != nullptr) {
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
67
          std::vector<double> raw_score(num_tree_per_iteration);
68
69
          for (int k = 0; k < num_tree_per_iteration; ++k) {
            size_t idx = static_cast<size_t>(num_data_) * k + i;
Guolin Ke's avatar
Guolin Ke committed
70
            raw_score[k] = static_cast<double>(score[idx]);
71
          }
Guolin Ke's avatar
Guolin Ke committed
72
73
          std::vector<double> rec(num_pred_per_row);
          objective->ConvertOutput(raw_score.data(), rec.data());
74
          // add loss
Guolin Ke's avatar
Guolin Ke committed
75
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_);
76
77
78
79
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
80
          std::vector<double> raw_score(num_tree_per_iteration);
81
82
          for (int k = 0; k < num_tree_per_iteration; ++k) {
            size_t idx = static_cast<size_t>(num_data_) * k + i;
Guolin Ke's avatar
Guolin Ke committed
83
            raw_score[k] = static_cast<double>(score[idx]);
84
          }
Guolin Ke's avatar
Guolin Ke committed
85
86
          std::vector<double> rec(num_pred_per_row);
          objective->ConvertOutput(raw_score.data(), rec.data());
87
          // add loss
Guolin Ke's avatar
Guolin Ke committed
88
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_) * weights_[i];
89
90
91
        }
      }
    } else {
92
93
94
95
96
97
98
99
100
      if (weights_ == nullptr) {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          std::vector<double> rec(num_tree_per_iteration);
          for (int k = 0; k < num_tree_per_iteration; ++k) {
            size_t idx = static_cast<size_t>(num_data_) * k + i;
            rec[k] = static_cast<double>(score[idx]);
          }
          // add loss
Guolin Ke's avatar
Guolin Ke committed
101
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_);
102
103
104
105
106
107
108
109
110
111
        }
      } else {
        #pragma omp parallel for schedule(static) reduction(+:sum_loss)
        for (data_size_t i = 0; i < num_data_; ++i) {
          std::vector<double> rec(num_tree_per_iteration);
          for (int k = 0; k < num_tree_per_iteration; ++k) {
            size_t idx = static_cast<size_t>(num_data_) * k + i;
            rec[k] = static_cast<double>(score[idx]);
          }
          // add loss
Guolin Ke's avatar
Guolin Ke committed
112
          sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_) * weights_[i];
113
114
115
        }
      }
    }
116
117
    double loss = sum_loss / sum_weights_;
    return std::vector<double>(1, loss);
118
119
  }

Nikita Titov's avatar
Nikita Titov committed
120
 private:
121
122
123
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
124
  const label_t* label_;
125
  /*! \brief Pointer of weighs */
126
  const label_t* weights_;
127
  /*! \brief Sum weights */
128
  double sum_weights_;
129
  /*! \brief Name of this test set */
130
  std::vector<std::string> name_;
Guolin Ke's avatar
Guolin Ke committed
131
  int num_class_;
Belinda Trotta's avatar
Belinda Trotta committed
132
133
  /*! \brief config parameters*/
  Config config_;
134
135
};

Belinda Trotta's avatar
Belinda Trotta committed
136
/*! \brief top-k error for multiclass task; if k=1 (default) this is the usual multi-error */
137
class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
Nikita Titov's avatar
Nikita Titov committed
138
 public:
Guolin Ke's avatar
Guolin Ke committed
139
  explicit MultiErrorMetric(const Config& config) :MulticlassMetric<MultiErrorMetric>(config) {}
140

Guolin Ke's avatar
Guolin Ke committed
141
  inline static double LossOnPoint(label_t label, std::vector<double>* score, const Config& config) {
142
    size_t k = static_cast<size_t>(label);
Guolin Ke's avatar
Guolin Ke committed
143
    auto& ref_score = *score;
Belinda Trotta's avatar
Belinda Trotta committed
144
    int num_larger = 0;
Guolin Ke's avatar
Guolin Ke committed
145
    for (size_t i = 0; i < score->size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
146
      if (ref_score[i] >= ref_score[k]) ++num_larger;
Belinda Trotta's avatar
Belinda Trotta committed
147
      if (num_larger > config.multi_error_top_k) return 1.0f;
148
    }
Guolin Ke's avatar
Guolin Ke committed
149
    return 0.0f;
150
151
  }

Belinda Trotta's avatar
Belinda Trotta committed
152
  inline static const std::string Name(const Config& config) {
153
154
155
156
157
    if (config.multi_error_top_k == 1) {
      return "multi_error";
    } else {
      return "multi_error@" + std::to_string(config.multi_error_top_k);
    }
158
159
160
161
  }
};

/*! \brief Logloss for multiclass task */
Guolin Ke's avatar
Guolin Ke committed
162
class MultiSoftmaxLoglossMetric: public MulticlassMetric<MultiSoftmaxLoglossMetric> {
Nikita Titov's avatar
Nikita Titov committed
163
 public:
Guolin Ke's avatar
Guolin Ke committed
164
  explicit MultiSoftmaxLoglossMetric(const Config& config) :MulticlassMetric<MultiSoftmaxLoglossMetric>(config) {}
165

Guolin Ke's avatar
Guolin Ke committed
166
  inline static double LossOnPoint(label_t label, std::vector<double>* score, const Config&) {
167
    size_t k = static_cast<size_t>(label);
Guolin Ke's avatar
Guolin Ke committed
168
169
170
    auto& ref_score = *score;
    if (ref_score[k] > kEpsilon) {
      return static_cast<double>(-std::log(ref_score[k]));
171
172
173
174
    } else {
      return -std::log(kEpsilon);
    }
  }
175

Belinda Trotta's avatar
Belinda Trotta committed
176
  inline static const std::string Name(const Config&) {
177
    return "multi_logloss";
178
179
180
  }
};

Belinda Trotta's avatar
Belinda Trotta committed
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
/*! \brief Auc-mu for multiclass task*/
class AucMuMetric : public Metric {
public:
  explicit AucMuMetric(const Config& config) : config_(config) {
    num_class_ = config.num_class;
    class_weights_ = config.auc_mu_weights_matrix;
  }

  virtual ~AucMuMetric() {}

  const std::vector<std::string>& GetName() const override { return name_; }

  double factor_to_bigger_better() const override { return 1.0f; }

  void Init(const Metadata& metadata, data_size_t num_data) override {
    name_.emplace_back("auc_mu");

    num_data_ = num_data;
    label_ = metadata.label();

    // sort the data indices by true class
    sorted_data_idx_ = std::vector<data_size_t>(num_data_, 0);
    for (data_size_t i = 0; i < num_data_; ++i) {
      sorted_data_idx_[i] = i;
    }
    Common::ParallelSort(sorted_data_idx_.begin(), sorted_data_idx_.end(),
      [this](data_size_t a, data_size_t b) { return label_[a] < label_[b]; });
  }

  std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
    // the notation follows that used in the paper introducing the auc-mu metric:
    // http://proceedings.mlr.press/v97/kleiman19a/kleiman19a.pdf

    // get size of each class
    auto class_sizes = std::vector<data_size_t>(num_class_, 0);
    for (data_size_t i = 0; i < num_data_; ++i) {
      data_size_t curr_label = static_cast<data_size_t>(label_[i]);
      ++class_sizes[curr_label];
    }

    auto S = std::vector<std::vector<double>>(num_class_, std::vector<double>(num_class_, 0));
    int i_start = 0;
    for (int i = 0; i < num_class_; ++i) {
      int j_start = i_start + class_sizes[i];
      for (int j = i + 1; j < num_class_; ++j) {
        std::vector<double> curr_v;
        for (int k = 0; k < num_class_; ++k) {
          curr_v.emplace_back(class_weights_[i][k] - class_weights_[j][k]);
        }
        double t1 = curr_v[i] - curr_v[j];
        // extract the data indices belonging to class i or j
        std::vector<data_size_t> class_i_j_indices;
        class_i_j_indices.assign(sorted_data_idx_.begin() + i_start, sorted_data_idx_.begin() + i_start + class_sizes[i]);
        class_i_j_indices.insert(class_i_j_indices.end(),
          sorted_data_idx_.begin() + j_start, sorted_data_idx_.begin() + j_start + class_sizes[j]);
        // sort according to distance from separating hyperplane
        std::vector<std::pair<data_size_t, double>> dist;
        for (data_size_t k = 0; static_cast<size_t>(k) < class_i_j_indices.size(); ++k) {
          data_size_t a = class_i_j_indices[k];
          double v_a = 0;
          for (int m = 0; m < num_class_; ++m) {
            v_a += curr_v[m] * score[num_data_ * m + a];
          }
          dist.push_back(std::pair<data_size_t, double>(a, t1 * v_a));
        }
        Common::ParallelSort(dist.begin(), dist.end(),
          [this](std::pair<data_size_t, double> a, std::pair<data_size_t, double> b) {
          // if scores are equal, put j class first
          if (std::fabs(a.second - b.second) < kEpsilon) {
            return label_[a.first] > label_[b.first];
          }
          else if (a.second < b.second) {
            return true;
          } else {
            return false;
          }
        });
        // calculate auc
        double num_j = 0;
        double last_j_dist = 0;
        double num_current_j = 0;
        for (size_t k = 0; k < dist.size(); ++k) {
          data_size_t a = dist[k].first;
          double curr_dist = dist[k].second;
          if (label_[a] == i) {
            if (std::fabs(curr_dist - last_j_dist) < kEpsilon) {
              S[i][j] += num_j - 0.5 * num_current_j;  // members of class j with same distance as a contribute 0.5
            } else {
              S[i][j] += num_j;
            }
          } else {
            ++num_j;
            if (std::fabs(curr_dist - last_j_dist) < kEpsilon) {
              ++num_current_j;
            } else {
              last_j_dist = dist[k].second;
              num_current_j = 1;
            }
          }
        }
        j_start += class_sizes[j];
      }
      i_start += class_sizes[i];
    }

    double ans = 0;
    for (int i = 0; i < num_class_; ++i) {
      for (int j = i + 1; j < num_class_; ++j) {
        ans += S[i][j] / (class_sizes[i] * class_sizes[j]);
      }
    }
    ans = 2 * ans / (num_class_ * (num_class_ - 1));
    return std::vector<double>(1, ans);
  }

private:
  /*! \brief Number of data*/
  data_size_t num_data_;
  /*! \brief Pointer to label*/
  const label_t* label_;
  /*! \brief Name of this metric*/
  std::vector<std::string> name_;
  /*! \brief Number of classes*/
  int num_class_;
  /*! \brief class_weights*/
  std::vector<std::vector<double>> class_weights_;
  /*! \brief config parameters*/
  Config config_;
  /*! \brief index to data, sorted by true class*/
  std::vector<data_size_t> sorted_data_idx_;
};

}  // namespace LightGBM
#endif  // LIGHTGBM_METRIC_MULTICLASS_METRIC_HPP_