Unverified commit 1782fcb1, authored by Belinda Trotta, committed by GitHub
Browse files

Auc mu weights (#3349)

* Update auc_mu metric to use data weights if provided

* Calculate class sizes and total weights in Init so we only do it once

* Fix lint error

* Empty commit to trigger CI jobs
parent f7ad9457
...@@ -199,6 +199,17 @@ class AucMuMetric : public Metric { ...@@ -199,6 +199,17 @@ class AucMuMetric : public Metric {
num_data_ = num_data; num_data_ = num_data;
label_ = metadata.label(); label_ = metadata.label();
// get weights
weights_ = metadata.weights();
if (weights_ == nullptr) {
sum_weights_ = static_cast<double>(num_data_);
} else {
sum_weights_ = 0.0f;
for (data_size_t i = 0; i < num_data_; ++i) {
sum_weights_ += weights_[i];
}
}
// sort the data indices by true class // sort the data indices by true class
sorted_data_idx_ = std::vector<data_size_t>(num_data_, 0); sorted_data_idx_ = std::vector<data_size_t>(num_data_, 0);
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
...@@ -206,23 +217,32 @@ class AucMuMetric : public Metric { ...@@ -206,23 +217,32 @@ class AucMuMetric : public Metric {
} }
Common::ParallelSort(sorted_data_idx_.begin(), sorted_data_idx_.end(), Common::ParallelSort(sorted_data_idx_.begin(), sorted_data_idx_.end(),
[this](data_size_t a, data_size_t b) { return label_[a] < label_[b]; }); [this](data_size_t a, data_size_t b) { return label_[a] < label_[b]; });
}
std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
// the notation follows that used in the paper introducing the auc-mu metric:
// http://proceedings.mlr.press/v97/kleiman19a/kleiman19a.pdf
// get size of each class // get size of each class
auto class_sizes = std::vector<data_size_t>(num_class_, 0); class_sizes_ = std::vector<data_size_t>(num_class_, 0);
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
data_size_t curr_label = static_cast<data_size_t>(label_[i]); data_size_t curr_label = static_cast<data_size_t>(label_[i]);
++class_sizes[curr_label]; ++class_sizes_[curr_label];
} }
// get total weight of data in each class
class_data_weights_ = std::vector<double>(num_class_, 0);
if (weights_ != nullptr) {
for (data_size_t i = 0; i < num_data_; ++i) {
data_size_t curr_label = static_cast<data_size_t>(label_[i]);
class_data_weights_[curr_label] += weights_[i];
}
}
}
std::vector<double> Eval(const double* score, const ObjectiveFunction*) const override {
// the notation follows that used in the paper introducing the auc-mu metric:
// http://proceedings.mlr.press/v97/kleiman19a/kleiman19a.pdf
auto S = std::vector<std::vector<double>>(num_class_, std::vector<double>(num_class_, 0)); auto S = std::vector<std::vector<double>>(num_class_, std::vector<double>(num_class_, 0));
int i_start = 0; int i_start = 0;
for (int i = 0; i < num_class_; ++i) { for (int i = 0; i < num_class_; ++i) {
int j_start = i_start + class_sizes[i]; int j_start = i_start + class_sizes_[i];
for (int j = i + 1; j < num_class_; ++j) { for (int j = i + 1; j < num_class_; ++j) {
std::vector<double> curr_v; std::vector<double> curr_v;
for (int k = 0; k < num_class_; ++k) { for (int k = 0; k < num_class_; ++k) {
...@@ -231,9 +251,9 @@ class AucMuMetric : public Metric { ...@@ -231,9 +251,9 @@ class AucMuMetric : public Metric {
double t1 = curr_v[i] - curr_v[j]; double t1 = curr_v[i] - curr_v[j];
// extract the data indices belonging to class i or j // extract the data indices belonging to class i or j
std::vector<data_size_t> class_i_j_indices; std::vector<data_size_t> class_i_j_indices;
class_i_j_indices.assign(sorted_data_idx_.begin() + i_start, sorted_data_idx_.begin() + i_start + class_sizes[i]); class_i_j_indices.assign(sorted_data_idx_.begin() + i_start, sorted_data_idx_.begin() + i_start + class_sizes_[i]);
class_i_j_indices.insert(class_i_j_indices.end(), class_i_j_indices.insert(class_i_j_indices.end(),
sorted_data_idx_.begin() + j_start, sorted_data_idx_.begin() + j_start + class_sizes[j]); sorted_data_idx_.begin() + j_start, sorted_data_idx_.begin() + j_start + class_sizes_[j]);
// sort according to distance from separating hyperplane // sort according to distance from separating hyperplane
std::vector<std::pair<data_size_t, double>> dist; std::vector<std::pair<data_size_t, double>> dist;
for (data_size_t k = 0; static_cast<size_t>(k) < class_i_j_indices.size(); ++k) { for (data_size_t k = 0; static_cast<size_t>(k) < class_i_j_indices.size(); ++k) {
...@@ -259,38 +279,64 @@ class AucMuMetric : public Metric { ...@@ -259,38 +279,64 @@ class AucMuMetric : public Metric {
double num_j = 0; double num_j = 0;
double last_j_dist = 0; double last_j_dist = 0;
double num_current_j = 0; double num_current_j = 0;
for (size_t k = 0; k < dist.size(); ++k) { if (weights_ == nullptr) {
data_size_t a = dist[k].first; for (size_t k = 0; k < dist.size(); ++k) {
double curr_dist = dist[k].second; data_size_t a = dist[k].first;
if (label_[a] == i) { double curr_dist = dist[k].second;
if (std::fabs(curr_dist - last_j_dist) < kEpsilon) { if (label_[a] == i) {
S[i][j] += num_j - 0.5 * num_current_j; // members of class j with same distance as a contribute 0.5 if (std::fabs(curr_dist - last_j_dist) < kEpsilon) {
S[i][j] += num_j - 0.5 * num_current_j; // members of class j with same distance as a contribute 0.5
} else {
S[i][j] += num_j;
}
} else { } else {
S[i][j] += num_j; ++num_j;
if (std::fabs(curr_dist - last_j_dist) < kEpsilon) {
++num_current_j;
} else {
last_j_dist = dist[k].second;
num_current_j = 1;
}
} }
} else { }
++num_j; } else {
if (std::fabs(curr_dist - last_j_dist) < kEpsilon) { for (size_t k = 0; k < dist.size(); ++k) {
++num_current_j; data_size_t a = dist[k].first;
double curr_dist = dist[k].second;
double curr_weight = weights_[a];
if (label_[a] == i) {
if (std::fabs(curr_dist - last_j_dist) < kEpsilon) {
S[i][j] += curr_weight * (num_j - 0.5 * num_current_j); // members of class j with same distance as a contribute 0.5
} else {
S[i][j] += curr_weight * num_j;
}
} else { } else {
last_j_dist = dist[k].second; num_j += curr_weight;
num_current_j = 1; if (std::fabs(curr_dist - last_j_dist) < kEpsilon) {
num_current_j += curr_weight;
} else {
last_j_dist = dist[k].second;
num_current_j = curr_weight;
}
} }
} }
} }
j_start += class_sizes[j]; j_start += class_sizes_[j];
} }
i_start += class_sizes[i]; i_start += class_sizes_[i];
} }
double ans = 0; double ans = 0;
for (int i = 0; i < num_class_; ++i) { for (int i = 0; i < num_class_; ++i) {
for (int j = i + 1; j < num_class_; ++j) { for (int j = i + 1; j < num_class_; ++j) {
ans += (S[i][j] / class_sizes[i]) / class_sizes[j]; if (weights_ == nullptr) {
ans += (S[i][j] / class_sizes_[i]) / class_sizes_[j];
} else {
ans += (S[i][j] / class_data_weights_[i]) / class_data_weights_[j];
}
} }
} }
ans = (2 * ans / num_class_) / (num_class_ - 1); ans = (2.0 * ans / num_class_) / (num_class_ - 1);
return std::vector<double>(1, ans); return std::vector<double>(1.0, ans);
} }
private: private:
...@@ -302,8 +348,16 @@ class AucMuMetric : public Metric { ...@@ -302,8 +348,16 @@ class AucMuMetric : public Metric {
std::vector<std::string> name_; std::vector<std::string> name_;
/*! \brief Number of classes*/ /*! \brief Number of classes*/
int num_class_; int num_class_;
/*! \brief class_weights*/ /*! \brief Class auc-mu weights*/
std::vector<std::vector<double>> class_weights_; std::vector<std::vector<double>> class_weights_;
/*! \brief Data weights */
const label_t* weights_;
/*! \brief Sum of data weights */
double sum_weights_;
/*! \brief Sum of data weights in each class*/
std::vector<double> class_data_weights_;
/*! \brief Number of data in each class*/
std::vector<data_size_t> class_sizes_;
/*! \brief config parameters*/ /*! \brief config parameters*/
Config config_; Config config_;
/*! \brief index to data, sorted by true class*/ /*! \brief index to data, sorted by true class*/
......
...@@ -526,6 +526,23 @@ class TestEngine(unittest.TestCase): ...@@ -526,6 +526,23 @@ class TestEngine(unittest.TestCase):
results_auc_mu = {} results_auc_mu = {}
lgb.train(params, lgb_X, num_boost_round=10, valid_sets=[lgb_X], evals_result=results_auc_mu) lgb.train(params, lgb_X, num_boost_round=10, valid_sets=[lgb_X], evals_result=results_auc_mu)
self.assertAlmostEqual(results_auc_mu['training']['auc_mu'][-1], 0.5) self.assertAlmostEqual(results_auc_mu['training']['auc_mu'][-1], 0.5)
# test that weighted data gives different auc_mu
lgb_X = lgb.Dataset(X, label=y)
lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(np.random.normal(size=y.shape)))
results_unweighted = {}
results_weighted = {}
params = dict(params, num_classes=10, num_leaves=5)
lgb.train(params, lgb_X, num_boost_round=10, valid_sets=[lgb_X], evals_result=results_unweighted)
lgb.train(params, lgb_X_weighted, num_boost_round=10, valid_sets=[lgb_X_weighted],
evals_result=results_weighted)
self.assertLess(results_weighted['training']['auc_mu'][-1], 1)
self.assertNotEqual(results_unweighted['training']['auc_mu'][-1], results_weighted['training']['auc_mu'][-1])
# test that equal data weights give same auc_mu as unweighted data
lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.ones(y.shape) * 0.5)
lgb.train(params, lgb_X_weighted, num_boost_round=10, valid_sets=[lgb_X_weighted],
evals_result=results_weighted)
self.assertAlmostEqual(results_unweighted['training']['auc_mu'][-1], results_weighted['training']['auc_mu'][-1],
places=5)
# should give 1 when accuracy = 1 # should give 1 when accuracy = 1
X = X[:10, :] X = X[:10, :]
y = y[:10] y = y[:10]
...@@ -538,7 +555,7 @@ class TestEngine(unittest.TestCase): ...@@ -538,7 +555,7 @@ class TestEngine(unittest.TestCase):
results = {} results = {}
lgb.train(params, lgb_X, num_boost_round=100, valid_sets=[lgb_X], evals_result=results) lgb.train(params, lgb_X, num_boost_round=100, valid_sets=[lgb_X], evals_result=results)
self.assertAlmostEqual(results['training']['auc_mu'][-1], 1) self.assertAlmostEqual(results['training']['auc_mu'][-1], 1)
# test loading weights # test loading class weights
Xy = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), Xy = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/multiclass_classification/multiclass.train')) '../../examples/multiclass_classification/multiclass.train'))
y = Xy[:, 0] y = Xy[:, 0]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment