"mmdet/engine/optimizers/__init__.py" did not exist on "fdfe3c4f8ba935ae428a8a496ce57755d5b2ea98"
Commit 7426ac3c authored by Guolin Ke

add map metric

parent e9b82412
......@@ -179,7 +179,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* used in binary classification. Set this to ```true``` if training data are unbalanced.
* ```max_position```, default=```20```, type=int
* used in lambdarank; NDCG will be optimized at this position.
* ```label_gain```, default=```{0,1,3,7,15,31,63,...}```, type=multi-double
* ```label_gain```, default=```0,1,3,7,15,31,63,...```, type=multi-double
* used in lambdarank, relevance gain for labels. For example, the gain of label ```2``` is ```3``` if using the default label gains.
* Separated by ```,```
* ```num_class```, default=```1```, type=int, alias=```num_classes```
......@@ -193,6 +193,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```huber```, [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia")
* ```fair```, [Fair loss](http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html)
* ```ndcg```, [NDCG](https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG)
* ```map```, [MAP](https://www.kaggle.com/wiki/MeanAveragePrecision)
* ```auc```, [AUC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)
* ```binary_logloss```, [log loss](https://www.kaggle.com/wiki/LogarithmicLoss)
* ```binary_error```. For one sample: ```0``` for correct classification, ```1``` for incorrect classification.
......@@ -203,7 +204,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* frequency for metric output
* ```is_training_metric```, default=```false```, type=bool
* set this to ```true``` if you need to output the metric result on training data
* ```ndcg_at```, default=```{1,2,3,4,5}```, type=multi-int, alias=```ndcg_eval_at```
* ```ndcg_at```, default=```1,2,3,4,5```, type=multi-int, alias=```ndcg_eval_at```,```eval_at```
* NDCG evaluation positions, separated by ```,``` (see the example below)
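As a quick illustration of the parameters above, a minimal configuration selecting the new MAP metric might look like the sketch below, using the documented ```key=value``` format. This is not part of the commit; data/IO parameters are omitted, and ```eval_at``` is the alias introduced here for ```ndcg_eval_at```:

```
objective = lambdarank
metric = map
eval_at = 1,3,5
label_gain = 0,1,3,7,15,31,63
```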
## Network parameters
......
......@@ -339,6 +339,7 @@ struct ParameterAlias {
{ "tranining_metric", "is_training_metric" },
{ "train_metric", "is_training_metric" },
{ "ndcg_at", "ndcg_eval_at" },
{ "eval_at", "ndcg_eval_at" },
{ "min_data_per_leaf", "min_data_in_leaf" },
{ "min_data", "min_data_in_leaf" },
{ "min_child_samples", "min_data_in_leaf" },
......
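For orientation only: the ```ParameterAlias``` entries above behave like a key-rewriting table applied before parameters are parsed. The standalone sketch below illustrates that idea; the ```ResolveAlias``` helper is hypothetical and not LightGBM's actual API:

```cpp
#include <string>
#include <unordered_map>

// Hypothetical illustration: map a user-supplied key to its canonical name,
// mirroring a few of the alias pairs shown in the diff above.
static std::string ResolveAlias(const std::string& key) {
  static const std::unordered_map<std::string, std::string> kAlias = {
    {"ndcg_at", "ndcg_eval_at"},
    {"eval_at", "ndcg_eval_at"},  // the alias added by this commit
    {"min_data_per_leaf", "min_data_in_leaf"},
    {"min_data", "min_data_in_leaf"},
    {"min_child_samples", "min_data_in_leaf"},
  };
  auto it = kAlias.find(key);
  return it != kAlias.end() ? it->second : key;
}
```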
#ifndef LIGHTGBM_METRIC_MAP_METRIC_HPP_
#define LIGHTGBM_METRIC_MAP_METRIC_HPP_
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
#include <LightGBM/metric.h>
#include <omp.h>
#include <algorithm>
#include <string>
#include <vector>
namespace LightGBM {
class MapMetric : public Metric {
public:
explicit MapMetric(const MetricConfig& config) {
// get eval position
for (auto k : config.eval_at) {
eval_at_.push_back(static_cast<data_size_t>(k));
}
// get number of threads
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
}
~MapMetric() {
}
void Init(const Metadata& metadata, data_size_t num_data) override {
// build one metric name per evaluation position, e.g. "map@1"
for (auto k : eval_at_) {
name_.emplace_back(std::string("map@") + std::to_string(k));
}
num_data_ = num_data;
// get label
label_ = metadata.label();
// get query boundaries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Fatal("For MAP metric, there should be query information");
}
num_queries_ = metadata.num_queries();
Log::Info("total groups: %d , total data: %d", num_queries_, num_data_);
// get query weights
query_weights_ = metadata.query_weights();
if (query_weights_ == nullptr) {
sum_query_weights_ = static_cast<double>(num_queries_);
} else {
sum_query_weights_ = 0.0f;
for (data_size_t i = 0; i < num_queries_; ++i) {
sum_query_weights_ += query_weights_[i];
}
}
}
const std::vector<std::string>& GetName() const override {
return name_;
}
double factor_to_bigger_better() const override {
return 1.0f;
}
// Compute MAP at each cutoff in ks for a single query; results are written to *out
void CalMapAtK(std::vector<int> ks, const float* label,
const double* score, data_size_t num_data, std::vector<double>* out) const {
// get sorted indices by score
std::vector<data_size_t> sorted_idx;
for (data_size_t i = 0; i < num_data; ++i) {
sorted_idx.emplace_back(i);
}
std::sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
int num_hit = 0;
double sum_ap = 0.0f;
data_size_t cur_left = 0;
for (size_t i = 0; i < ks.size(); ++i) {
data_size_t cur_k = ks[i];
if (cur_k > num_data) { cur_k = num_data; }
for (data_size_t j = cur_left; j < cur_k; ++j) {
data_size_t idx = sorted_idx[j];
if (label[idx] > 0.5f) {
++num_hit;
// a relevant document at (1-based) rank j + 1 contributes num_hit / (j + 1) to the AP sum
sum_ap += static_cast<double>(num_hit) / (j + 1.0f);
}
}
(*out)[i] = sum_ap / cur_k;
cur_left = cur_k;
}
}
std::vector<double> Eval(const double* score) const override {
// per-thread buffers to sum up results across queries
std::vector<std::vector<double>> result_buffer_;
for (int i = 0; i < num_threads_; ++i) {
result_buffer_.emplace_back(eval_at_.size(), 0.0f);
}
std::vector<double> tmp_map(eval_at_.size(), 0.0f);
if (query_weights_ == nullptr) {
#pragma omp parallel for schedule(guided) firstprivate(tmp_map)
for (data_size_t i = 0; i < num_queries_; ++i) {
const int tid = omp_get_thread_num();
CalMapAtK(eval_at_, label_ + query_boundaries_[i],
score + query_boundaries_[i], query_boundaries_[i + 1] - query_boundaries_[i], &tmp_map);
for (size_t j = 0; j < eval_at_.size(); ++j) {
result_buffer_[tid][j] += tmp_map[j];
}
}
} else {
#pragma omp parallel for schedule(guided) firstprivate(tmp_map)
for (data_size_t i = 0; i < num_queries_; ++i) {
const int tid = omp_get_thread_num();
CalMapAtK(eval_at_, label_ + query_boundaries_[i],
score + query_boundaries_[i], query_boundaries_[i + 1] - query_boundaries_[i], &tmp_map);
for (size_t j = 0; j < eval_at_.size(); ++j) {
result_buffer_[tid][j] += tmp_map[j] * query_weights_[i];
}
}
}
// Get final average MAP
std::vector<double> result(eval_at_.size(), 0.0f);
for (size_t j = 0; j < result.size(); ++j) {
for (int i = 0; i < num_threads_; ++i) {
result[j] += result_buffer_[i][j];
}
result[j] /= sum_query_weights_;
}
return result;
}
private:
/*! \brief Number of data */
data_size_t num_data_;
/*! \brief Pointer of label */
const float* label_;
/*! \brief Query boundaries information */
const data_size_t* query_boundaries_;
/*! \brief Number of queries */
data_size_t num_queries_;
/*! \brief Weights of queries */
const float* query_weights_;
/*! \brief Sum weights of queries */
double sum_query_weights_;
/*! \brief Evaluation positions for MAP */
std::vector<data_size_t> eval_at_;
/*! \brief Number of threads */
int num_threads_;
/*! \brief Metric names, one per evaluation position */
std::vector<std::string> name_;
};
} // namespace LightGBM
#endif // LIGHTGBM_METRIC_MAP_METRIC_HPP_
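To make the arithmetic in ```CalMapAtK``` concrete, the self-contained sketch below (not part of the commit) computes MAP@k for a single query the same way: documents are sorted by score, each relevant document at 1-based rank ```r``` contributes ```hits_so_far / r```, and the sum is divided by ```k```:

```cpp
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// MAP@k for a single query, mirroring the cumulative logic of CalMapAtK.
static double MapAtK(const std::vector<float>& label,
                     const std::vector<double>& score, int k) {
  std::vector<int> idx(label.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::sort(idx.begin(), idx.end(),
            [&score](int a, int b) { return score[a] > score[b]; });
  k = std::min<int>(k, static_cast<int>(idx.size()));
  int num_hit = 0;
  double sum_ap = 0.0;
  for (int j = 0; j < k; ++j) {
    if (label[idx[j]] > 0.5f) {          // label > 0.5 counts as relevant
      ++num_hit;
      sum_ap += static_cast<double>(num_hit) / (j + 1.0);
    }
  }
  return sum_ap / k;                     // same normalization as the header
}

int main() {
  // One query: relevant documents land at ranks 1 and 3 after sorting by score.
  std::vector<float> label = {1.0f, 0.0f, 1.0f, 0.0f};
  std::vector<double> score = {0.9, 0.8, 0.7, 0.1};
  std::printf("MAP@3 = %f\n", MapAtK(label, score, 3));  // (1/1 + 2/3) / 3
  return 0;
}
```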
......@@ -2,6 +2,7 @@
#include "regression_metric.hpp"
#include "binary_metric.hpp"
#include "rank_metric.hpp"
#include "map_metric.hpp"
#include "multiclass_metric.hpp"
namespace LightGBM {
......@@ -23,6 +24,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
return new AUCMetric(config);
} else if (type == std::string("ndcg")) {
return new NDCGMetric(config);
} else if (type == std::string("map")) {
return new MapMetric(config);
} else if (type == std::string("multi_logloss")) {
return new MultiLoglossMetric(config);
} else if (type == std::string("multi_error")) {
......
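A rough sketch of how the new ```map``` branch in ```Metric::CreateMetric``` would be exercised. How ```config```, ```metadata```, and the score array are populated is assumed here rather than shown in the commit, and the wrapper function name is hypothetical:

```cpp
#include <LightGBM/metric.h>
#include <memory>

// Hypothetical helper: build and initialize a MAP metric for one validation set.
std::unique_ptr<LightGBM::Metric> MakeMapMetric(
    const LightGBM::MetricConfig& config,
    const LightGBM::Metadata& metadata,
    LightGBM::data_size_t num_data) {
  std::unique_ptr<LightGBM::Metric> metric(
      LightGBM::Metric::CreateMetric("map", config));
  metric->Init(metadata, num_data);  // binds labels and query boundaries
  return metric;                     // later: metric->Eval(score)
}
```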