Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
7426ac3c
Commit
7426ac3c
authored
Jan 20, 2017
by
Guolin Ke
Browse files
add map metric
parent
e9b82412
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
164 additions
and
2 deletions
+164
-2
docs/Parameters.md
docs/Parameters.md
+3
-2
include/LightGBM/config.h
include/LightGBM/config.h
+1
-0
src/metric/map_metric.hpp
src/metric/map_metric.hpp
+157
-0
src/metric/metric.cpp
src/metric/metric.cpp
+3
-0
No files found.
docs/Parameters.md
View file @
7426ac3c
...
...
@@ -179,7 +179,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
*
used in binary classification. Set this to
```true```
if training data are unbalanced.
*
```max_position```
, default=
```20```
, type=int
*
used in lambdarank, will optimize NDCG at this position.
*
```label_gain```
, default=
```
{
0,1,3,7,15,31,63,...
}
```
, type=multi-double
*
```label_gain```
, default=
```0,1,3,7,15,31,63,...```
, type=multi-double
*
used in lambdarank, relevant gain for labels. For example, the gain of label
```2```
is
```3```
if using default label gains.
*
Separate by
```,```
*
```num_class```
, default=
```1```
, type=int, alias=
```num_classes```
...
...
@@ -193,6 +193,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
*
```huber```
,
[
Huber loss
](
https://en.wikipedia.org/wiki/Huber_loss
"Huber loss - Wikipedia"
)
*
```fair```
,
[
Fair loss
](
http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html
)
*
```ndcg```
,
[
NDCG
](
https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG
)
*
```map```
,
[
MAP
](
https://www.kaggle.com/wiki/MeanAveragePrecision
)
*
```auc```
,
[
AUC
](
https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
)
*
```binary_logloss```
,
[
log loss
](
https://www.kaggle.com/wiki/LogarithmicLoss
)
*
```binary_error```
. For one sample
```0```
for correct classification,
```1```
for error classification.
...
...
@@ -203,7 +204,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
*
frequency for metric output
*
```is_training_metric```
, default=
```false```
, type=bool
*
set this to true if need to output metric result of training
*
```ndcg_at```
, default=
```
{
1,2,3,4,5
}
```
, type=multi-int, alias=
```ndcg_eval_at```
*
```ndcg_at```
, default=
```1,2,3,4,5```
, type=multi-int, alias=
```ndcg_eval_at```
,
```eval_at```
*
NDCG evaluation position, separate by
```,```
## Network parameters
...
...
include/LightGBM/config.h
View file @
7426ac3c
...
...
@@ -339,6 +339,7 @@ struct ParameterAlias {
{
"tranining_metric"
,
"is_training_metric"
},
{
"train_metric"
,
"is_training_metric"
},
{
"ndcg_at"
,
"ndcg_eval_at"
},
{
"eval_at"
,
"ndcg_eval_at"
},
{
"min_data_per_leaf"
,
"min_data_in_leaf"
},
{
"min_data"
,
"min_data_in_leaf"
},
{
"min_child_samples"
,
"min_data_in_leaf"
},
...
...
src/metric/map_metric.hpp
0 → 100644
View file @
7426ac3c
#ifndef LIGHTGBM_METRIC_MAP_METRIC_HPP_
#define LIGHTGBM_METRIC_MAP_METRIC_HPP_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

#include <LightGBM/metric.h>

#include <omp.h>

#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
namespace LightGBM {

/*!
* \brief MAP (Mean Average Precision) metric for ranking tasks.
*        Reports "map@k" for every position k in config.eval_at, averaged
*        over queries (weighted by query weights when present).
*/
class MapMetric: public Metric {
 public:
  explicit MapMetric(const MetricConfig& config) {
    // get eval position
    for (auto k : config.eval_at) {
      eval_at_.push_back(static_cast<data_size_t>(k));
    }
    // get number of threads
    #pragma omp parallel
    #pragma omp master
    {
      num_threads_ = omp_get_num_threads();
    }
  }

  ~MapMetric() {
  }

  /*!
  * \brief Bind the metric to one data set.
  * \param metadata Label / query information of the data set
  * \param num_data Number of data points
  */
  void Init(const Metadata& metadata, data_size_t num_data) override {
    for (auto k : eval_at_) {
      name_.emplace_back(std::string("map@") + std::to_string(k));
    }
    num_data_ = num_data;
    // get label
    label_ = metadata.label();
    // get query boundaries; MAP is undefined without query grouping
    query_boundaries_ = metadata.query_boundaries();
    if (query_boundaries_ == nullptr) {
      Log::Fatal("For MAP metric, there should be query information");
    }
    num_queries_ = metadata.num_queries();
    Log::Info("total groups: %d , total data: %d", num_queries_, num_data_);
    // get query weights
    query_weights_ = metadata.query_weights();
    if (query_weights_ == nullptr) {
      // unweighted: every query counts as 1
      sum_query_weights_ = static_cast<double>(num_queries_);
    } else {
      sum_query_weights_ = 0.0;
      for (data_size_t i = 0; i < num_queries_; ++i) {
        sum_query_weights_ += query_weights_[i];
      }
    }
  }

  const std::vector<std::string>& GetName() const override {
    return name_;
  }

  // larger MAP is better
  double factor_to_bigger_better() const override {
    return 1.0;
  }

  /*!
  * \brief Compute MAP at every cut-off in ks for one query.
  * \param ks Cut-off positions, assumed ascending (taken by const ref to
  *           avoid a copy per query)
  * \param label Labels of this query; label > 0.5 counts as relevant
  * \param score Predicted scores of this query
  * \param num_data Number of documents in this query
  * \param out out->at(i) receives MAP at ks[i]; must have ks.size() entries
  */
  void CalMapAtK(const std::vector<int>& ks, const float* label,
                 const double* score, data_size_t num_data,
                 std::vector<double>* out) const {
    // get sorted indices by score, descending
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data; ++i) {
      sorted_idx.emplace_back(i);
    }
    std::sort(sorted_idx.begin(), sorted_idx.end(),
              [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
    int num_hit = 0;
    double sum_ap = 0.0;
    data_size_t cur_left = 0;
    for (size_t i = 0; i < ks.size(); ++i) {
      data_size_t cur_k = ks[i];
      if (cur_k > num_data) {
        cur_k = num_data;
      }
      // extend the running AP sum from the previous cut-off up to cur_k
      for (data_size_t j = cur_left; j < cur_k; ++j) {
        data_size_t idx = sorted_idx[j];
        if (label[idx] > 0.5f) {
          ++num_hit;
          // precision at rank (j + 1). BUGFIX: the original divided by
          // (i + 1.0f) — the index into ks, not the rank — so every hit
          // at the first cut-off contributed num_hit instead of
          // num_hit / rank, yielding "precisions" greater than 1.
          sum_ap += static_cast<double>(num_hit) / (j + 1.0);
        }
      }
      // guard empty queries (cur_k == 0) against division by zero
      (*out)[i] = cur_k > 0 ? sum_ap / cur_k : 0.0;
      cur_left = cur_k;
    }
  }

  /*!
  * \brief Evaluate MAP over all queries.
  * \param score Predicted scores for all data, aligned with labels
  * \return (Weighted) average MAP at each position in eval_at_
  */
  std::vector<double> Eval(const double* score) const override {
    // per-thread buffers for the multi-threading sum up
    // (local, so no trailing underscore)
    std::vector<std::vector<double>> result_buffer;
    for (int i = 0; i < num_threads_; ++i) {
      result_buffer.emplace_back(eval_at_.size(), 0.0);
    }
    std::vector<double> tmp_map(eval_at_.size(), 0.0);
    if (query_weights_ == nullptr) {
      #pragma omp parallel for schedule(guided) firstprivate(tmp_map)
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
        CalMapAtK(eval_at_, label_ + query_boundaries_[i],
                  score + query_boundaries_[i],
                  query_boundaries_[i + 1] - query_boundaries_[i], &tmp_map);
        for (size_t j = 0; j < eval_at_.size(); ++j) {
          result_buffer[tid][j] += tmp_map[j];
        }
      }
    } else {
      #pragma omp parallel for schedule(guided) firstprivate(tmp_map)
      for (data_size_t i = 0; i < num_queries_; ++i) {
        const int tid = omp_get_thread_num();
        CalMapAtK(eval_at_, label_ + query_boundaries_[i],
                  score + query_boundaries_[i],
                  query_boundaries_[i + 1] - query_boundaries_[i], &tmp_map);
        for (size_t j = 0; j < eval_at_.size(); ++j) {
          result_buffer[tid][j] += tmp_map[j] * query_weights_[i];
        }
      }
    }
    // Get final average MAP: reduce per-thread sums, normalize by weight sum
    std::vector<double> result(eval_at_.size(), 0.0);
    for (size_t j = 0; j < result.size(); ++j) {
      for (int i = 0; i < num_threads_; ++i) {
        result[j] += result_buffer[i][j];
      }
      result[j] /= sum_query_weights_;
    }
    return result;
  }

 private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Query boundaries information */
  const data_size_t* query_boundaries_;
  /*! \brief Number of queries */
  data_size_t num_queries_;
  /*! \brief Weights of queries */
  const float* query_weights_;
  /*! \brief Sum weights of queries */
  double sum_query_weights_;
  /*! \brief Evaluation positions for MAP */
  std::vector<data_size_t> eval_at_;
  /*! \brief Number of threads */
  int num_threads_;
  /*! \brief Metric names, one per evaluation position */
  std::vector<std::string> name_;
};

}  // namespace LightGBM

#endif   // LIGHTGBM_METRIC_MAP_METRIC_HPP_
src/metric/metric.cpp
View file @
7426ac3c
...
...
@@ -2,6 +2,7 @@
#include "regression_metric.hpp"
#include "binary_metric.hpp"
#include "rank_metric.hpp"
#include "map_metric.hpp"
#include "multiclass_metric.hpp"
namespace
LightGBM
{
...
...
@@ -23,6 +24,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
return
new
AUCMetric
(
config
);
}
else
if
(
type
==
std
::
string
(
"ndcg"
))
{
return
new
NDCGMetric
(
config
);
}
else
if
(
type
==
std
::
string
(
"map"
))
{
return
new
MapMetric
(
config
);
}
else
if
(
type
==
std
::
string
(
"multi_logloss"
))
{
return
new
MultiLoglossMetric
(
config
);
}
else
if
(
type
==
std
::
string
(
"multi_error"
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment