Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
fa0ba222
Unverified
Commit
fa0ba222
authored
May 12, 2024
by
Jess
Committed by
GitHub
May 12, 2024
Browse files
Merge pull request #9 from JessicaOjo/africamgsm
remove added metrics -afrimgsm
parents
6bb95bbe
58692fb5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
25 deletions
+0
-25
lm_eval/api/metrics.py
lm_eval/api/metrics.py
+0
-24
lm_eval/api/task.py
lm_eval/api/task.py
+0
-1
No files found.
lm_eval/api/metrics.py
View file @
fa0ba222
...
...
@@ -58,20 +58,6 @@ def f1_score(items):
return
np
.
max
(
fscore
)
@register_aggregation("squad_f1")
def squad_f1_score(items):
    """Aggregate (reference, prediction) pairs into a single SQuAD F1 score.

    Args:
        items: Iterable of ``(ref, pred)`` pairs. Both members are converted
            with ``str`` before being handed to the metric.

    Returns:
        The ``'f1'`` entry of the result dict produced by the ``squad``
        metric loaded through ``hf_evaluate``.
    """
    gold_squad, pred_squad = [], []
    for index, (ref, pred) in enumerate(items):
        # The positional index doubles as the id that pairs a prediction
        # with its reference.
        example_id = str(index)
        pred_squad.append({"prediction_text": str(pred), "id": example_id})
        gold_squad.append(
            {
                "answers": {
                    # Always 0 here; presumably a placeholder offset, since
                    # no context passage is involved - TODO confirm.
                    "answer_start": [0],
                    # The squad metric's documented reference format is a
                    # list of answer texts parallel to ``answer_start``;
                    # a bare string would be iterated character by character.
                    "text": [str(ref)],
                },
                "id": example_id,
            }
        )

    squad_metric = hf_evaluate.load("squad")
    results_squad = squad_metric.compute(
        predictions=pred_squad, references=gold_squad
    )
    return results_squad["f1"]
@
register_aggregation
(
"matthews_corrcoef"
)
def
matthews_corrcoef
(
items
):
unzipped_list
=
list
(
zip
(
*
items
))
...
...
@@ -192,16 +178,6 @@ def exact_match_fn(**kwargs):
return
exact_match
.
compute
(
**
kwargs
)
@register_metric(
    metric="squad",
    higher_is_better=True,
    output_type="generate_until",
    aggregation="squad_f1",
)
def squad_fn(items):
    """Return ``items`` unchanged.

    The function performs no per-item computation; it is registered under
    the metric name "squad" with "squad_f1" named as its aggregation.
    """
    return items
@
register_metric
(
metric
=
"perplexity"
,
higher_is_better
=
False
,
...
...
lm_eval/api/task.py
View file @
fa0ba222
...
...
@@ -1294,7 +1294,6 @@ class ConfigurableTask(Task):
**
({
"f1"
:
(
gold
,
pred
)}
if
"f1"
in
use_metric
else
{}),
**
({
"mcc"
:
(
gold
,
pred
)}
if
"mcc"
in
use_metric
else
{}),
**
({
"acc_norm"
:
acc_norm
}
if
"acc_norm"
in
use_metric
else
{}),
**
({
"squad"
:
(
gold
,
pred
)}
if
"squad"
in
use_metric
else
{}),
**
({
"exact_match"
:
exact_match
}
if
"exact_match"
in
use_metric
else
{}),
**
(
{
"brier_score"
:
(
gold
,
prob_norm
)}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment