Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
87d93e99
Unverified
Commit
87d93e99
authored
Jul 09, 2023
by
Lintang Sutawika
Committed by
GitHub
Jul 09, 2023
Browse files
use prediction and prediction index
parent
3aeb95b6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
6 deletions
+10
-6
lm_eval/api/task.py
lm_eval/api/task.py
+10
-6
No files found.
lm_eval/api/task.py
View file @
87d93e99
...
...
@@ -889,22 +889,26 @@ class ConfigurableTask(Task):
# and this stores our "regular" conditional loglikelihoods
lls
=
lls
[::
2
]
pred_idx
=
np
.
argmax
(
lls
)
pred_idx_norm
=
np
.
argmax
(
lls
/
completion_len
)
if
self
.
_config
.
gold_alias
is
not
None
:
gold
=
int
(
self
.
gold_alias
(
doc
))
pred
=
np
.
argmax
(
lls
)
pred_norm
=
np
.
argmax
(
lls
/
completion_len
)
pred
=
pred_idx
pred_norm
=
pred_idx_norm
else
:
gold
=
self
.
doc_to_target
(
doc
)
pred
=
choices
[
np
.
argmax
(
lls
)]
pred_norm
=
choices
[
np
.
argmax
(
lls
/
completion_len
)]
gold_idx
=
choices
.
index
(
gold
)
pred
=
choices
[
pred_idx
]
pred_norm
=
choices
[
pred_idx_norm
]
acc
=
1.0
if
pred
==
gold
else
0.0
acc_norm
=
1.0
if
pred_norm
==
gold
else
0.0
result_dict
=
{
**
({
"acc"
:
acc
}
if
"acc"
in
use_metric
else
{}),
**
({
"f1"
:
(
gold
,
pred
)}
if
"f1"
in
use_metric
else
{}),
**
({
"mcc"
:
(
gold
,
pred
)}
if
"mcc"
in
use_metric
else
{}),
**
({
"f1"
:
(
gold
_idx
,
pred
_idx
)}
if
"f1"
in
use_metric
else
{}),
**
({
"mcc"
:
(
gold
_idx
,
pred
_idx
)}
if
"mcc"
in
use_metric
else
{}),
**
({
"acc_norm"
:
acc_norm
}
if
"acc_norm"
in
use_metric
else
{}),
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment