Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
cc238121
Unverified
Commit
cc238121
authored
Jan 08, 2022
by
Leo Gao
Committed by
GitHub
Jan 08, 2022
Browse files
Merge pull request #243 from bigscience-workshop/thomas/fix_multirc
Fix multirc
parents
170ae096
73d0ae5e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
10 additions
and
6 deletions
+10
-6
lm_eval/metrics.py
lm_eval/metrics.py
+4
-3
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+4
-3
tests/testdata/multirc-v1-loglikelihood
tests/testdata/multirc-v1-loglikelihood
+1
-0
tests/testdata/multirc-v1-res.json
tests/testdata/multirc-v1-res.json
+1
-0
No files found.
lm_eval/metrics.py
View file @
cc238121
...
...
def acc_all(items):
    """Exact-match accuracy over MultiRC questions.

    A question counts as correct only if *every* one of its candidate
    answers was classified correctly.

    Args:
        items: iterable of ``(pred, doc)`` pairs, where ``doc`` carries
            ``doc["idx"]["paragraph"]``, ``doc["idx"]["question"]`` and a
            0/1 ``doc["label"]``, and ``pred`` is the model's boolean
            prediction for that answer.

    Returns:
        float: mean over questions of the all-answers-correct indicator.
    """
    question_scoring_dict = {}
    # NOTE(review): the `preds` line sits just above the visible diff hunk;
    # reconstructed from `docs = list(zip(*items))[1]` — confirm against file.
    preds = list(zip(*items))[0]
    docs = list(zip(*items))[1]

    for doc, pred in zip(docs, preds):
        paragraph_id = doc["idx"]["paragraph"]
        question_id = doc["idx"]["question"]
        # Key on (paragraph, question): question ids are only unique within
        # a paragraph, so keying on question_id alone would merge unrelated
        # questions (the bug this commit fixes).
        key = (paragraph_id, question_id)
        if key not in question_scoring_dict:
            question_scoring_dict[key] = []

        gold_label = doc["label"] == 1
        question_scoring_dict[key].append(gold_label == pred)

    # A question scores 1 only when all of its per-answer judgments are True.
    acc = np.mean([int(all(x)) for x in question_scoring_dict.values()])
    return acc
...
...
lm_eval/tasks/superglue.py
View file @
cc238121
...
...
@@ -188,7 +188,7 @@ class Copa(HFTask):
class MultiRC(HFTask):
    """SuperGLUE MultiRC reading-comprehension task.

    VERSION is bumped from 0 to 1 by this change (new answer formatting
    and per-(paragraph, question) accuracy grouping), which invalidates
    cached results recorded under version 0.
    """
    VERSION = 1
    DATASET_PATH = "super_glue"
    DATASET_NAME = "multirc"
...
...
@@ -210,7 +210,7 @@ class MultiRC(HFTask):
@
staticmethod
def
format_answer
(
answer
,
label
):
label_str
=
"yes"
if
label
else
"no"
return
f
"
{
label_str
}
,
{
answe
r
}
"
return
f
"
{
answer
}
\n
Is the answer correct?
{
label_st
r
}
"
def
construct_requests
(
self
,
doc
,
ctx
):
true_choice
=
self
.
format_answer
(
answer
=
doc
[
"answer"
],
label
=
True
)
...
...
@@ -222,7 +222,8 @@ class MultiRC(HFTask):
return
ll_true_choice
,
ll_false_choice
def process_results(self, doc, results):
    """Score one example from its two continuation loglikelihoods.

    Args:
        doc: the dataset example; forwarded unchanged so the ``acc_all``
            aggregation can group answers per (paragraph, question).
        results: ``(ll_true_choice, ll_false_choice)`` pair as produced
            by ``construct_requests``.

    Returns:
        dict: ``{"acc": (pred, doc)}`` where ``pred`` is True when the
        "yes" continuation is more likely than the "no" one.
    """
    ll_true_choice, ll_false_choice = results
    # Compare the two loglikelihoods directly (the removed np.argmax
    # variant depended on an arbitrary result ordering).
    pred = ll_true_choice > ll_false_choice
    return {"acc": (pred, doc)}
...
...
tests/testdata/multirc-v1-loglikelihood
0 → 100644
View file @
cc238121
0e793bd6f637a70a04c6f2cda080188fc037961b2f909095fe63f7bdbc4a90c6
\ No newline at end of file
tests/testdata/multirc-v1-res.json
0 → 100644
View file @
cc238121
{"results": {"multirc": {"acc": 0.046169989506820566, "acc_stderr": 0.006801377886208738}}, "versions": {"multirc": 1}}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment