Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
827e2d51
Unverified
Commit
827e2d51
authored
Apr 28, 2022
by
Stella Biderman
Committed by
GitHub
Apr 28, 2022
Browse files
Merge pull request #306 from jon-tow/add-swag
Add `SWAG`
parents
1da3d719
9b6c45a8
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
63 additions
and
0 deletions
+63
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-0
lm_eval/tasks/swag.py
lm_eval/tasks/swag.py
+59
-0
tests/testdata/swag-v0-loglikelihood
tests/testdata/swag-v0-loglikelihood
+1
-0
tests/testdata/swag-v0-res.json
tests/testdata/swag-v0-res.json
+1
-0
No files found.
lm_eval/tasks/__init__.py
View file @
827e2d51
...
@@ -15,6 +15,7 @@ from . import wsc273
...
@@ -15,6 +15,7 @@ from . import wsc273
from
.
import
winogrande
from
.
import
winogrande
from
.
import
quac
from
.
import
quac
from
.
import
hellaswag
from
.
import
hellaswag
from
.
import
swag
from
.
import
openbookqa
from
.
import
openbookqa
from
.
import
squad
from
.
import
squad
from
.
import
naturalqs
from
.
import
naturalqs
...
@@ -136,6 +137,7 @@ TASK_REGISTRY = {
...
@@ -136,6 +137,7 @@ TASK_REGISTRY = {
# "quac": quac.QuAC, # not implemented yet
# "quac": quac.QuAC, # not implemented yet
"logiqa"
:
logiqa
.
LogiQA
,
"logiqa"
:
logiqa
.
LogiQA
,
"hellaswag"
:
hellaswag
.
HellaSwag
,
"hellaswag"
:
hellaswag
.
HellaSwag
,
"swag"
:
swag
.
SWAG
,
"openbookqa"
:
openbookqa
.
OpenBookQA
,
"openbookqa"
:
openbookqa
.
OpenBookQA
,
"squad2"
:
squad
.
SQuAD2
,
"squad2"
:
squad
.
SQuAD2
,
"race"
:
race
.
RACE
,
"race"
:
race
.
RACE
,
...
...
lm_eval/tasks/swag.py
0 → 100644
View file @
827e2d51
"""
SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference
https://arxiv.org/pdf/1808.05326.pdf
SWAG (Situations With Adversarial Generations) is an adversarial dataset
that consists of 113k multiple choice questions about grounded situations. Each
question is a video caption from LSMDC or ActivityNet Captions, with four answer
choices about what might happen next in the scene. The correct answer is the
(real) video caption for the next event in the video; the three incorrect
answers are adversarially generated and human verified, so as to fool machines
but not humans.
Homepage: https://rowanzellers.com/swag/
"""
from
lm_eval.base
import
MultipleChoiceTask
_CITATION
=
"""
@inproceedings{zellers2018swagaf,
title={SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference},
author={Zellers, Rowan and Bisk, Yonatan and Schwartz, Roy and Choi, Yejin},
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
year={2018}
}
"""
class SWAG(MultipleChoiceTask):
    """Multiple-choice task over the HF `swag` dataset ("regular" config).

    Each document is a video-caption start phrase (`startphrase`) with four
    candidate endings (`ending0`..`ending3`); `label` indexes the correct one.
    """

    VERSION = 0
    DATASET_PATH = "swag"
    DATASET_NAME = "regular"

    def has_training_docs(self):
        # SWAG ships a training split.
        return True

    def has_validation_docs(self):
        # SWAG ships a validation split.
        return True

    def has_test_docs(self):
        # Test-split labels are not public, so the task exposes no test docs.
        return False

    def training_docs(self):
        """Return the processed training documents, caching them on first use."""
        if self._training_docs is None:
            self._training_docs = [
                self._process_doc(example) for example in self.dataset["train"]
            ]
        return self._training_docs

    def validation_docs(self):
        """Lazily yield processed validation documents."""
        return (self._process_doc(example) for example in self.dataset["validation"])

    def _process_doc(self, doc):
        """Map a raw dataset row to the harness multiple-choice format."""
        endings = [doc[f"ending{idx}"] for idx in range(4)]
        return {
            "query": doc["startphrase"],
            "choices": endings,
            # `label` may arrive as a string; normalize to int for indexing.
            "gold": int(doc["label"]),
        }

    def doc_to_text(self, doc):
        """The prompt shown to the model is the start phrase alone."""
        return doc["query"]
tests/testdata/swag-v0-loglikelihood
0 → 100644
View file @
827e2d51
be4fcbad876124c4ba3c71970538a97fec0e36a9cc677c70b6c9243a7bcee0ec
\ No newline at end of file
tests/testdata/swag-v0-res.json
0 → 100644
View file @
827e2d51
{
"results"
:
{
"swag"
:
{
"acc"
:
0.2482255323402979
,
"acc_norm"
:
0.24882535239428172
,
"acc_norm_stderr"
:
0.00305666959496067
,
"acc_stderr"
:
0.003054201832644171
}},
"versions"
:
{
"swag"
:
0
}}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment