Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
346e2c22
Commit
346e2c22
authored
Jul 22, 2023
by
jon-tow
Committed by
guac
Jul 22, 2023
Browse files
feat(tasks): Add SIQA
parent
df3da98c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
68 additions
and
0 deletions
+68
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-0
lm_eval/tasks/siqa.py
lm_eval/tasks/siqa.py
+66
-0
No files found.
lm_eval/tasks/__init__.py
View file @
346e2c22
...
@@ -23,6 +23,7 @@ from . import naturalqs
...
@@ -23,6 +23,7 @@ from . import naturalqs
from
.
import
sat
from
.
import
sat
from
.
import
arithmetic
from
.
import
arithmetic
from
.
import
lambada
from
.
import
lambada
from
.
import
siqa
from
.
import
piqa
from
.
import
piqa
from
.
import
prost
from
.
import
prost
from
.
import
mc_taco
from
.
import
mc_taco
...
@@ -123,6 +124,7 @@ TASK_REGISTRY = {
...
@@ -123,6 +124,7 @@ TASK_REGISTRY = {
"lambada_standard"
:
lambada
.
LambadaStandard
,
"lambada_standard"
:
lambada
.
LambadaStandard
,
"lambada_openai_cloze"
:
lambada_cloze
.
LambadaOpenAICloze
,
"lambada_openai_cloze"
:
lambada_cloze
.
LambadaOpenAICloze
,
"lambada_standard_cloze"
:
lambada_cloze
.
LambadaStandardCloze
,
"lambada_standard_cloze"
:
lambada_cloze
.
LambadaStandardCloze
,
"siqa"
:
siqa
.
SIQA
,
# multilingual lambada
# multilingual lambada
**
lambada_multilingual
.
construct_tasks
(),
**
lambada_multilingual
.
construct_tasks
(),
"wikitext"
:
wikitext
.
WikiText
,
"wikitext"
:
wikitext
.
WikiText
,
...
...
lm_eval/tasks/siqa.py
0 → 100644
View file @
346e2c22
"""
SOCIAL IQA: Commonsense Reasoning about Social Interactions
https://aclanthology.org/D19-1454.pdf
Social IQa (Social Interaction QA) is a question-answering benchmark for testing
social commonsense intelligence. Contrary to many prior benchmarks that focus on
physical or taxonomic knowledge, Social IQa focuses on reasoning about people’s
actions and their social implications. For example, given an action like "Jesse
saw a concert" and a question like "Why did Jesse do this?", humans can easily
infer that Jesse wanted "to see their favorite performer" or "to enjoy the music",
and not "to see what's happening inside" or "to see if it works". The actions in Social IQa
span a wide variety of social situations, and answer candidates contain both human-curated
answers and adversarially-filtered machine-generated candidates.
Social IQa contains over 37,000 QA pairs for evaluating models’ abilities to reason
about the social implications of everyday events and situations.
Homepage: https://leaderboard.allenai.org/socialiqa/submissions/get-started
"""
from
lm_eval.base
import
MultipleChoiceTask
# BibTeX entry for the paper that introduced the Social IQA benchmark.
_CITATION
=
"""
@inproceedings{Sap2019SocialIC,
title={Social IQA: Commonsense Reasoning about Social Interactions},
author={Maarten Sap and Hannah Rashkin and Derek Chen and Ronan Le Bras and Yejin Choi},
booktitle={Conference on Empirical Methods in Natural Language Processing},
year={2019}
}
"""
class SIQA(MultipleChoiceTask):
    """Social IQa multiple-choice task.

    Each processed document carries a ``query`` (context plus question), a
    list of three answer ``choices`` and the zero-based index of the ``gold``
    answer. Only the ``train`` and ``validation`` splits are exposed.
    """

    # Task version reported by this class.
    VERSION = 0
    # Dataset identifier and (absent) configuration name.
    # NOTE(review): presumably consumed by the base class to load
    # `self.dataset` -- confirm against `lm_eval.base`.
    DATASET_PATH = "social_i_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        """Report that a training split is available."""
        return True

    def has_validation_docs(self):
        """Report that a validation split is available."""
        return True

    def has_test_docs(self):
        """Report that no test split is exposed by this task."""
        return False

    def training_docs(self):
        """Return the processed training documents as a cached list.

        The list is built on first use and stored on ``self._training_docs``;
        later calls return the same list object.
        """
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            # NOTE(review): `_training_docs` and `dataset` are not defined in
            # this class; presumably set up by the base class -- confirm.
            train_split = self.dataset["train"]
            self._training_docs = [self._process_doc(raw) for raw in train_split]
        return self._training_docs

    def validation_docs(self):
        """Return a lazy, single-pass iterator over processed validation docs."""
        if not self.has_validation_docs():
            return None
        validation_split = self.dataset["validation"]
        return map(self._process_doc, validation_split)

    def _process_doc(self, doc):
        """Convert one raw dataset record into the multiple-choice format.

        Reads the ``context``, ``question``, ``answerA``/``answerB``/``answerC``
        and ``label`` fields of ``doc`` and returns a dict with ``query``,
        ``choices`` and ``gold`` keys.
        """
        query = f"{doc['context']}\nQuestion: {doc['question']}"
        choices = [doc["answerA"], doc["answerB"], doc["answerC"]]
        # Labels are 1-indexed in the dataset; shift to a 0-based index.
        gold = int(doc["label"]) - 1
        return {
            "query": query,
            "choices": choices,
            "gold": gold,
        }

    def doc_to_text(self, doc):
        """Return the prompt text: the document's query followed by an answer cue."""
        prompt = doc["query"]
        return prompt + "\nAnswer:"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment