Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
dae7b868
Commit
dae7b868
authored
Feb 05, 2022
by
Quentin Gregory Anthony
Browse files
Added decontamination to remaining evals
parent
341663a9
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
79 additions
and
0 deletions
+79
-0
lm_eval/tasks/quac.py
lm_eval/tasks/quac.py
+6
-0
lm_eval/tasks/race.py
lm_eval/tasks/race.py
+6
-0
lm_eval/tasks/sat.py
lm_eval/tasks/sat.py
+7
-0
lm_eval/tasks/squad.py
lm_eval/tasks/squad.py
+6
-0
lm_eval/tasks/storycloze.py
lm_eval/tasks/storycloze.py
+6
-0
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+6
-0
lm_eval/tasks/translation.py
lm_eval/tasks/translation.py
+6
-0
lm_eval/tasks/triviaqa.py
lm_eval/tasks/triviaqa.py
+6
-0
lm_eval/tasks/truthfulqa.py
lm_eval/tasks/truthfulqa.py
+6
-0
lm_eval/tasks/unscramble.py
lm_eval/tasks/unscramble.py
+6
-0
lm_eval/tasks/webqs.py
lm_eval/tasks/webqs.py
+6
-0
lm_eval/tasks/wikitext.py
lm_eval/tasks/wikitext.py
+6
-0
lm_eval/tasks/wsc273.py
lm_eval/tasks/wsc273.py
+6
-0
No files found.
lm_eval/tasks/quac.py
View file @
dae7b868
...
...
@@ -66,6 +66,12 @@ class QuAC(Task):
def doc_to_text(self, doc):
    """Assemble the prompt from the document's title, paragraph and question.

    The result ends with an open ``'A: '`` slot.
    """
    pieces = (
        'TITLE: ', doc['title'], '\n',
        'PARAGRAPH: ', doc['paragraph'], '\n\n',
        'Q: ', doc['question'], '\n\n',
        'A: ',
    )
    return ''.join(pieces)
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``'paragraph'`` field as the decontamination query."""
    paragraph = doc['paragraph']
    return paragraph
def doc_to_target(self, doc):
    """Return the document's ``'answer'`` field unchanged."""
    answer = doc['answer']
    return answer
...
...
lm_eval/tasks/race.py
View file @
dae7b868
...
...
@@ -86,6 +86,12 @@ class RACE(HFTask):
text
+=
self
.
last_problem
(
doc
)[
'question'
]
return
text
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``'article'`` field as the decontamination query."""
    article = doc['article']
    return article
def doc_to_target(self, doc):
    """Return the answer option of the document's last problem, prefixed by a space."""
    problem = self.last_problem(doc)
    option = self.get_answer_option(problem)
    return " " + option
...
...
lm_eval/tasks/sat.py
View file @
dae7b868
...
...
@@ -63,3 +63,10 @@ class SATAnalogies(MultipleChoiceTask):
def doc_to_text(self, doc):
    """Render the analogy stem by filling the template from ``doc['query']``."""
    template = "{} is to {} as"
    return template.format(*doc['query'])
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the decontamination query string for an analogy document.

    ``doc["query"]`` is a sequence of words (``doc_to_text`` unpacks it into
    a two-slot template), so the words are joined with spaces before being
    appended to ``doc["source"]``. Concatenating the sequence directly to a
    ``str`` would raise ``TypeError``.
    """
    query_words = " ".join(doc["query"])
    return doc["source"] + " " + query_words
lm_eval/tasks/squad.py
View file @
dae7b868
...
...
@@ -44,6 +44,12 @@ class SQuAD2(HFTask):
def doc_to_text(self, doc):
    """Assemble the prompt from the document's title, context and question.

    The result ends with an open ``'Answer:'`` slot.
    """
    pieces = (
        'Title: ', doc['title'], '\n\n',
        'Background: ', doc['context'], '\n\n',
        'Question: ', doc['question'], '\n\n',
        'Answer:',
    )
    return ''.join(pieces)
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``'context'`` field as the decontamination query."""
    context = doc['context']
    return context
def
doc_to_target
(
self
,
doc
):
answer_list
=
doc
[
'answers'
][
'text'
]
if
len
(
answer_list
)
>
0
:
...
...
lm_eval/tasks/storycloze.py
View file @
dae7b868
...
...
@@ -36,6 +36,12 @@ class StoryCloze(Task):
def doc_to_text(self, doc):
    """Join the elements at positions 1 through 4 of ``doc`` with single spaces."""
    sentences = doc[1:5]
    return ' '.join(sentences)
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the story context of ``doc`` as the decontamination query.

    Documents of this task are positional sequences (``doc_to_text`` slices
    ``doc[1:5]`` and ``doc_to_target`` reads ``doc[-1]``), so a string key
    such as ``doc["context"]`` cannot be used to index them. The query is
    therefore built from the same four elements that make up the prompt.
    """
    return " ".join(doc[1:5])
def doc_to_target(self, doc):
    """Return the selected element of ``doc`` prefixed by a space.

    The last element of ``doc`` is converted to ``int`` and reduced by 4;
    the result is used as the index of the element to return.
    """
    label = int(doc[-1])
    ending = doc[label - 4]
    return " " + ending
...
...
lm_eval/tasks/superglue.py
View file @
dae7b868
...
...
@@ -28,6 +28,12 @@ class BoolQ(HFTask):
def doc_to_text(self, doc):
    """Format the passage and question into a prompt ending in ``Answer:``."""
    template = "{}\nQuestion: {}?\nAnswer:"
    return template.format(doc['passage'], doc['question'])
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``'passage'`` field as the decontamination query."""
    passage = doc['passage']
    return passage
def doc_to_target(self, doc):
    """Return ``yesno(doc['label'])`` prefixed by a single space."""
    answer = yesno(doc['label'])
    return " " + answer
...
...
lm_eval/tasks/translation.py
View file @
dae7b868
...
...
@@ -109,6 +109,12 @@ class GeneralTranslationTask(Task):
tar_lang
=
code_to_language
(
language_codes
[
1
])
return
f
"
{
src_lang
}
phrase: "
+
doc
[
"src"
]
+
f
"
\n
{
tar_lang
}
phrase:"
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the source phrase of ``doc`` as the decontamination query.

    The previous implementation concatenated ``doc["ref"]`` with itself,
    which duplicated the reference text and raised ``TypeError`` whenever
    ``doc["ref"]`` was a list (a case ``doc_to_target`` explicitly allows
    for). ``doc["src"]`` is the string that ``doc_to_text`` places in the
    prompt, so it is used as the query instead.
    """
    return doc["src"]
def doc_to_target(self, doc):
    """Return a single reference translation for ``doc``.

    A string reference is returned with a leading space. Otherwise the
    first item of ``doc["ref"]`` is returned as-is.
    """
    # This shows a single target, though there may be multiple targets in a lang test
    reference = doc["ref"]
    if isinstance(reference, str):
        return " " + reference
    # NOTE(review): no leading space is added on this path — confirm that is intended.
    return reference[0]
...
...
lm_eval/tasks/triviaqa.py
View file @
dae7b868
...
...
@@ -39,6 +39,12 @@ class TriviaQA(Task):
def doc_to_text(self, doc):
    """Format the document's ``'Question'`` into a prompt ending in ``Answer:``."""
    template = "Question: {}\nAnswer:"
    return template.format(doc['Question'])
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the question followed by the search-result description.

    The two parts are separated by a single space.
    """
    question = doc['Question']
    # NOTE(review): assumes doc['SearchResults'] is a mapping with a string
    # 'Description' entry — verify against the dataset schema.
    description = doc['SearchResults']['Description']
    return question + " " + description
def doc_to_target(self, doc):
    """Return ``doc['Answer']['Value']`` prefixed by a single space."""
    value = doc['Answer']['Value']
    return " " + value
...
...
lm_eval/tasks/truthfulqa.py
View file @
dae7b868
...
...
@@ -82,6 +82,12 @@ class TruthfulQAMultipleChoice(Task):
def doc_to_text(self, doc):
    """Append the document's question to ``QA_PROMPT``, ending with an open ``A:`` slot."""
    question_block = "\n\nQ: " + doc['question']
    return QA_PROMPT + question_block + "\nA:"
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``'question'`` field as the decontamination query."""
    question = doc['question']
    return question
def doc_to_target(self, doc):
    """Return a single space; ``doc`` is not consulted."""
    separator = " "
    return separator
...
...
lm_eval/tasks/unscramble.py
View file @
dae7b868
...
...
@@ -48,6 +48,12 @@ class WordUnscrambleTask(Task):
def doc_to_text(self, doc):
    """Return the document's ``"context"`` field as the prompt."""
    context = doc["context"]
    return context
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``"context"`` field as the decontamination query."""
    context = doc["context"]
    return context
def doc_to_target(self, doc):
    """Return the document's ``"completion"`` field unchanged."""
    completion = doc["completion"]
    return completion
...
...
lm_eval/tasks/webqs.py
View file @
dae7b868
...
...
@@ -20,6 +20,12 @@ class WebQs(HFTask):
def doc_to_text(self, doc):
    """Wrap the document's question in a prompt ending with ``Answer:``."""
    pieces = ("Question: ", doc['question'], '\nAnswer:')
    return ''.join(pieces)
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``'question'`` field as the decontamination query."""
    question = doc['question']
    return question
def
doc_to_target
(
self
,
doc
):
# this picks one answer to be the "correct" one, despite sometimes
# multiple correct answers being possible.
...
...
lm_eval/tasks/wikitext.py
View file @
dae7b868
...
...
@@ -80,6 +80,12 @@ class WikiText(PerplexityTask):
def doc_to_target(self, doc):
    """Return ``doc`` after passing it through ``wikitext_detokenizer``."""
    detokenized = wikitext_detokenizer(doc)
    return detokenized
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``"text"`` entry as the decontamination query."""
    # NOTE(review): doc_to_target hands `doc` straight to wikitext_detokenizer,
    # which suggests `doc` may be a plain string rather than a mapping; if so,
    # this lookup will fail — confirm the document type.
    text = doc["text"]
    return text
def
count_words
(
self
,
doc
):
# count number of words in *original doc before detokenization*
...
...
lm_eval/tasks/wsc273.py
View file @
dae7b868
...
...
@@ -65,6 +65,12 @@ class WinogradSchemaChallenge273(HFTask):
def doc_to_text(self, doc):
    """Return ``partial_context`` for the option selected by ``doc["label"]``."""
    chosen_option = doc["options"][doc["label"]]
    return self.partial_context(doc, chosen_option)
def should_decontaminate(self):
    """Opt this task into decontamination; always returns True."""
    return True
def doc_to_decontamination_query(self, doc):
    """Return the document's ``"text"`` field as the decontamination query."""
    text = doc["text"]
    return text
@
classmethod
def
partial_context
(
cls
,
doc
,
option
):
# Substitute the pronoun in the original text with the specified
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment