gaoqiong / lm-evaluation-harness / Commits

Commit e00d682f (unverified)
Authored Apr 30, 2022 by Jonathan Tow; committed by GitHub on Apr 30, 2022

Merge pull request #261 from EleutherAI/researcher2

Update CLI options and introduce decontamination

Parents: eb8163e9, ab6883b1
Changes: 59 files in total. This page shows 20 changed files with 139 additions and 0 deletions (+139, -0).
lm_eval/tasks/hellaswag.py           +6   -0
lm_eval/tasks/hendrycks_ethics.py    +24  -0
lm_eval/tasks/hendrycks_math.py      +6   -0
lm_eval/tasks/hendrycks_test.py      +6   -0
lm_eval/tasks/lambada.py             +6   -0
lm_eval/tasks/lambada_cloze.py       +6   -0
lm_eval/tasks/logiqa.py              +7   -0
lm_eval/tasks/mathqa.py              +6   -0
lm_eval/tasks/mc_taco.py             +6   -0
lm_eval/tasks/mutual.py              +6   -0
lm_eval/tasks/naturalqs.py           +6   -0
lm_eval/tasks/openbookqa.py          +6   -0
lm_eval/tasks/piqa.py                +6   -0
lm_eval/tasks/prost.py               +6   -0
lm_eval/tasks/pubmedqa.py            +6   -0
lm_eval/tasks/qa4mre.py              +6   -0
lm_eval/tasks/quac.py                +6   -0
lm_eval/tasks/race.py                +6   -0
lm_eval/tasks/sat.py                 +6   -0
lm_eval/tasks/sciq.py                +6   -0
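All 20 files make the same kind of addition, visible in the diffs below: a should_decontaminate() method that opts the task into decontamination, and a doc_to_decontamination_query(doc) method that returns the raw text to be checked for overlap with the training data. As a minimal sketch of the recurring pattern (the class name and the "query" field here are placeholders, not code from this commit):

    class SomeTask(MultipleChoiceTask):  # hypothetical example task
        def should_decontaminate(self):
            # Opt this task into decontamination checking.
            return True

        def doc_to_decontamination_query(self, doc):
            # Raw text to compare against the pretraining corpus.
            return doc["query"]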
lm_eval/tasks/hellaswag.py

@@ -69,3 +69,9 @@ class HellaSwag(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
lm_eval/tasks/hendrycks_ethics.py

@@ -90,6 +90,12 @@ class EthicsCM(Ethics):
     def doc_to_text(self, doc):
         return "{}\nQuestion: Is this wrong?\nAnswer:".format(doc["input"])
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["input"]
 
     def doc_to_target(self, doc):
         return " {}".format(yesno(int(doc["label"])))

@@ -125,6 +131,12 @@ class EthicsDeontology(Ethics):
         prompt = " ".join([doc["scenario"], doc["excuse"]])
         return "Question: Would most people believe this reasonable or unreasonable to say? \"{}\"\nAnswer:".format(prompt)
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return " ".join([doc["scenario"], doc["excuse"]])
 
     def doc_to_target(self, doc):
         target = ["unreasonable", "reasonable"][int(doc["label"])]
         return " {}".format(target)

@@ -170,6 +182,12 @@ class EthicsJustice(Ethics):
     def doc_to_text(self, doc):
         return "Question: Would most people believe this reasonable or unreasonable to say? \"{}\"\nAnswer:".format(doc["scenario"])
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["scenario"]
 
     def doc_to_target(self, doc):
         target = ["unreasonable", "reasonable"][int(doc["label"])]
         return " {}".format(target)

@@ -232,6 +250,12 @@ class EthicsUtilitarianismOriginal(Ethics):
     def doc_to_text(self, doc):
         return 'Activity: "{}"\nRating:'.format(doc["activity"])
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["activity"]
 
     def doc_to_target(self, doc):
         return " " + doc["rating"]
lm_eval/tasks/hendrycks_math.py

@@ -54,6 +54,12 @@ class Math(Task):
     def doc_to_text(self, doc):
         return "Problem: " + doc["problem"] + "\nAnswer:"
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["problem"]
 
     def doc_to_target(self, doc):
         return " " + doc["solution"]
lm_eval/tasks/hendrycks_test.py

@@ -111,3 +111,9 @@ class GeneralHendrycksTest(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
lm_eval/tasks/lambada.py

@@ -55,6 +55,12 @@ class LAMBADA(Task):
     def doc_to_text(self, doc):
         return doc['text'].rsplit(' ', 1)[0]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['text']
 
     def doc_to_target(self, doc):
         return " " + doc['text'].rsplit(' ', 1)[1]
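A note on the LAMBADA format: str.rsplit(' ', 1) splits off only the final space-delimited word, so doc_to_text yields everything before the last word while doc_to_target yields the last word itself. A quick illustration (the example string is invented here):

    text = "The quick brown fox jumps"
    context = text.rsplit(' ', 1)[0]  # "The quick brown fox"
    target = text.rsplit(' ', 1)[1]   # "jumps"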
lm_eval/tasks/lambada_cloze.py

@@ -34,5 +34,11 @@ class LAMBADA_cloze(LAMBADA):
     def doc_to_text(self, doc):
         return doc['text'].rsplit(' ', 1)[0] + " ____. ->"
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['text']
 
     def doc_to_target(self, doc):
         return " " + doc['text'].rsplit(' ', 1)[1]
lm_eval/tasks/logiqa.py

@@ -72,6 +72,7 @@ class LogiQA(MultipleChoiceTask):
             return prompt
 
         choices = ['a', 'b', 'c', 'd']
         return {
+            "passage": doc["context"],  # Used for decontamination
             "query": format_example(doc, choices),
             "choices": doc["options"],
             "gold": choices.index(doc["label"])

@@ -79,3 +80,9 @@ class LogiQA(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["passage"]
lm_eval/tasks/mathqa.py

@@ -62,3 +62,9 @@ class MathQA(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
lm_eval/tasks/mc_taco.py

@@ -58,6 +58,12 @@ class MCTACO(Task):
         return f"{doc['sentence']}\nQuestion: {doc['question']}\n" \
             f"Answer: {doc['answer']}\nPlausible:"
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['question'] + " " + doc['sentence']
 
     def doc_to_target(self, doc):
         return " " + ["no", "yes"][doc['label']]
lm_eval/tasks/mutual.py

@@ -52,6 +52,12 @@ class MuTualBase(Task):
     def doc_to_text(self, doc):
         return self.detokenize(doc["article"])
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["article"]
 
     def doc_to_target(self, doc):
         return " " + self.detokenize(doc["options"][self.CHOICES.index(doc["answers"])])
lm_eval/tasks/naturalqs.py

@@ -63,6 +63,12 @@ class NaturalQs(Task):
     def doc_to_text(self, doc):
         return 'Q: ' + doc['question']['text'] + '\n\n' + 'A:'
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['question']['text']
 
     def doc_to_target(self, doc):
         # There's a short answer and a long answer. Based on the paper, I'm using the long answer.
         short_answer = doc['annotations']['short_answers'][0]['text']
lm_eval/tasks/openbookqa.py

@@ -63,3 +63,9 @@ class OpenBookQA(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
lm_eval/tasks/piqa.py

@@ -58,3 +58,9 @@ class PiQA(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return "Question: " + doc["goal"] + "\nAnswer:"
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["goal"]
lm_eval/tasks/prost.py

@@ -71,3 +71,9 @@ class PROST(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return doc["query"]
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["query"]
lm_eval/tasks/pubmedqa.py

@@ -58,6 +58,12 @@ class Pubmed_QA(Task):
             doc["final_decision"]
         )
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["question"] + " " + "\n".join(doc["context"]["contexts"])
 
     def doc_to_target(self, doc):
         return " {}".format(doc["final_decision"])
lm_eval/tasks/qa4mre.py

@@ -57,6 +57,12 @@ class QA4MRE(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"])
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["source"] + " " + doc["query"]
 
 
 class QA4MRE_2011(QA4MRE):
     DATASET_NAME = "2011.main.EN"
lm_eval/tasks/quac.py

@@ -57,6 +57,12 @@ class QuAC(Task):
     def doc_to_text(self, doc):
         return 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['paragraph']
 
     def doc_to_target(self, doc):
         return doc['answer']
lm_eval/tasks/race.py

@@ -104,6 +104,12 @@ class RACE(Task):
         text += self.last_problem(doc)['question']
         return text
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc['article']
 
     def doc_to_target(self, doc):
         return " " + self.get_answer_option(self.last_problem(doc))
lm_eval/tasks/sat.py

@@ -67,3 +67,9 @@ class SATAnalogies(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return "{} is to {} as".format(*doc['query'])
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["source"] + "\n" + " ".join(doc["query"])
lm_eval/tasks/sciq.py

@@ -65,3 +65,9 @@ class SciQ(MultipleChoiceTask):
     def doc_to_text(self, doc):
         return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"]).strip()
+
+    def should_decontaminate(self):
+        return True
+
+    def doc_to_decontamination_query(self, doc):
+        return doc["source"] + " " + doc["query"]
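The driver code that consumes these hooks lives elsewhere in the pull request; this page shows only the task-side changes. As an illustration only, with a hypothetical is_contaminated predicate standing in for the actual overlap checker, a caller could use the two new methods like this:

    def filter_contaminated(task, docs, is_contaminated):
        # `is_contaminated` is a hypothetical predicate backed by an
        # n-gram overlap index over the training corpus; it is not
        # part of this diff.
        if not task.should_decontaminate():
            return list(docs)
        return [
            doc for doc in docs
            if not is_contaminated(task.doc_to_decontamination_query(doc))
        ]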