Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
ec03783f
Commit
ec03783f
authored
Sep 04, 2023
by
lintangsutawika
Browse files
update for held-in tasks
parent
4f5b72bc
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing 8 changed files with 64 additions and 36 deletions
+64
-36
lm_eval/benchmarks/flan/prompt_templates/flan_anli.yaml
lm_eval/benchmarks/flan/prompt_templates/flan_anli.yaml
+18
-18
lm_eval/benchmarks/flan/prompt_templates/flan_arc.yaml
lm_eval/benchmarks/flan/prompt_templates/flan_arc.yaml
+7
-8
lm_eval/benchmarks/flan/yaml_templates/cot_template_yaml
lm_eval/benchmarks/flan/yaml_templates/cot_template_yaml
+0
-2
lm_eval/benchmarks/flan/yaml_templates/held_in_template_yaml
lm_eval/benchmarks/flan/yaml_templates/held_in_template_yaml
+0
-2
lm_eval/benchmarks/flan_anli.yaml
lm_eval/benchmarks/flan_anli.yaml
+17
-0
lm_eval/benchmarks/flan_boolq.yaml
lm_eval/benchmarks/flan_boolq.yaml
+7
-0
lm_eval/benchmarks/flan_held_in.yaml
lm_eval/benchmarks/flan_held_in.yaml
+8
-6
lm_eval/benchmarks/flan_rte.yaml
lm_eval/benchmarks/flan_rte.yaml
+7
-0
No files found.
lm_eval/benchmarks/flan/prompt_templates/flan_anli.yaml
View file @
ec03783f
# Flan Prompt Templates
# Flan Prompt Templates
prompts
:
prompts
:
"
template-0"
:
"
template-0"
:
doc_to_text
:
"
{{
context
}}
\n\n
Choose
your
answer:
based
on
the
paragraph
above
can
we
conclude
that
\"
{{hypothesis}}
\"
?
\n\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No
\n
I
think
the
answer
is"
doc_to_text
:
"
{{
premise
}}
\n\n
Choose
your
answer:
based
on
the
paragraph
above
can
we
conclude
that
\"
{{hypothesis}}
\"
?
\n\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No
\n
I
think
the
answer
is"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-1"
:
"
template-1"
:
doc_to_text
:
"
{{
context
}}
\n\n
Based
on
that
paragraph
can
we
conclude
that
this
sentence
is
true?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No"
doc_to_text
:
"
{{
premise
}}
\n\n
Based
on
that
paragraph
can
we
conclude
that
this
sentence
is
true?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-2"
:
"
template-2"
:
doc_to_text
:
"
{{
context
}}
\n\n
Can
we
draw
the
following
conclusion?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No"
doc_to_text
:
"
{{
premise
}}
\n\n
Can
we
draw
the
following
conclusion?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-3"
:
"
template-3"
:
doc_to_text
:
"
{{
context
}}
\n
Does
this
next
sentence
follow,
given
the
preceding
text?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No"
doc_to_text
:
"
{{
premise
}}
\n
Does
this
next
sentence
follow,
given
the
preceding
text?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-4"
:
"
template-4"
:
doc_to_text
:
"
{{
context
}}
\n
Can
we
infer
the
following?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No
\n
The
answer
is:"
doc_to_text
:
"
{{
premise
}}
\n
Can
we
infer
the
following?
\n
{{hypothesis}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No
\n
The
answer
is:"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-5"
:
"
template-5"
:
doc_to_text
:
"
Read
the
following
paragraph
and
determine
if
the
hypothesis
is
true:
\n\n
{{
context
}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No
\n
Hypothesis:
{{hypothesis}}
\n\n\n
"
doc_to_text
:
"
Read
the
following
paragraph
and
determine
if
the
hypothesis
is
true:
\n\n
{{
premise
}}
\n\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No
\n
Hypothesis:
{{hypothesis}}
\n\n\n
"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-6"
:
"
template-6"
:
doc_to_text
:
"
Read
the
text
and
determine
if
the
sentence
is
true
(see
options
at
the
end):
\n\n
{{
context
}}
\n\n
Sentence:
{{hypothesis}}
\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No"
doc_to_text
:
"
Read
the
text
and
determine
if
the
sentence
is
true
(see
options
at
the
end):
\n\n
{{
premise
}}
\n\n
Sentence:
{{hypothesis}}
\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-7"
:
"
template-7"
:
doc_to_text
:
"
Can
we
draw
the
following
hypothesis
from
the
context
(see
options)?
\n\n
Context:
\n\n
{{
context
}}
\n\n
Hypothesis:
{{hypothesis}}
\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No"
doc_to_text
:
"
Can
we
draw
the
following
hypothesis
from
the
context
(see
options)?
\n\n
Context:
\n\n
{{
premise
}}
\n\n
Hypothesis:
{{hypothesis}}
\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
"
template-8"
:
"
template-8"
:
doc_to_text
:
"
Choose
from
options:
Determine
if
the
sentence
is
true
based
on
the
text
below:
\n
{{hypothesis}}
\n\n
{{
context
}}
\n
OPTIONS:
\n
-
Yes
\n
-
It
\
's
impossible
to
say
\n
-
No"
doc_to_text
:
"
Choose
from
options:
Determine
if
the
sentence
is
true
based
on
the
text
below:
\n
{{hypothesis}}
\n\n
{{
premise
}}
\n
OPTIONS:
\n
-
Yes
\n
-
It's
impossible
to
say
\n
-
No"
doc_to_target
:
"
{{[
'
Yes
',
'
It
\
's
impossible
to
say
',
'No'
][label]}}"
doc_to_target
:
"
{{[
\"
Yes
\"
,
\"
It's
impossible
to
say
\"
,
\"
No
\"
][label]}}"
lm_eval/benchmarks/flan/prompt_templates/flan_arc.yaml
View file @
ec03783f
...
@@ -2,23 +2,22 @@
...
@@ -2,23 +2,22 @@
prompts
:
prompts
:
"
template-0"
:
"
template-0"
:
doc_to_text
:
"
{{question}}
\n\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_text
:
"
{{question}}
\n\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_target
:
"
{{
[
choices.text
]
[choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
"
template-1"
:
"
template-1"
:
doc_to_text
:
"
Question:
{{question}}
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}
\n
Answer:"
doc_to_text
:
"
Question:
{{question}}
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}
\n
Answer:"
doc_to_target
:
"
{{
[
choices.text
]
[choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
"
template-2"
:
"
template-2"
:
doc_to_text
:
"
Question:
{{question}}
\n\n
What
is
the
correct
answer
to
the
question
from
the
following
choices?
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_text
:
"
Question:
{{question}}
\n\n
What
is
the
correct
answer
to
the
question
from
the
following
choices?
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_target
:
"
{{
[
choices.text
]
[choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
"
template-3"
:
"
template-3"
:
doc_to_text
:
"
Q:
{{question}}
\n
What
is
the
correct
answer
to
this
question?
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}...A:"
doc_to_text
:
"
Q:
{{question}}
\n
What
is
the
correct
answer
to
this
question?
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}...A:"
doc_to_target
:
"
{{
[
choices.text
]
[choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
"
template-4"
:
"
template-4"
:
doc_to_text
:
"
Choose
your
answer?
\n\n
{{question}}
\n\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_text
:
"
Choose
your
answer?
\n\n
{{question}}
\n\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_target
:
"
{{
[
choices.text
]
[choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
"
template-5"
:
"
template-5"
:
doc_to_text
:
"
Answer
the
question
\n\n
{{question}}
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_text
:
"
Answer
the
question
\n\n
{{question}}
\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_target
:
"
{{
[
choices.text
]
[choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
"
template-6"
:
"
template-6"
:
doc_to_text
:
"
{{question}}
\n\n
Pick
the
answer
from
these
options
\n\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_text
:
"
{{question}}
\n\n
Pick
the
answer
from
these
options
\n\n
OPTIONS:
\n
-
{{choices.text|join('
\n
-
')}}"
doc_to_target
:
"
{{[choices.text][choices.label.index(answerKey)]}}"
doc_to_target
:
"
{{choices.text[choices.label.index(answerKey)]}}"
lm_eval/benchmarks/flan/yaml_templates/cot_template_yaml
View file @
ec03783f
...
@@ -6,8 +6,6 @@ metric_list:
...
@@ -6,8 +6,6 @@ metric_list:
- metric: exact_match
- metric: exact_match
aggregation: mean
aggregation: mean
higher_is_better: true
higher_is_better: true
ignore_case: true
ignore_punctuation: true
generation_kwargs:
generation_kwargs:
until:
until:
- "\n\n"
- "\n\n"
...
...
lm_eval/benchmarks/flan/yaml_templates/held_in_template_yaml
View file @
ec03783f
...
@@ -4,8 +4,6 @@ metric_list:
...
@@ -4,8 +4,6 @@ metric_list:
- metric: exact_match
- metric: exact_match
aggregation: mean
aggregation: mean
higher_is_better: true
higher_is_better: true
ignore_case: true
ignore_punctuation: true
generation_kwargs:
generation_kwargs:
until:
until:
- "</s>"
- "</s>"
...
...
lm_eval/benchmarks/flan_anli.yaml
0 → 100644
View file @
ec03783f
group
:
flan_anli
task
:
-
include
:
flan/yaml_templates/held_in_template_yaml
task
:
anli_r1
dataset_path
:
anli
use_prompt
:
flan/prompt_templates/flan_anli.yaml:*
validation_split
:
dev_r1
-
include
:
flan/yaml_templates/held_in_template_yaml
task
:
anli_r2
dataset_path
:
anli
use_prompt
:
flan/prompt_templates/flan_anli.yaml:*
validation_split
:
dev_r2
-
include
:
flan/yaml_templates/held_in_template_yaml
task
:
anli_r3
dataset_path
:
anli
use_prompt
:
flan/prompt_templates/flan_anli.yaml:*
validation_split
:
dev_r3
lm_eval/benchmarks/flan_boolq.yaml
0 → 100644
View file @
ec03783f
group
:
flan_boolq
task
:
-
include
:
flan/yaml_templates/held_in_template_yaml
dataset_path
:
super_glue
dataset_name
:
boolq
use_prompt
:
flan/prompt_templates/flan_boolq.yaml:*
validation_split
:
validation
lm_eval/benchmarks/flan_held_in.yaml
View file @
ec03783f
...
@@ -26,12 +26,14 @@ task:
...
@@ -26,12 +26,14 @@ task:
use_prompt
:
flan/prompt_templates/flan_anli.yaml:*
use_prompt
:
flan/prompt_templates/flan_anli.yaml:*
validation_split
:
dev_r3
validation_split
:
dev_r3
-
include
:
flan/yaml_templates/held_in_template_yaml
-
include
:
flan/yaml_templates/held_in_template_yaml
task
:
ai2_arc
task
:
arc_easy
dataset_path
:
ARC-Easy
dataset_path
:
ai2_arc
use_prompt
:
local:*
dataset_name
:
ARC-Easy
use_prompt
:
flan/prompt_templates/flan_arc.yaml:*
validation_split
:
validation
validation_split
:
validation
-
include
:
flan/yaml_templates/held_in_template_yaml
-
include
:
flan/yaml_templates/held_in_template_yaml
task
:
ai2_arc
task
:
arc_challenge
dataset_path
:
ARC-Challange
dataset_path
:
ai2_arc
use_prompt
:
local:*
dataset_name
:
ARC-Challenge
use_prompt
:
flan/prompt_templates/flan_arc.yaml:*
validation_split
:
validation
validation_split
:
validation
lm_eval/benchmarks/flan_rte.yaml
0 → 100644
View file @
ec03783f
group
:
flan_rte
task
:
-
include
:
flan/yaml_templates/held_in_template_yaml
dataset_path
:
super_glue
dataset_name
:
rte
use_prompt
:
flan/prompt_templates/flan_rte.yaml:*
validation_split
:
validation
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment