Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
d5071b70
Commit
d5071b70
authored
Jan 19, 2024
by
lintangsutawika
Browse files
task for testing recursive
parent
4f69410c
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
45 additions
and
274 deletions
+45
-274
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+33
-12
lm_eval/tasks/benchmarks/flan/flan_anli.yaml
lm_eval/tasks/benchmarks/flan/flan_anli.yaml
+0
-17
lm_eval/tasks/benchmarks/flan/flan_arc.yaml
lm_eval/tasks/benchmarks/flan/flan_arc.yaml
+0
-14
lm_eval/tasks/benchmarks/flan/flan_boolq.yaml
lm_eval/tasks/benchmarks/flan/flan_boolq.yaml
+0
-7
lm_eval/tasks/benchmarks/flan/flan_cot.yaml
lm_eval/tasks/benchmarks/flan/flan_cot.yaml
+0
-11
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
+0
-6
lm_eval/tasks/benchmarks/flan/flan_held_in_yaml
lm_eval/tasks/benchmarks/flan/flan_held_in_yaml
+0
-39
lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
+0
-13
lm_eval/tasks/benchmarks/flan/flan_rte.yaml
lm_eval/tasks/benchmarks/flan/flan_rte.yaml
+0
-7
lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml
lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml
+0
-29
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml
+0
-23
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml
+0
-33
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml
+0
-29
lm_eval/tasks/benchmarks/flan/yaml_templates/cot_template_yaml
...al/tasks/benchmarks/flan/yaml_templates/cot_template_yaml
+0
-21
lm_eval/tasks/benchmarks/flan/yaml_templates/held_in_template_yaml
...asks/benchmarks/flan/yaml_templates/held_in_template_yaml
+0
-13
lm_eval/tasks/benchmarks/test.yaml
lm_eval/tasks/benchmarks/test.yaml
+12
-0
No files found.
lm_eval/tasks/__init__.py
View file @
d5071b70
...
...
@@ -56,17 +56,27 @@ def register_configurable_task(config: Dict[str, str]) -> int:
def
register_configurable_group
(
config
:
Dict
[
str
,
str
],
yaml_path
:
str
=
None
)
->
int
:
group
=
config
[
"group"
]
all_task_list
=
config
[
"task"
]
config_list
=
[
task
for
task
in
all_task_list
if
type
(
task
)
!=
str
]
task_list
=
[
task
for
task
in
all_task_list
if
type
(
task
)
==
str
]
for
task_config
in
config_list
:
if
group
not
in
[
"grouptest"
,
"arc_stuff"
]:
return
0
task_config_list
=
[]
group_config_list
=
[]
registered_task_or_group_list
=
[]
for
task
in
config
[
"task"
]:
if
isinstance
(
task
,
str
):
registered_task_or_group_list
.
append
(
task
)
elif
list
(
task
.
keys
())
==
[
"group"
,
"task"
]:
group_config_list
.
append
(
task
)
else
:
task_config_list
.
append
(
task
)
for
task_config
in
task_config_list
:
base_config
=
{}
task_name_config
=
{}
if
"task"
in
task_config
:
task_name
=
task_config
[
"task"
]
if
task_name
in
ALL_TASKS
:
if
task_name
in
TASK_REGISTRY
:
task_obj
=
get_task_dict
(
task_name
)[
task_name
]
if
type
(
task_obj
)
==
tuple
:
_
,
task_obj
=
task_obj
...
...
@@ -74,6 +84,8 @@ def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -
if
task_obj
is
not
None
:
base_config
=
task_obj
.
_config
.
to_dict
(
keep_callable
=
True
)
task_name_config
[
"task"
]
=
f
"
{
group
}
_
{
task_name
}
"
# elif task_name in GROUP_REGISTRY:
task_config
=
utils
.
load_yaml_config
(
yaml_path
,
task_config
)
var_configs
=
check_prompt_config
(
...
...
@@ -88,7 +100,16 @@ def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -
for
config
in
var_configs
:
register_configurable_task
(
config
)
task_names
=
utils
.
pattern_match
(
task_list
,
ALL_TASKS
)
for
group_config
in
group_config_list
:
sub_group
=
group_config
[
"group"
]
register_configurable_group
(
group_config
,
yaml_path
)
if
group
in
GROUP_REGISTRY
:
GROUP_REGISTRY
[
group
].
append
(
sub_group
)
else
:
GROUP_REGISTRY
[
group
]
=
[
sub_group
]
ALL_TASKS
.
add
(
group
)
task_names
=
utils
.
pattern_match
(
registered_task_or_group_list
,
ALL_TASKS
)
for
task
in
task_names
:
if
(
task
in
TASK_REGISTRY
)
or
(
task
in
GROUP_REGISTRY
):
if
group
in
GROUP_REGISTRY
:
...
...
@@ -143,7 +164,7 @@ def get_task_name_from_config(task_config: Dict[str, str]) -> str:
return
"{dataset_path}"
.
format
(
**
task_config
)
def
include_task_folder
(
task_dir
:
str
,
register_task
:
bool
=
True
)
->
None
:
def
include_task_folder
(
task_dir
:
str
,
register_task
:
bool
=
True
,
task_name
:
str
=
None
)
->
None
:
"""
Calling this function
"""
...
...
@@ -198,18 +219,18 @@ def include_task_folder(task_dir: str, register_task: bool = True) -> None:
return
0
def
include_path
(
task_dir
):
include_task_folder
(
task_dir
)
def
include_path
(
task_dir
,
task_name
=
None
):
include_task_folder
(
task_dir
,
task_name
=
task_name
)
# Register Benchmarks after all tasks have been added
include_task_folder
(
task_dir
,
register_task
=
False
)
include_task_folder
(
task_dir
,
register_task
=
False
,
task_name
=
task_name
)
return
0
def
initialize_tasks
(
verbosity
=
"INFO"
):
def
initialize_tasks
(
verbosity
=
"INFO"
,
task_name
=
None
):
eval_logger
.
setLevel
(
getattr
(
logging
,
f
"
{
verbosity
}
"
))
task_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
+
"/"
include_path
(
task_dir
)
include_path
(
task_dir
,
task_name
=
task_name
)
def
get_task
(
task_name
,
config
):
...
...
lm_eval/tasks/benchmarks/flan/flan_anli.yaml
deleted
100644 → 0
View file @
4f69410c
# FLAN held-in group for ANLI: one sub-task per round (r1-r3), each built
# from the shared held-in template and all ANLI prompt templates
# (`prompt_templates/anli.yaml:*` selects every template in that file).
group: flan_anli
task:
  - include: yaml_templates/held_in_template_yaml
    task: anli_r1
    dataset_path: anli
    use_prompt: prompt_templates/anli.yaml:*
    validation_split: dev_r1
  - include: yaml_templates/held_in_template_yaml
    task: anli_r2
    dataset_path: anli
    use_prompt: prompt_templates/anli.yaml:*
    validation_split: dev_r2
  - include: yaml_templates/held_in_template_yaml
    task: anli_r3
    dataset_path: anli
    use_prompt: prompt_templates/anli.yaml:*
    validation_split: dev_r3
lm_eval/tasks/benchmarks/flan/flan_arc.yaml
deleted
100644 → 0
View file @
4f69410c
# FLAN held-in group for AI2 ARC: easy and challenge splits, each built
# from the shared held-in template and all ARC prompt templates.
group: flan_arc
task:
  - include: yaml_templates/held_in_template_yaml
    task: arc_easy
    dataset_path: ai2_arc
    dataset_name: ARC-Easy
    use_prompt: prompt_templates/arc.yaml:*
    validation_split: validation
  - include: yaml_templates/held_in_template_yaml
    task: arc_challenge
    dataset_path: ai2_arc
    dataset_name: ARC-Challenge
    use_prompt: prompt_templates/arc.yaml:*
    validation_split: validation
lm_eval/tasks/benchmarks/flan/flan_boolq.yaml
deleted
100644 → 0
View file @
4f69410c
# FLAN held-in group for BoolQ (SuperGLUE), using every BoolQ prompt
# template; no explicit `task:` name, so the name is derived from config.
group: flan_boolq
task:
  - include: yaml_templates/held_in_template_yaml
    dataset_path: super_glue
    dataset_name: boolq
    use_prompt: prompt_templates/boolq.yaml:*
    validation_split: validation
lm_eval/tasks/benchmarks/flan/flan_cot.yaml
deleted
100644 → 0
View file @
4f69410c
# FLAN chain-of-thought group: GSM8K and ASDiv rendered through the shared
# CoT template with promptsource prompts (`promptsource:*` = all prompts).
group: flan_cot
task:
  - include: yaml_templates/cot_template_yaml
    # Fixed: "gsmk" is not a Hugging Face dataset path; the grade-school
    # math dataset is published as "gsm8k".
    dataset_path: gsm8k
    # Fixed: "boolq" is not a gsm8k config (valid configs: main, socratic);
    # it appears to be a copy-paste leftover from the BoolQ task entry.
    dataset_name: main
    use_prompt: promptsource:*
    validation_split: validation
  - include: yaml_templates/cot_template_yaml
    dataset_path: EleutherAI/asdiv
    use_prompt: promptsource:*
    validation_split: validation
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
deleted
100644 → 0
View file @
4f69410c
# Aggregate FLAN held-in benchmark: composes the four per-dataset groups
# by name (each is registered by its own YAML file in this directory).
group: flan_held_in
task:
  - flan_boolq
  - flan_rte
  - flan_anli
  - flan_arc
lm_eval/tasks/benchmarks/flan/flan_held_in_yaml
deleted
100644 → 0
View file @
4f69410c
# Flattened FLAN held-in benchmark: all sub-tasks (BoolQ, RTE, ANLI r1-r3,
# ARC easy/challenge) declared inline in a single group instead of via the
# per-dataset group files. Paths are relative to the tasks root, hence the
# leading `flan/` prefix on include/use_prompt entries.
group: flan_held_in
task:
  # BoolQ (SuperGLUE); `:*` selects every template in the prompt file.
  - include: flan/yaml_templates/held_in_template_yaml
    dataset_path: super_glue
    dataset_name: boolq
    use_prompt: flan/prompt_templates/boolq.yaml:*
    validation_split: validation
  # RTE (SuperGLUE)
  - include: flan/yaml_templates/held_in_template_yaml
    dataset_path: super_glue
    dataset_name: rte
    use_prompt: flan/prompt_templates/rte.yaml:*
    validation_split: validation
  # ANLI rounds 1-3; each round has its own dev split.
  - include: flan/yaml_templates/held_in_template_yaml
    task: anli_r1
    dataset_path: anli
    use_prompt: flan/prompt_templates/anli.yaml:*
    validation_split: dev_r1
  - include: flan/yaml_templates/held_in_template_yaml
    task: anli_r2
    dataset_path: anli
    use_prompt: flan/prompt_templates/anli.yaml:*
    validation_split: dev_r2
  - include: flan/yaml_templates/held_in_template_yaml
    task: anli_r3
    dataset_path: anli
    use_prompt: flan/prompt_templates/anli.yaml:*
    validation_split: dev_r3
  # AI2 ARC, easy and challenge configurations.
  - include: flan/yaml_templates/held_in_template_yaml
    task: arc_easy
    dataset_path: ai2_arc
    dataset_name: ARC-Easy
    use_prompt: flan/prompt_templates/arc.yaml:*
    validation_split: validation
  - include: flan/yaml_templates/held_in_template_yaml
    task: arc_challenge
    dataset_path: ai2_arc
    dataset_name: ARC-Challenge
    use_prompt: flan/prompt_templates/arc.yaml:*
    validation_split: validation
lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
deleted
100644 → 0
View file @
4f69410c
# FLAN held-out benchmark: composes already-registered BBH and MMLU task
# groups by name; no inline task configs.
group: flan_held_out
task:
  # BBH
  - bbh_flan_zeroshot
  - bbh_flan_fewshot
  - bbh_flan_cot_fewshot
  - bbh_flan_cot_zeroshot
  # MMLU
  - mmlu
  - mmlu_flan_n_shot_generative
  - mmlu_flan_n_shot_loglikelihood
  - mmlu_flan_cot_zeroshot
  - mmlu_flan_cot_fewshot
lm_eval/tasks/benchmarks/flan/flan_rte.yaml
deleted
100644 → 0
View file @
4f69410c
# FLAN held-in group for RTE (SuperGLUE), using every RTE prompt template.
group: flan_rte
task:
  - include: yaml_templates/held_in_template_yaml
    dataset_path: super_glue
    dataset_name: rte
    use_prompt: prompt_templates/rte.yaml:*
    validation_split: validation
lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml
deleted
100644 → 0
View file @
4f69410c
# Flan Prompt Templates
# Nine FLAN NLI prompt variants for ANLI. Every template maps the integer
# `label` field (0/1/2) to "Yes" / "It's impossible to say" / "No".
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-7":
    doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-8":
    doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml
deleted
100644 → 0
View file @
4f69410c
# Flan Prompt Templates
# Seven FLAN multiple-choice prompt variants for ARC. The target picks the
# answer text whose label matches `answerKey`.
prompts:
  "template-0":
    doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-1":
    doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-2":
    doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-3":
    doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-4":
    doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-5":
    doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-6":
    doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml
deleted
100644 → 0
View file @
4f69410c
# Flan Prompt Templates
# Ten FLAN yes/no prompt variants for BoolQ. Every template maps the integer
# `label` (0/1) to "no"/"yes". Wording (including the "supposed by the text"
# phrasing in template-6) is reproduced verbatim from the FLAN templates.
prompts:
  "template-0":
    doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-1":
    doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-2":
    doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-3":
    doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-4":
    doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-5":
    doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-6":
    doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-7":
    doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-8":
    # doc_to_text: "{{title}}\n\n{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-9":
    doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml
deleted
100644 → 0
View file @
4f69410c
# Flan Prompt Templates
# Nine FLAN entailment prompt variants for RTE (SuperGLUE). Every template
# maps the integer `label` (0/1) to "yes"/"no".
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nQ with options: Can we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{hypothesis}}"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nThe answer is"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true:\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-7":
    doc_to_text: "Question with options: can we draw the following hypothesis from the context?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-8":
    doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
lm_eval/tasks/benchmarks/flan/yaml_templates/cot_template_yaml
deleted
100644 → 0
View file @
4f69410c
# Shared template for FLAN chain-of-thought tasks (included by flan_cot.yaml).
# Generates freeform answers, stops on a blank line, and extracts the final
# numeric answer via regex before exact-match scoring.
group: flan-cot
output_type: generate_until
validation_split: validation
doc_to_target: "{{answer}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "\n\n"
  do_sample: false
  temperature: 0.0
filter_list:
  - name: "get-answer"
    filter:
      # Pull the number following "The answer is", then keep only the first match.
      - function: "regex"
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
      - function: "take_first"
metadata:
  version: 1.0
lm_eval/tasks/benchmarks/flan/yaml_templates/held_in_template_yaml
deleted
100644 → 0
View file @
4f69410c
# Shared template for FLAN held-in tasks (included by the flan_* group files).
# Greedy generation until the end-of-sequence marker, scored by exact match.
output_type: generate_until
validation_split: validation
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "</s>"
  do_sample: false
  temperature: 0.0
metadata:
  version: 1.0
lm_eval/tasks/benchmarks/test.yaml
0 → 100644
View file @
d5071b70
# Test fixture for recursive group registration ("task for testing
# recursive"): a top-level group containing a plain task name, a nested
# sub-group, and an inline task-config override.
group: grouptest
task:
  - boolq
  - group: arc_stuff
    task:
      - arc_challenge
      - task: arc_easy
        metric_list:
          - metric: acc
        num_fewshot: 3
# - task: mmlu_stem
#   num_fewshot: 2
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment