gaoqiong / lm-evaluation-harness · Commits
Commit 0dc556dc, authored Sep 25, 2023 by lintangsutawika

add function to add both task and benchmark

Parent: c6403765
Showing 6 changed files with 46 additions and 144 deletions (+46, −144):
lm_eval/tasks/__init__.py  +44 −28
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_anli.yaml  +0 −29
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_arc.yaml  +0 −23
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_boolq.yaml  +0 −33
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_rte.yaml  +0 −29
main.py  +2 −2
lm_eval/tasks/__init__.py (view file @ 0dc556dc)
```diff
@@ -45,6 +45,18 @@ def register_configurable_group(config: Dict[str, str]) -> int:
     task_list = [task for task in all_task_list if type(task) == str]
 
     for task_config in config_list:
+        # if "task" in task_config:
+        #     task = task_config["task"]
+        #     if task in GROUP_REGISTRY:
+        #         task_list = GROUP_REGISTRY[task]
+        #     elif task in TASK_REGISTRY:
+        #         task_list = [TASK_REGISTRY[task]]
+        #     for _task in task_list:
+        #         task_config = {
+        #             **_task["CONFIG"],
+        #             **task_config
+        #         }
+
         var_configs = check_prompt_config(
             {
                 **task_config,
```
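The `{**task_config, ...}` spread passed to `check_prompt_config` (and the same pattern in the commented-out block) relies on Python's dict-merge semantics, where later keys override earlier ones. A minimal illustration; the keys below are invented and not from the commit:

```python
# Dict-merge semantics behind the {**task_config, ...} spread above:
# when the same key appears twice, the later value wins.
base = {"task": "anli_r1", "num_fewshot": 0}   # illustrative keys only
override = {"num_fewshot": 5}
merged = {**base, **override}
assert merged == {"task": "anli_r1", "num_fewshot": 5}
```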
```diff
@@ -109,36 +121,40 @@ def include_task_folder(task_dir: str, register_task=True) -> None:
     Calling this function
     """
     for root, subdirs, file_list in os.walk(task_dir):
-        if (subdirs == [] or subdirs == ["__pycache__"]) and (len(file_list) > 0):
-            for f in file_list:
-                if f.endswith(".yaml"):
-                    yaml_path = os.path.join(root, f)
-                    try:
-                        config = utils.load_yaml_config(yaml_path)
-                        if register_task:
-                            all_configs = check_prompt_config(config)
-                            for config in all_configs:
-                                register_configurable_task(config)
-                        else:
-                            # If a `task` in config is a list,
-                            # that means it's a benchmark
-                            if type(config["task"]) == list:
-                                register_configurable_group(config)
-                    except Exception as error:
-                        eval_logger.warning(
-                            "Failed to load config in\n"
-                            f"{yaml_path}\n"
-                            " Config will not be added to registry\n"
-                            f" Error: {error}"
-                        )
+        # if (subdirs == [] or subdirs == ["__pycache__"]) and (len(file_list) > 0):
+        for f in file_list:
+            if f.endswith(".yaml"):
+                yaml_path = os.path.join(root, f)
+                try:
+                    config = utils.load_yaml_config(yaml_path)
+                    if register_task:
+                        all_configs = check_prompt_config(config)
+                        for config in all_configs:
+                            register_configurable_task(config)
+                    else:
+                        # If a `task` in config is a list,
+                        # that means it's a benchmark
+                        if type(config["task"]) == list:
+                            register_configurable_group(config)
+                except Exception as error:
+                    eval_logger.warning(
+                        "Failed to load config in\n"
+                        f"{yaml_path}\n"
+                        " Config will not be added to registry\n"
+                        f" Error: {error}"
+                    )
+
+
+def include_path(task_dir):
+    include_task_folder(task_dir)
+    # Register Benchmarks after all tasks have been added
+    include_task_folder(task_dir, register_task=False)
+    return 0
 
 
 task_dir = os.path.dirname(os.path.abspath(__file__)) + "/"
-include_task_folder(task_dir)
-# Register Benchmarks after all tasks have been added
-include_task_folder(task_dir, register_task=False)
+include_path(task_dir)
 
 
 def get_task(task_name, config):
```
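The new `include_path` is the function the commit message refers to: it bundles the two registration passes, first registering every YAML that defines a single task, then re-walking the directory with `register_task=False` to register benchmark YAMLs (those whose `task` field is a list) once their member tasks already exist in the registry. A usage sketch; the directory path below is hypothetical:

```python
# Sketch: registering a directory of custom task/benchmark YAMLs with the
# new two-pass helper. The path is a placeholder, not from the commit.
from lm_eval.tasks import include_path

# Pass 1 registers each YAML whose `task` is a single task definition;
# pass 2 (register_task=False) registers benchmark YAMLs whose `task`
# is a list of already-registered task names.
include_path("/path/to/my/yaml/tasks/")
```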
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_anli.yaml (deleted, 100644 → 0; view file @ c6403765)
```yaml
# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-7":
    doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-8":
    doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
```
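These `doc_to_text` / `doc_to_target` strings are Jinja2 templates rendered against each dataset document. A minimal sketch of how the `doc_to_target` expression above resolves, assuming a plain Jinja2 renderer and an invented ANLI-style doc (fields `premise`, `hypothesis`, `label`):

```python
# Sketch: rendering a doc_to_target template with plain Jinja2.
# The sample doc is invented for illustration; in ANLI, label 0 means
# entailment, so the template maps it to "Yes".
from jinja2 import Template

doc = {
    "premise": "The cat sat on the mat.",
    "hypothesis": "An animal is on the mat.",
    "label": 0,
}
doc_to_target = Template("{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}")
print(doc_to_target.render(**doc))  # -> Yes
```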
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_arc.yaml (deleted, 100644 → 0; view file @ c6403765)
```yaml
# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-1":
    doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-2":
    doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-3":
    doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-4":
    doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-5":
    doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-6":
    doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
```
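The `doc_to_target` expression here selects the choice text at the position of the answer key. The same lookup written as plain Python, with an invented ARC-style doc for illustration:

```python
# How "{{choices.text[choices.label.index(answerKey)]}}" resolves, as
# plain Python. The sample doc is invented, not from the dataset.
doc = {
    "question": "Which object is a conductor?",
    "choices": {"text": ["wood", "copper", "glass"], "label": ["A", "B", "C"]},
    "answerKey": "B",
}
# Find the position of the answer key among the labels, then take the
# choice text at that position.
target = doc["choices"]["text"][doc["choices"]["label"].index(doc["answerKey"])]
assert target == "copper"
```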
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_boolq.yaml (deleted, 100644 → 0; view file @ c6403765)
```yaml
# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-1":
    doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-2":
    doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-3":
    doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-4":
    doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-5":
    doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-6":
    doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-7":
    doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-8":
    # doc_to_text: "{{title}}\n\n{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-9":
    doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
```
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_rte.yaml (deleted, 100644 → 0; view file @ c6403765)
```yaml
# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nQ with options: Can we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{hypothesis}}"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nThe answer is"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true:\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-7":
    doc_to_text: "Question with options: can we draw the following hypothesis from the context?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-8":
    doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
```
main.py (view file @ 0dc556dc)
```diff
@@ -10,7 +10,7 @@ from pathlib import Path
 from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
 from lm_eval.logger import eval_logger, SPACING
-from lm_eval.tasks import include_task_folder
+from lm_eval.tasks import include_path
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -111,7 +111,7 @@ def main() -> None:
     if args.include_path is not None:
         eval_logger.info(f"Including path: {args.include_path}")
-        include_task_folder(args.include_path)
+        include_path(args.include_path)
 
     if args.tasks is None:
         task_names = ALL_TASKS
```
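With these two call sites switched over, the CLI's `--include_path` flag now also picks up benchmark YAMLs, since `include_path` performs both registration passes. A sketch of the equivalent programmatic flow; the directory, model arguments, and task name are placeholders, and `evaluator.simple_evaluate` is assumed to exist with these parameters in this era of the harness:

```python
# Programmatic equivalent of passing --include_path on the CLI (a sketch,
# not from the commit; assumes lm_eval.evaluator.simple_evaluate exists
# with these parameters).
from lm_eval import evaluator
from lm_eval.tasks import include_path

include_path("/path/to/extra/yamls/")  # hypothetical directory of task/benchmark YAMLs

results = evaluator.simple_evaluate(
    model="hf",                    # placeholder model type
    model_args="pretrained=gpt2",  # placeholder model args
    tasks=["my_custom_task"],      # hypothetical task registered above
)
```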