gaoqiong / lm-evaluation-harness · Commit e5306ea6

Authored Jul 03, 2023 by lintangsutawika

added script to build benchmarks from promptsource

Parent: 3713ec52
Showing 3 changed files with 88 additions and 40 deletions:

lm_eval/tasks/benchmarks/build_promptsource_tasks.py   +0  -40
lm_eval/tasks/benchmarks/t0_eval.yml                    +16 -0
scripts/build_benchmark.py                              +72 -0
lm_eval/tasks/benchmarks/build_promptsource_tasks.py (deleted, 100644 → 0)

import os
import argparse

from lm_eval import utils
from promptsource.templates import DatasetTemplates


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark", required=True)
    parser.add_argument("--model_args", default="")


def main():
    args = parse_args()

    path = args.benchmark
    yaml_path = ""
    with open(path) as file:
        TASK_LIST = file.readlines()

    for dataset_name, subset_name in TASK_LIST:

        if subset_name is None:
            prompts = DatasetTemplates(dataset_name=dataset_name)
        else:
            prompts = DatasetTemplates(
                dataset_name=dataset_name, subset_name=subset_name
            )

        with open(os.path.join(yaml_path, "promptsource_template.yaml")) as file:
            yaml_dict = file.readline()

        for prompt_name in prompts.all_template_names:

            config_dict = {
                "include": "promptsource_template.yaml",
                "use_prompts": prompts[prompt_name],
                **yaml_dict,
            }

    return config_dict
lm_eval/tasks/benchmarks/t0_eval.yml (new file, 0 → 100644)

- dataset_path: "super_glue" # Coreference Resolution
  dataset_name: "wsc.fixed"
- dataset_path: "winogrande" # Coreference Resolution
  dataset_name: "winogrande_xl"
- dataset_path: "super_glue" # Natural Language Inference
  dataset_name: "cb"
- dataset_path: "super_glue" # Natural Language Inference
  dataset_name: "rte"
- dataset_path: "anli" # Natural Language Inference
  dataset_name: null
- dataset_path: "super_glue" # Sentence Completion
  dataset_name: "copa"
- dataset_path: "hellaswag" # Natural Language Inference
  dataset_name: null
- dataset_path: "super_glue" # Word Sense Disambiguation
  dataset_name: "wic"
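
Each entry pairs a Hugging Face dataset path with an optional config name (null when the dataset has no config); the build script below resolves the promptsource templates for every entry. A minimal sketch of that lookup for the super_glue/cb entry, assuming promptsource is installed (the returned template names depend on the installed promptsource version):

from promptsource.templates import DatasetTemplates

# Fetch the prompt templates registered for super_glue / cb, as the build
# script does for each entry listed in t0_eval.yml.
prompts = DatasetTemplates(dataset_name="super_glue", subset_name="cb")
print(prompts.all_template_names)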
scripts/build_benchmark.py (new file, 0 → 100644)

import os
import yaml
import argparse

from tqdm import tqdm

from promptsource.templates import DatasetTemplates

from lm_eval import utils

# from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger

# from lm_eval.tasks import include_task_folder


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark_name", required=True)
    parser.add_argument("--benchmark_path", required=True)
    parser.add_argument("--task_save_path", default="lm_eval/tasks/")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    with open(args.benchmark_path) as file:
        TASK_LIST = yaml.full_load(file)

    for task in tqdm(TASK_LIST):
        eval_logger.info(f"Processing {task}")

        dataset_name = task["dataset_path"]
        if "dataset_name" in task:
            subset_name = task["dataset_name"]
        else:
            subset_name = None

        if subset_name is None:
            file_name = f"promptsource_{dataset_name}"
            file_path = os.path.join(args.task_save_path, f"{dataset_name}")
        else:
            file_name = f"promptsource_{dataset_name}_{subset_name}"
            file_path = os.path.join(args.task_save_path, f"{dataset_name}/{subset_name}")

        os.makedirs(file_path, exist_ok=True)

        if subset_name is None:
            prompts = DatasetTemplates(dataset_name=dataset_name)
        else:
            prompts = DatasetTemplates(dataset_name=dataset_name, subset_name=subset_name)

        for idx, prompt_name in enumerate(prompts.all_template_names):
            full_file_name = file_name + f"_{idx}.yml"
            # .format(prompt_name.replace(" ", "_").lower())
            config_dict = {
                "group": args.benchmark_name,
                "include": "promptsource_template.yaml",
                "use_prompts": f"promptsource:{prompt_name}",
            }

            file_save_path = os.path.join(file_path, full_file_name)
            eval_logger.info(f"Save to {file_save_path}")
            with open(file_save_path, "w") as yaml_file:
                yaml.dump(config_dict, yaml_file)
        # return config_dict
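
For orientation, a hedged sketch of the per-prompt config this script writes, assuming it is run from the repository root as python scripts/build_benchmark.py --benchmark_name t0_eval --benchmark_path lm_eval/tasks/benchmarks/t0_eval.yml; the prompt name below is a placeholder, not something taken from this commit:

import yaml

# Reproduce the config_dict the script dumps for one prompt of the
# super_glue/cb entry; "<prompt name>" stands in for a real promptsource
# template name.
config_dict = {
    "group": "t0_eval",
    "include": "promptsource_template.yaml",
    "use_prompts": "promptsource:<prompt name>",
}
print(yaml.dump(config_dict))
# The script would save this content as
# lm_eval/tasks/super_glue/cb/promptsource_super_glue_cb_0.yml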