gaoqiong / lm-evaluation-harness

Commit e795efcf, authored Sep 04, 2023 by lintangsutawika

updates

parent c8b76a3d
Showing 5 changed files with 75 additions and 38 deletions.
lm_eval/tasks/bbh/README.md (+1, -1)
lm_eval/tasks/bbh/_generate_configs.py (+72, -20)
lm_eval/tasks/bbh/_template_yaml (+0, -15)
lm_eval/tasks/bbh/flan_cot_fewshot/_flan_cot_fewshot_template_yaml (+1, -1)
lm_eval/tasks/bbh/flan_cot_zeroshot/_flan_cot_zeroshot_template_yaml (+1, -1)
lm_eval/tasks/bbh/README.md

@@ -25,7 +25,7 @@ Homepage: https://github.com/suzgunmirac/BIG-Bench-Hard
 #### Groups
-- `bbh`
+- `bbh_flan_zeroshot`
 #### Tasks
...
lm_eval/tasks/bbh/_generate_configs.py
"""
Take in a YAML, and output all other splits with this YAML
"""
import
os
import
re
import
yaml
import
inspect
import
datasets
import
requests
import
argparse
import
datasets
from
tqdm
import
tqdm
from
lm_eval
import
utils
from
lm_eval.logger
import
eval_logger
def
parse_args
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--base_yaml_path"
,
required
=
True
)
parser
.
add_argument
(
"--save_prefix_path"
,
default
=
"flan_zeroshot"
)
parser
.
add_argument
(
"--cot"
,
default
=
False
)
parser
.
add_argument
(
"--fewshot"
,
default
=
False
)
parser
.
add_argument
(
"--task_prefix"
,
default
=
""
)
return
parser
.
parse_args
()
if
__name__
==
"__main__"
:
args
=
parse_args
()
def
main
()
->
None
:
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name
=
os
.
path
.
split
(
args
.
base_yaml_path
)[
-
1
]
with
open
(
args
.
base_yaml_path
)
as
f
:
base_yaml
=
yaml
.
full_load
(
f
)
base_doc_to_text
=
"Q: {{input}}
\n
A:"
answer_regex
=
re
.
compile
(
"(?<=answer is )(.*)(?=.)"
)
dataset_path
=
"lukaemon/bbh"
for
task
in
tqdm
(
datasets
.
get_dataset_infos
(
dataset_path
).
keys
()):
file_name
=
f
"
{
task
}
.yaml"
try
:
with
open
(
f
"
{
file_name
}
"
,
"w"
)
as
f
:
f
.
write
(
"# Generated by _generate_configs.py
\n
"
)
yaml
.
dump
(
{
"include"
:
"_template_yaml"
,
"task"
:
f
"
{
dataset_path
.
split
(
'/'
)[
-
1
]
}
_
{
task
}
"
,
"dataset_name"
:
task
,
},
f
,
)
except
FileExistsError
:
pass
resp
=
requests
.
get
(
f
"https://raw.githubusercontent.com/suzgunmirac/BIG-Bench-Hard/main/cot-prompts/
{
task
}
.txt"
).
content
.
decode
(
'utf-8'
)
prompt
=
resp
.
split
(
"
\n
-----
\n
"
)[
-
1
]
description
,
*
few_shot
=
prompt
.
split
(
"
\n\n
Q:"
)
prefix_doc_to_text
=
""
if
args
.
fewshot
:
if
args
.
cot
:
prefix_doc_to_text
=
" "
.
join
(
few_shot
)
else
:
for
shot
in
few_shot
:
shot
=
"Q:"
+
shot
try
:
answer
=
answer_regex
.
search
(
shot
)[
0
]
except
:
print
(
"task"
,
task
)
print
(
shot
)
example
=
shot
.
split
(
"Let
\'
s think step by step."
)[
0
]
prefix_doc_to_text
+=
f
"
{
example
}{
answer
}
\n\n
"
doc_to_text
=
prefix_doc_to_text
+
base_doc_to_text
if
args
.
cot
:
doc_to_text
=
doc_to_text
+
" Let's think step by step.
\n
"
yaml_dict
=
{
"include"
:
"_template_yaml"
,
"task"
:
f
"bbh_
{
args
.
task_prefix
}
_
{
task
}
"
,
"dataset_name"
:
task
,
"description"
:
description
+
"
\n\n
"
,
"doc_to_text"
:
doc_to_text
,
}
file_save_path
=
args
.
save_prefix_path
+
f
"/
{
task
}
.yaml"
eval_logger
.
info
(
f
"Saving yaml for subset
{
task
}
to
{
file_save_path
}
"
)
with
open
(
file_save_path
,
"w"
)
as
yaml_file
:
yaml
.
dump
(
yaml_dict
,
yaml_file
,
width
=
float
(
"inf"
),
allow_unicode
=
True
,
default_style
=
'"'
)
if
__name__
==
"__main__"
:
main
()
# https://raw.githubusercontent.com/suzgunmirac/BIG-Bench-Hard/main/cot-prompts/boolean_expressions.txt
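For reference, here is a minimal sketch (not part of this commit) of how the script above turns a BIG-Bench-Hard cot-prompts file into a few-shot doc_to_text prefix. The resp string is a made-up stand-in for a downloaded {task}.txt; only the parsing and regex steps mirror the loop in _generate_configs.py.

import re

# Made-up stand-in for the raw cot-prompts file: a header, a "-----" separator,
# a task description, then worked examples separated by blank lines.
resp = (
    "header text\n"
    "-----\n"
    "Evaluate the result of a random Boolean expression.\n\n"
    "Q: not ( True ) and ( True ) is\n"
    "A: Let's think step by step.\n"
    "not ( True ) is False. False and True is False. So the answer is False.\n\n"
    "Q: True and not not ( not False ) is\n"
    "A: Let's think step by step.\n"
    "not not ( not False ) is True, and True and True is True. So the answer is True."
)

answer_regex = re.compile("(?<=answer is )(.*)(?=.)")
base_doc_to_text = "Q: {{input}}\nA:"

prompt = resp.split("\n-----\n")[-1]
description, *few_shot = prompt.split("\n\nQ:")

# Non-CoT few-shot branch: keep only question plus final answer, dropping the rationale.
prefix_doc_to_text = ""
for shot in few_shot:
    shot = "Q:" + shot
    answer = answer_regex.search(shot)[0]
    example = shot.split("Let's think step by step.")[0]
    prefix_doc_to_text += f"{example}{answer}\n\n"

print(prefix_doc_to_text + base_doc_to_text)

Running this prints two "Q: ... / A: <answer>" examples followed by the "Q: {{input}} / A:" slot that each evaluation document is rendered into.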
lm_eval/tasks/bbh/_template_yaml (deleted, 100644 → 0)
group: bbh
dataset_path: lukaemon/bbh
output_type: greedy_until
test_split: test
doc_to_text: "Q: {{input}}\nA:"
doc_to_target: "{{target}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "</s>"
  do_sample: false
  temperature: 0.0
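With this shared template gone, the per-task configs written by _generate_configs.py carry the task-specific fields themselves. As an illustration only (not taken from the commit, values are placeholders), this is roughly what the generator's final yaml.dump call emits for one task, using the same dump options as the script:

import yaml

# Placeholder values standing in for what _generate_configs.py computes per task.
yaml_dict = {
    "include": "_template_yaml",  # placeholder for whichever base template the config includes
    "task": "bbh_flan_zeroshot_boolean_expressions",
    "dataset_name": "boolean_expressions",
    "description": "Evaluate the result of a random Boolean expression.\n\n",
    "doc_to_text": "Q: {{input}}\nA:",
}

# Same dump options as the script: never wrap long prompt strings,
# keep unicode, and force double-quoted scalars.
print(
    yaml.dump(
        yaml_dict,
        width=float("inf"),
        allow_unicode=True,
        default_style='"',
    )
)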
lm_eval/tasks/bbh/flan_cot_fewshot/_flan_cot_fewshot_template_yaml

-group: bbh_flan_fewshot
+group: bbh_flan_cot_fewshot
 dataset_path: lukaemon/bbh
 output_type: greedy_until
 test_split: test
...
lm_eval/tasks/bbh/flan_cot_zeroshot/_flan_cot_zeroshot_template_yaml

-group: bbh_flan_zeroshot
+group: bbh_flan_cot_zeroshot
 dataset_path: lukaemon/bbh
 output_type: greedy_until
 test_split: test
...
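The two group renames above (bbh_flan_fewshot → bbh_flan_cot_fewshot, bbh_flan_zeroshot → bbh_flan_cot_zeroshot) line up with the generator's --cot and --fewshot switches. A small sketch (not from the commit, placeholder strings only) of how those flags change the assembled doc_to_text:

# Placeholders standing in for the parsed cot-prompts content.
base_doc_to_text = "Q: {{input}}\nA:"
few_shot = [" <worked example 1>", " <worked example 2>"]

# --cot without --fewshot (bbh_flan_cot_zeroshot): no prefix, just ask for reasoning.
cot_zeroshot = base_doc_to_text + " Let's think step by step.\n"

# --cot with --fewshot (bbh_flan_cot_fewshot): keep the worked examples verbatim as the prefix.
cot_fewshot = " ".join(few_shot) + base_doc_to_text + " Let's think step by step.\n"

print(repr(cot_zeroshot))
print(repr(cot_fewshot))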