gaoqiong / lm-evaluation-harness · Commits · 3f090027

Commit 3f090027, authored Sep 26, 2023 by lintangsutawika
Commit message: moved files
Parent: a5e93901

Showing 15 changed files with 48 additions and 16 deletions (+48 / -16):
  lm_eval/prompts/__init__.py                                  +5   -2
  lm_eval/tasks/__init__.py                                    +9   -2
  lm_eval/tasks/benchmarks/flan/flan_anli.yaml                 +3   -3
  lm_eval/tasks/benchmarks/flan/flan_arc.yaml                  +14  -0
  lm_eval/tasks/benchmarks/flan/flan_boolq.yaml                +1   -1
  lm_eval/tasks/benchmarks/flan/flan_cot.yaml                  +0   -0
  lm_eval/tasks/benchmarks/flan/flan_held_in.yaml              +6   -0
  lm_eval/tasks/benchmarks/flan/flan_held_in_yaml              +7   -7
  lm_eval/tasks/benchmarks/flan/flan_held_out.yaml             +0   -0
  lm_eval/tasks/benchmarks/flan/flan_rte.yaml                  +0   -0
  lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml     +0   -0
  lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml      +0   -0
  lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml    +0   -0
  lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml      +0   -0
  lm_eval/utils.py                                             +3   -1
lm_eval/prompts/__init__.py

@@ -65,7 +65,7 @@ def get_prompt(prompt_id: str, dataset_name: str = None, subset_name: str = None
     )


-def load_prompt_list(use_prompt: str, dataset_name=None, subset_name=None, **kwargs):
+def load_prompt_list(use_prompt: str, dataset_name=None, subset_name=None, file_dir=None, **kwargs):

     category_name, prompt_name = use_prompt.split(":")

@@ -84,6 +84,9 @@ def load_prompt_list(use_prompt: str, dataset_name=None, subset_name=None, **kwa
     elif ".yaml" in category_name:
         import yaml

+        if file_dir is not None:
+            category_name = os.path.realpath(os.path.join(file_dir, category_name))
+
         with open(category_name, "rb") as file:
             prompt_yaml_file = yaml.full_load(file)

@@ -98,7 +101,7 @@ def load_prompt_list(use_prompt: str, dataset_name=None, subset_name=None, **kwa
     # for prompt in prompt_name:
     #     prompt_list.append(utils.pattern_match(prompt_name, prompts.all_template_names))
     # else:
-    prompt_list = utils.pattern_match(prompt_name, prompts.all_template_names)
+    # prompt_list = utils.pattern_match(prompt_name, prompts.all_template_names)

     return [":".join([category_name, prompt]) for prompt in prompt_list]
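The new file_dir argument lets a relative prompt-template path (such as flan/prompt_templates/anli.yaml) be resolved against the directory of the YAML file that referenced it instead of the current working directory. A minimal standalone sketch of that resolution step, assuming hypothetical names (resolve_prompt_path is illustrative, not the library's API):

import os

def resolve_prompt_path(category_name: str, file_dir: str = None) -> str:
    # Mirror of the added branch: only rewrite the path when a base
    # directory is supplied, otherwise leave the value untouched.
    if file_dir is not None:
        category_name = os.path.realpath(os.path.join(file_dir, category_name))
    return category_name

# A task YAML living in lm_eval/tasks/benchmarks/flan can refer to a template
# by a relative path; resolving against that directory yields an absolute path
# that open() can use regardless of where the harness is launched from.
print(resolve_prompt_path("prompt_templates/anli.yaml",
                          "/repo/lm_eval/tasks/benchmarks/flan"))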
lm_eval/tasks/__init__.py

@@ -38,7 +38,7 @@ def register_configurable_task(config: Dict[str, str]) -> int:
     return 0


-def register_configurable_group(config: Dict[str, str]) -> int:
+def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -> int:
     group = config["group"]
     all_task_list = config["task"]
     config_list = [task for task in all_task_list if type(task) != str]

@@ -57,6 +57,7 @@ def register_configurable_group(config: Dict[str, str]) -> int:
         #     **_task["CONFIG"],
         #     **task_config
         # }
+        task_config = utils.load_yaml_config(yaml_path, task_config)
         var_configs = check_prompt_config(
             {
                 **task_config,

@@ -128,6 +129,10 @@ def include_task_folder(task_dir: str, register_task=True) -> None:
                 try:
                     config = utils.load_yaml_config(yaml_path)

+                    # if ("prompts" in config) and (len(config.keys()) == 1):
+                    #     continue
+
                     if register_task:
                         all_configs = check_prompt_config(config)
                         for config in all_configs:

@@ -136,9 +141,11 @@ def include_task_folder(task_dir: str, register_task=True) -> None:
                     # If a `task` in config is a list,
                     # that means it's a benchmark
                     if type(config["task"]) == list:
-                        register_configurable_group(config)
+                        register_configurable_group(config, yaml_path)
                 except Exception as error:
+                    import traceback
+                    print(traceback.format_exc())
                     eval_logger.warning(
                         "Failed to load config in\n"
                         f"{yaml_path}\n"
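The benchmark registration path now carries the benchmark's own yaml_path into register_configurable_group, so sub-task configs given inline (as dicts inside the task list) can be expanded relative to the file that declared them via utils.load_yaml_config. A rough sketch of the shape of that flow, with hypothetical stand-ins for the harness internals (load_yaml_config_stub and register_group_sketch are illustrative names, not the library's code):

import os
from typing import Dict, List, Union

def load_yaml_config_stub(yaml_path: str, partial_config: Dict) -> Dict:
    # Hypothetical stand-in for utils.load_yaml_config: resolve any includes
    # or relative paths in partial_config against yaml_path's directory.
    resolved = dict(partial_config)
    resolved["_resolved_from"] = os.path.dirname(yaml_path)
    return resolved

def register_group_sketch(config: Dict, yaml_path: str = None) -> int:
    group = config["group"]
    all_task_list: List[Union[str, Dict]] = config["task"]
    # String entries name tasks registered elsewhere; dict entries are inline
    # task configs that still need expanding relative to this YAML file.
    inline_configs = [t for t in all_task_list if not isinstance(t, str)]
    for task_config in inline_configs:
        task_config = load_yaml_config_stub(yaml_path, task_config)
        print(f"group {group!r}: registering inline task {task_config}")
    return 0

register_group_sketch(
    {"group": "flan_arc", "task": ["arc_easy", {"task": "arc_challenge"}]},
    yaml_path="lm_eval/tasks/benchmarks/flan/flan_arc.yaml",
)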
lm_eval/tasks/benchmarks/flan_anli.yaml → lm_eval/tasks/benchmarks/flan/flan_anli.yaml

@@ -3,15 +3,15 @@ task:
   - include: flan/yaml_templates/held_in_template_yaml
     task: anli_r1
     dataset_path: anli
-    use_prompt: flan/prompt_templates/flan_anli.yaml:*
+    use_prompt: flan/prompt_templates/anli.yaml:*
     validation_split: dev_r1
   - include: flan/yaml_templates/held_in_template_yaml
     task: anli_r2
     dataset_path: anli
-    use_prompt: flan/prompt_templates/flan_anli.yaml:*
+    use_prompt: flan/prompt_templates/anli.yaml:*
     validation_split: dev_r2
   - include: flan/yaml_templates/held_in_template_yaml
     task: anli_r3
     dataset_path: anli
-    use_prompt: flan/prompt_templates/flan_anli.yaml:*
+    use_prompt: flan/prompt_templates/anli.yaml:*
     validation_split: dev_r3
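Each use_prompt value is a template source and a prompt pattern separated by a colon; the trailing * selects every prompt defined in the referenced template YAML. A small sketch of how such a pattern could be expanded, assuming the template file exposes a top-level prompts mapping (that key, and the use of fnmatch in place of the harness's utils.pattern_match, are assumptions for illustration):

import fnmatch
import yaml  # PyYAML

def expand_use_prompt(use_prompt: str) -> list:
    # "flan/prompt_templates/anli.yaml:*" -> template path + wildcard pattern
    category_name, prompt_pattern = use_prompt.split(":")
    with open(category_name, "rb") as f:
        prompt_yaml = yaml.full_load(f)
    names = list(prompt_yaml.get("prompts", {}).keys())  # assumed layout
    matched = fnmatch.filter(names, prompt_pattern)
    return [f"{category_name}:{name}" for name in matched]

# e.g. expand_use_prompt("flan/prompt_templates/anli.yaml:*") would return one
# "path:prompt_name" entry per template defined in that file.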
lm_eval/tasks/benchmarks/flan/flan_arc.yaml (new file, 0 → 100644)

group: flan_arc
task:
  - include: flan/yaml_templates/held_in_template_yaml
    task: arc_easy
    dataset_path: ai2_arc
    dataset_name: ARC-Easy
    use_prompt: flan/prompt_templates/arc.yaml:*
    validation_split: validation
  - include: flan/yaml_templates/held_in_template_yaml
    task: arc_challenge
    dataset_path: ai2_arc
    dataset_name: ARC-Challenge
    use_prompt: flan/prompt_templates/arc.yaml:*
    validation_split: validation
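The new flan_arc.yaml follows the benchmark-config shape used throughout this commit: a group name plus a task list whose entries each include a shared held-in template and override dataset-specific fields. A quick way to sanity-check such a file with PyYAML, run from the repository root (illustrative only, not part of the harness):

import yaml

with open("lm_eval/tasks/benchmarks/flan/flan_arc.yaml", "rb") as f:
    cfg = yaml.full_load(f)

print(cfg["group"])  # flan_arc
for entry in cfg["task"]:
    # Each entry layers its own dataset on top of the shared template.
    print(entry["task"], entry["dataset_path"], entry.get("dataset_name"))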
lm_eval/tasks/benchmarks/flan_boolq.yaml → lm_eval/tasks/benchmarks/flan/flan_boolq.yaml

@@ -3,5 +3,5 @@ task:
   - include: flan/yaml_templates/held_in_template_yaml
     dataset_path: super_glue
     dataset_name: boolq
-    use_prompt: flan/prompt_templates/flan_boolq.yaml:*
+    use_prompt: flan/prompt_templates/boolq.yaml:*
     validation_split: validation
lm_eval/tasks/benchmarks/flan_cot.yaml → lm_eval/tasks/benchmarks/flan/flan_cot.yaml (file moved, no content changes)
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml (new file, 0 → 100644)

group: flan_held_in
task:
  - flan_boolq
  - flan_rte
  - flan_anli
  - flan_arc
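Unlike flan_arc.yaml, this group lists plain task names, so every entry is a string referring to a benchmark registered elsewhere and the inline-config path in register_configurable_group is skipped. A tiny illustration of that distinction (standalone sketch, not harness code):

held_in = {"group": "flan_held_in",
           "task": ["flan_boolq", "flan_rte", "flan_anli", "flan_arc"]}

# Mirrors `config_list = [task for task in all_task_list if type(task) != str]`:
inline = [t for t in held_in["task"] if not isinstance(t, str)]
by_name = [t for t in held_in["task"] if isinstance(t, str)]
print(inline)   # [] -- nothing to expand, every member is referenced by name
print(by_name)  # ['flan_boolq', 'flan_rte', 'flan_anli', 'flan_arc']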
lm_eval/tasks/benchmarks/flan_held_in.yaml → lm_eval/tasks/benchmarks/flan/flan_held_in_yaml

@@ -3,37 +3,37 @@ task:
   - include: flan/yaml_templates/held_in_template_yaml
     dataset_path: super_glue
     dataset_name: boolq
-    use_prompt: flan/prompt_templates/flan_boolq.yaml:*
+    use_prompt: flan/prompt_templates/boolq.yaml:*
     validation_split: validation
   - include: flan/yaml_templates/held_in_template_yaml
     dataset_path: super_glue
     dataset_name: rte
-    use_prompt: flan/prompt_templates/flan_rte.yaml:*
+    use_prompt: flan/prompt_templates/rte.yaml:*
     validation_split: validation
   - include: flan/yaml_templates/held_in_template_yaml
     task: anli_r1
     dataset_path: anli
-    use_prompt: flan/prompt_templates/flan_anli.yaml:*
+    use_prompt: flan/prompt_templates/anli.yaml:*
     validation_split: dev_r1
   - include: flan/yaml_templates/held_in_template_yaml
     task: anli_r2
     dataset_path: anli
-    use_prompt: flan/prompt_templates/flan_anli.yaml:*
+    use_prompt: flan/prompt_templates/anli.yaml:*
     validation_split: dev_r2
   - include: flan/yaml_templates/held_in_template_yaml
     task: anli_r3
     dataset_path: anli
-    use_prompt: flan/prompt_templates/flan_anli.yaml:*
+    use_prompt: flan/prompt_templates/anli.yaml:*
     validation_split: dev_r3
   - include: flan/yaml_templates/held_in_template_yaml
     task: arc_easy
     dataset_path: ai2_arc
     dataset_name: ARC-Easy
-    use_prompt: flan/prompt_templates/flan_arc.yaml:*
+    use_prompt: flan/prompt_templates/arc.yaml:*
     validation_split: validation
   - include: flan/yaml_templates/held_in_template_yaml
     task: arc_challenge
     dataset_path: ai2_arc
     dataset_name: ARC-Challenge
-    use_prompt: flan/prompt_templates/flan_arc.yaml:*
+    use_prompt: flan/prompt_templates/arc.yaml:*
     validation_split: validation
lm_eval/tasks/benchmarks/flan_held_out.yaml → lm_eval/tasks/benchmarks/flan/flan_held_out.yaml (file moved)
lm_eval/tasks/benchmarks/flan_rte.yaml → lm_eval/tasks/benchmarks/flan/flan_rte.yaml (file moved)
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_anli.yaml → lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml (file moved)
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_arc.yaml → lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml (file moved)
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_boolq.yaml → lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml (file moved)
lm_eval/tasks/benchmarks/flan/prompt_templates/flan_rte.yaml → lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml (file moved)
lm_eval/utils.py

@@ -426,7 +426,9 @@ def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None):
     if yaml_config is None:
         with open(yaml_path, "rb") as file:
             yaml_config = yaml.full_load(file)
-        yaml_dir = os.path.dirname(yaml_path)

+    if yaml_dir is None:
+        yaml_dir = os.path.dirname(yaml_path)
+    assert yaml_dir is not None
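The change makes the base directory used for resolving includes explicit: it is derived from yaml_path only when the caller did not pass yaml_dir, and the assert guards the case where neither source yields a directory. A condensed sketch of that guard logic under those assumptions (load_yaml_config_sketch is an illustrative stand-in, and the tuple return is hypothetical, not the library's actual return value):

import os
import yaml

def load_yaml_config_sketch(yaml_path=None, yaml_config=None, yaml_dir=None):
    # Only read the file when the caller did not hand over a pre-parsed config.
    if yaml_config is None:
        with open(yaml_path, "rb") as file:
            yaml_config = yaml.full_load(file)

    # Fall back to the config file's own directory for resolving includes,
    # but let an explicit yaml_dir win (e.g. when yaml_config came in inline).
    if yaml_dir is None:
        yaml_dir = os.path.dirname(yaml_path)
    assert yaml_dir is not None

    return yaml_config, yaml_dir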