gaoqiong / lm-evaluation-harness

Commit 51f27158, authored Feb 01, 2024 by lintangsutawika

    update with merge

Parents: 924c9790, f5408b6b

Showing 20 changed files (of 50 in the commit) with 177 additions and 107 deletions:
lm_eval/models/optimum_lm.py                                              +69   -0
lm_eval/models/vllm_causallms.py                                          +15   -8
lm_eval/prompts/__init__.py                                                +1   -1
lm_eval/tasks/__init__.py                                                  +9   -9
lm_eval/tasks/bbh/_generate_configs.py                                     +2   -2
lm_eval/tasks/belebele/_generate_configs.py                                +3   -3
lm_eval/tasks/bigbench/generate_tasks.py                                   +1   -1
lm_eval/tasks/blimp/generate_configs.py                                    +1   -1
lm_eval/tasks/ceval/_generate_configs.py                                   +3   -3
lm_eval/tasks/cmmlu/_generate_configs.py                                   +3   -3
lm_eval/tasks/code_x_glue/code-text/bleu.py                                +1   -1
lm_eval/tasks/csatqa/_generate_configs.py                                  +2   -2
lm_eval/tasks/gsm8k/gsm8k.yaml                                             +1   -0
lm_eval/tasks/mmlu/_generate_configs.py                                    +4   -4
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py    +1   -1
lm_eval/tasks/model_written_evals/persona/_generate_configs.py             +1   -1
lm_eval/tasks/qasper/utils.py                                              +1   -1
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml                                +2   -5
lm_eval/tasks/super_glue/wsc/t5_utils.py                                  +56  -60
lm_eval/tasks/xwinograd/utils.py                                           +1   -1
lm_eval/models/optimum_lm.py (new file, 0 → 100644)

from importlib.util import find_spec
from pathlib import Path

from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM


@register_model("openvino")
class OptimumLM(HFLM):
    """
    Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
    OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
    Intel® architectures using OpenVINO™ runtime.
    """

    def __init__(
        self,
        device="cpu",
        **kwargs,
    ) -> None:
        if "backend" in kwargs:
            # optimum currently only supports causal models
            assert (
                kwargs["backend"] == "causal"
            ), "Currently, only OVModelForCausalLM is supported."

        self.openvino_device = device

        super().__init__(
            device=self.openvino_device,
            backend=kwargs.get("backend", "causal"),
            **kwargs,
        )

    def _create_model(
        self,
        pretrained: str,
        revision="main",
        dtype="auto",
        trust_remote_code=False,
        **kwargs,
    ) -> None:
        if not find_spec("optimum"):
            raise Exception(
                "package `optimum` is not installed. Please install it via `pip install optimum[openvino]`"
            )
        else:
            from optimum.intel.openvino import OVModelForCausalLM

        model_kwargs = kwargs if kwargs else {}
        model_file = Path(pretrained) / "openvino_model.xml"
        if model_file.exists():
            export = False
        else:
            export = True
        kwargs["ov_config"] = {
            "PERFORMANCE_HINT": "LATENCY",
            "NUM_STREAMS": "1",
            "CACHE_DIR": "",
        }

        self._model = OVModelForCausalLM.from_pretrained(
            pretrained,
            revision=revision,
            trust_remote_code=trust_remote_code,
            export=export,
            device=self.openvino_device.upper(),
            **model_kwargs,
        )
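
The backend registers under the name "openvino" and otherwise inherits HFLM's evaluation logic. A minimal usage sketch, assuming the harness's lm_eval.simple_evaluate entry point; the model directory and task here are hypothetical, and the first run exports the model to IR because no openvino_model.xml exists yet:

    # Usage sketch (not part of the commit): the "openvino" name comes from the
    # @register_model decorator above; the path and task are illustrative.
    import lm_eval

    results = lm_eval.simple_evaluate(
        model="openvino",
        model_args="pretrained=./ov_gpt2",  # hypothetical local model directory
        tasks=["lambada_openai"],
    )
    print(results["results"])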
lm_eval/models/vllm_causallms.py

...
@@ -170,18 +170,12 @@ class VLLM(LM):
         stop: Optional[List[str]] = None,
         **kwargs,
     ):
-        if "do_sample" in kwargs.keys():
-            kwargs.pop("do_sample")
         if generate:
-            # hf defaults
-            kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
-            kwargs["spaces_between_special_tokens"] = kwargs.get(
-                "spaces_between_special_tokens", False
-            )
+            kwargs = self.modify_gen_kwargs(kwargs)
             sampling_params = SamplingParams(max_tokens=max_tokens, stop=stop, **kwargs)
         else:
             sampling_params = SamplingParams(
-                temperature=0, prompt_logprobs=2, max_tokens=1
+                temperature=0, prompt_logprobs=1, max_tokens=1
             )
         if self.data_parallel_size > 1:
             requests = [list(x) for x in divide(requests, self.data_parallel_size)]
...
@@ -438,3 +432,16 @@ class VLLM(LM):
                     break

         return continuation_logprobs, is_greedy
+
+    @staticmethod
+    def modify_gen_kwargs(kwargs: dict) -> dict:
+        # sampling_params
+        do_sample = kwargs.pop("do_sample", None)
+        if do_sample is False or "temperature" not in kwargs:
+            kwargs["temperature"] = 0.0
+        # hf defaults
+        kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
+        kwargs["spaces_between_special_tokens"] = kwargs.get(
+            "spaces_between_special_tokens", False
+        )
+        return kwargs
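
The refactor collects the HF-to-vLLM kwargs translation into one modify_gen_kwargs staticmethod and closes a real gap: previously do_sample was popped and discarded, so "do_sample: false" never forced greedy decoding. A standalone sketch of the mapping, with the method body copied out so it runs without vLLM installed:

    # Copy of the new staticmethod's logic, runnable on its own.
    def modify_gen_kwargs(kwargs: dict) -> dict:
        do_sample = kwargs.pop("do_sample", None)
        if do_sample is False or "temperature" not in kwargs:
            kwargs["temperature"] = 0.0  # temperature 0.0 means greedy decoding
        # hf defaults
        kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
        kwargs["spaces_between_special_tokens"] = kwargs.get(
            "spaces_between_special_tokens", False
        )
        return kwargs

    print(modify_gen_kwargs({"do_sample": False}))
    # {'temperature': 0.0, 'skip_special_tokens': False,
    #  'spaces_between_special_tokens': False}

The gsm8k.yaml change later in this commit pins temperature: 0.0 explicitly for the same reason.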
lm_eval/prompts/__init__.py

...
@@ -117,7 +117,7 @@ class PromptString:
         # TODO need a way to process doc_to_choice
         if "doc_to_choice" in self.prompt_string:
-            raise "Not yet implemented to accept doc_to_choice"
+            raise Exception("Not yet implemented to accept doc_to_choice")

         text_string = utils.apply_template(doc_to_text, doc)
         target_string = utils.apply_template(doc_to_target, doc)
...
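
A small but genuine bug fix: since Python 3, only instances or subclasses of BaseException can be raised, so the old statement failed before its message could ever surface:

    raise "Not yet implemented to accept doc_to_choice"
    # TypeError: exceptions must derive from BaseException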
lm_eval/tasks/__init__.py

...
@@ -43,7 +43,7 @@ def register_configurable_task(config: Dict[str, str]) -> int:
     if "group" in config:
         if config["group"] == config["task"]:
             raise ValueError("task and group name cannot be the same")
-        elif type(config["group"]) == str:
+        elif isinstance(config["group"], str):
             group_name = [config["group"]]
         else:
             group_name = config["group"]
...
@@ -57,8 +57,8 @@ def register_configurable_task(config: Dict[str, str]) -> int:
 def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -> int:
     group = config["group"]
     all_task_list = config["task"]
-    config_list = [task for task in all_task_list if type(task) != str]
-    task_list = [task for task in all_task_list if type(task) == str]

     for task_config in config_list:
+    config_list = [task for task in all_task_list if not isinstance(task, str)]
+    task_list = [task for task in all_task_list if isinstance(task, str)]
...
@@ -67,12 +67,12 @@ def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -
         if "task" in task_config:
             task_name = task_config["task"]
             if task_name in ALL_TASKS:
-                task_obj = get_task_dict(task_name)[task_name]
-                if type(task_obj) == tuple:
+                task_obj = TASK_REGISTRY[task_name]
+                if isinstance(task_obj, tuple):
                     _, task_obj = task_obj
                 if task_obj is not None:
-                    base_config = task_obj._config.to_dict(keep_callable=True)
+                    base_config = task_obj.CONFIG.to_dict(keep_callable=True)

             task_name_config["task"] = f"{group}_{task_name}"
         task_config = utils.load_yaml_config(yaml_path, task_config)
...
@@ -166,10 +166,10 @@ def include_task_folder(task_dir: str, register_task: bool = True) -> None:
                 )
                 for config in all_configs:
                     if register_task:
-                        if type(config["task"]) == str:
+                        if isinstance(config["task"], str):
                             register_configurable_task(config)
                     else:
-                        if type(config["task"]) == list:
+                        if isinstance(config["task"], list):
                             register_configurable_group(config, yaml_path)

             # Log this silently and show it only when
...
@@ -243,7 +243,7 @@ def get_task_dict(task_name_list: List[Union[str, Dict, Task]], **kwargs):
     task_name_from_config_dict = {}
     task_name_from_object_dict = {}

-    if type(task_name_list) != list:
+    if not isinstance(task_name_list, list):
         task_name_list = [task_name_list]

     for task_element in task_name_list:
...
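
The edits in this file are systematic: every type(x) == T comparison becomes isinstance(x, T). The two differ when subclasses are involved, which makes isinstance the safer default for config values; a minimal illustration (the subclass is hypothetical):

    class TaskList(list):  # hypothetical subclass, for illustration only
        pass

    tasks = TaskList(["gsm8k"])
    print(type(tasks) == list)      # False: the exact-type check rejects subclasses
    print(isinstance(tasks, list))  # True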
lm_eval/tasks/bbh/_generate_configs.py

...
@@ -28,7 +28,7 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     base_doc_to_text = "Q: {{input}}\nA:"
...
@@ -70,7 +70,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"/{task}.yaml"
         utils.eval_logger.info(f"Saving yaml for subset {task} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
...
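
The same encoding="utf-8" fix recurs in most of the generator-script diffs below. Without it, open() falls back to the platform's locale encoding, so YAML or JSON containing non-ASCII text (e.g. the Chinese subject names in the ceval/cmmlu generators) can be mis-read or mis-written on non-UTF-8 systems. A quick sketch of the hazard (the file name is hypothetical):

    import locale

    # open() without encoding= uses the locale's preferred encoding, which is
    # platform-dependent: often cp1252 on Windows, UTF-8 on most Linux installs.
    print(locale.getpreferredencoding(False))

    # Writing Chinese subject names is only portable with an explicit encoding.
    with open("ceval_subjects.txt", "w", encoding="utf-8") as f:
        f.write("农学 (agronomy)\n")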
lm_eval/tasks/belebele/_generate_configs.py

...
@@ -27,13 +27,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     def query():
...
@@ -54,7 +54,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{lang}.yaml"
         logging.info(f"Saving yaml for subset {lang} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
...
lm_eval/tasks/bigbench/generate_tasks.py

...
@@ -181,7 +181,7 @@ def main() -> None:
     for task in all_subtasks:
         file_name = f"{task}.yaml"
         try:
-            with open(f"{path}/{file_name}", "w") as f:
+            with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
...
lm_eval/tasks/blimp/generate_configs.py

...
@@ -75,7 +75,7 @@ def main() -> None:
     for task in all_subtasks:
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
...
lm_eval/tasks/ceval/_generate_configs.py

...
@@ -79,13 +79,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
...
@@ -107,7 +107,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
...
lm_eval/tasks/cmmlu/_generate_configs.py

...
@@ -94,13 +94,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
...
@@ -122,7 +122,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
...
lm_eval/tasks/code_x_glue/code-text/bleu.py

...
@@ -184,7 +184,7 @@ def splitPuncts(line):
 def computeMaps(predictions, goldfile):
     predictionMap: Dict[str, list] = {}
     goldMap: Dict[str, list] = {}
-    gf = open(goldfile, "r")
+    gf = open(goldfile, "r", encoding="utf-8")

     for row in predictions:
         cols = row.strip().split("\t")
...
lm_eval/tasks/csatqa/_generate_configs.py

...
@@ -25,7 +25,7 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     for name in tqdm(SUBSETS):
...
@@ -39,7 +39,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
         eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
...
lm_eval/tasks/gsm8k/gsm8k.yaml

...
@@ -24,6 +24,7 @@ generation_kwargs:
     - "\n\n"
     - "Question:"
   do_sample: false
+  temperature: 0.0
 repeats: 1
 num_fewshot: 5
 filter_list:
...
lm_eval/tasks/mmlu/_generate_configs.py

...
@@ -85,13 +85,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     ALL_CATEGORIES = []
...
@@ -120,7 +120,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
...
@@ -142,7 +142,7 @@ if __name__ == "__main__":
     file_save_path = args.save_prefix_path + ".yaml"
     eval_logger.info(f"Saving benchmark config to {file_save_path}")
-    with open(file_save_path, "w") as yaml_file:
+    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(
             {
                 "group": f"mmlu_{args.task_prefix}"
...
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py

...
@@ -9,7 +9,7 @@ def main() -> None:
     for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by _generate_configs.py\n")
                 yaml.dump(
                     {
...
lm_eval/tasks/model_written_evals/persona/_generate_configs.py

...
@@ -9,7 +9,7 @@ def main() -> None:
     for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by _generate_configs.py\n")
                 yaml.dump(
                     {
...
lm_eval/tasks/qasper/utils.py

...
@@ -50,7 +50,7 @@ def process_docs(dataset, set_answer_type="bool"):
             obs_list["abstract"].append(abstract)
             obs_list["question"].append(question)
             obs_list["answer_type"].append(answer_type)
-            if type(answer) == list:
+            if isinstance(answer, list):
                 answer = ", ".join(answer)
             obs_list["answer"].append(answer)
...
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml

...
@@ -7,6 +7,7 @@ training_split: train
 validation_split: validation
 output_type: generate_until
 doc_to_text: !function "t5_utils.doc_to_text"
+process_results: !function "t5_utils.process_results"
 doc_to_target: label
 generation_kwargs:
   until:
...
@@ -15,9 +16,5 @@ metric_list:
   - metric: accuracy
     aggregation: mean
     higher_is_better: true
-filter_list:
-  - name: "wsc_postprocessor"
-    filter:
-      - function: !function t5_utils.WSCPostprocess
 metadata:
-  version: 0.0
+  version: 1.0
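
Together with the t5_utils.py rewrite below, this moves WSC post-processing out of the filter pipeline: the Filter-based wsc_postprocessor is removed, a process_results callable now computes accuracy directly from the raw generation, and the task version is bumped from 0.0 to 1.0 to flag the scoring change (a runnable sketch follows the next diff).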
lm_eval/tasks/super_glue/wsc/t5_utils.py

 import re

-from lm_eval.api.filter import Filter
+from typing import List


 def doc_to_text(x):
     text = re.sub(r" X ", " *" + x["span2_text"] + "* ", _wsc_inputs(x))
...
@@ -24,7 +23,7 @@ def _wsc_inputs(x):
         [
             " ".join(words[:pronoun_index]),
             "X",
             " ".join(words[pronoun_index + 1 :]),
         ]
     )
...
@@ -52,9 +51,7 @@ def _wsc_inputs(x):
     return create_input()


-class WSCPostprocess(Filter):
-    def __init__(self, **kwargs):
-        self.determiners = {
+DETERMINERS = {
     "a",
     "an",
     "few",
...
@@ -76,18 +73,18 @@ class WSCPostprocess(Filter):
     "which",
     "whose",
     "your",
 }

-    def clean(self, s):
-        """Ignore capitalization and determiners."""
-        s = s.strip().lower()
-        return " ".join([w for w in s.split(" ") if w not in self.determiners])
+def clean(s: str) -> str:
+    """Ignore capitalization and determiners."""
+    s = s.strip().lower()
+    return " ".join([w for w in s.split(" ") if w not in DETERMINERS])

-    def apply(self, resps, docs):
-        filtered_resps = []
-        for prediction, reference in zip(*(resps, docs["span1_text"])):
-            prediction = self.clean(prediction[0])
-            reference = self.clean(reference)
+def process_results(docs: dict, resps: List):
+    prediction = clean(resps[0])
+    reference = clean(docs["span1_text"])

     if ("'" in prediction) != ("'" in reference):
         # referent is "Bob's hat" as predicting the referent.
...
@@ -102,6 +99,5 @@ class WSCPostprocess(Filter):
-                referent_words
-            ) or referent_words.issubset(prediction_words)
-
-            filtered_resps.append(predicted_referent)
-
-        return filtered_resps
+            referent_words
+        ) or referent_words.issubset(prediction_words)
+
+    acc = 1.0 if predicted_referent == docs["label"] else 0.0
+    return {"accuracy": acc}
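
A condensed, runnable sketch of the new function-based scoring path, using an abridged determiner set and a made-up doc/response pair (not dataset records):

    DETERMINERS = {"a", "an", "the"}  # abridged; the real set is much larger

    def clean(s: str) -> str:
        """Ignore capitalization and determiners."""
        s = s.strip().lower()
        return " ".join([w for w in s.split(" ") if w not in DETERMINERS])

    def predicted_referent(doc: dict, resp: str) -> bool:
        prediction, reference = clean(resp), clean(doc["span1_text"])
        if ("'" in prediction) != ("'" in reference):
            return False  # possessive mismatch, e.g. "Bob's hat" vs "Bob"
        p, r = set(prediction.split(" ")), set(reference.split(" "))
        # fuzzy match: prediction "fuzzy bunny" should count for referent "bunny"
        return p.issubset(r) or r.issubset(p)

    doc = {"span1_text": "the city council", "label": 1}  # made-up example
    acc = 1.0 if predicted_referent(doc, "City Council") == doc["label"] else 0.0
    print(acc)  # 1.0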
lm_eval/tasks/xwinograd/utils.py

...
@@ -51,7 +51,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
     for lang in LANGUAGES:
         file_name = f"xwinograd_{lang}.yaml"
         try:
-            with open(f"{output_dir}/{file_name}", "w" if overwrite else "x") as f:
+            with open(f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
...
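
Beyond the encoding fix, note the mode string "w" if overwrite else "x": exclusive-creation mode makes a rerun fail with FileExistsError instead of silently clobbering existing YAMLs. A tiny sketch (hypothetical path):

    with open("xwinograd_en.yaml", "x", encoding="utf-8") as f:  # first run creates
        f.write("# Generated by utils.py\n")

    try:
        open("xwinograd_en.yaml", "x", encoding="utf-8")  # second run refuses
    except FileExistsError:
        print("file exists; pass overwrite=True to regenerate")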