gaoqiong / lm-evaluation-harness · Commits

Commit cb8889cc
Authored Feb 05, 2024 by lintangsutawika
Parents: ec05e561, 74119471

    merged with latest update from main

The commit touches 69 files; this page shows the first 20 changed files, with 739 additions and 139 deletions (+739, -139).
Changed files shown on this page:

  lm_eval/models/huggingface.py                              +26  -11
  lm_eval/models/optimum_lm.py                               +69   -0
  lm_eval/models/vllm_causallms.py                           +15   -8
  lm_eval/prompts/__init__.py                                 +1   -1
  lm_eval/tasks/__init__.py                                 +251  -92
  lm_eval/tasks/bbh/_generate_configs.py                      +2   -2
  lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml    +1   -0
  lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml            +1   -0
  lm_eval/tasks/belebele/_generate_configs.py                 +4   -4
  lm_eval/tasks/belebele/belebele_default.yaml                +0   -4
  lm_eval/tasks/benchmarks/flan/_held_in_template_yaml       +14   -0
  lm_eval/tasks/benchmarks/flan/flan_held_in.yaml           +331   -0
  lm_eval/tasks/benchmarks/flan/flan_held_out.yaml           +13   -0
  lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml         +0   -6
  lm_eval/tasks/bigbench/generate_tasks.py                    +1   -1
  lm_eval/tasks/blimp/generate_configs.py                     +1   -1
  lm_eval/tasks/ceval/_generate_configs.py                    +3   -3
  lm_eval/tasks/cmmlu/_generate_configs.py                    +3   -3
  lm_eval/tasks/code_x_glue/code-text/bleu.py                 +1   -1
  lm_eval/tasks/csatqa/_generate_configs.py                   +2   -2
lm_eval/models/huggingface.py (+26, -11)

@@ -108,8 +108,8 @@ class HFLM(LM):
             assert not parallelize, "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`"
             self._model = pretrained
             self._device = self._model.device
             self._config = self._model.config
+            gpus = 0

             if tokenizer:
                 assert isinstance(
@@ -200,6 +200,7 @@ class HFLM(LM):
         )
         # access self._model through self.model property outside this method
+        if isinstance(self.model, torch.nn.Module):
             self.model.eval()
             self.model.tie_weights()
@@ -238,6 +239,16 @@ class HFLM(LM):
         if self.config.model_type == "qwen":
             # Qwen's trust_remote_code tokenizer does not allow for adding special tokens
             self.tokenizer.pad_token = "<|endoftext|>"
+        elif (
+            self.tokenizer.__class__.__name__ == "RWKVWorldTokenizer"
+            or self.tokenizer.__class__.__name__ == "Rwkv5Tokenizer"
+        ):
+            # The RWKV world tokenizer, does not allow for adding special tokens / setting the pad token (which is set as 0)
+            # The additional tokenizer name check is needed, as there exists rwkv4 models with neox tokenizer
+            # ---
+            # Note that the world tokenizer class name, might change in the future for the final huggingface merge
+            # https://github.com/huggingface/transformers/pull/26963
+            assert self.tokenizer.pad_token_id == 0
         else:
             self.tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
@@ -361,7 +372,7 @@ class HFLM(LM):
     def _get_backend(
         self,
-        config: transformers.AutoConfig,
+        config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
         backend: Optional[Literal["default", "causal", "seq2seq"]] = "default",
         trust_remote_code: Optional[bool] = False,
     ) -> None:
@@ -602,8 +613,7 @@ class HFLM(LM):
                 (batch_size, max_length), device=self.device
             ).long()
             for _ in range(5):
-                out = F.log_softmax(self._model_call(test_batch, **call_kwargs), dim=-1)
-                out = out  # Identity process so that it passes pre-commit
+                out = F.log_softmax(self._model_call(test_batch, **call_kwargs), dim=-1)  # noqa: F841

             return batch_size
@@ -705,10 +715,14 @@ class HFLM(LM):
             return self.model(inps).logits

     def _model_generate(self, context, max_length, stop, **generation_kwargs):
-        # we require users to pass do_sample=True explicitly
-        # for non-greedy gen. This should be reevaluated when considering beam search.
-        if "do_sample" not in generation_kwargs:
-            generation_kwargs["do_sample"] = False
+        # temperature = 0.0 if not set
+        # if do_sample is false and temp==0.0:
+        # remove temperature, as do_sample=False takes care of this
+        # and we don't want a warning from HF
+        generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
+        do_sample = generation_kwargs.get("do_sample", None)
+        if do_sample is False and generation_kwargs.get("temperature") == 0.0:
+            generation_kwargs.pop("temperature")
         # build stopping criteria
         stopping_criteria = stop_sequences_criteria(
             self.tokenizer, stop, context.shape[1], context.shape[0]
@@ -1045,6 +1059,7 @@ class HFLM(LM):
             return -len(toks), x[0]

         pbar = tqdm(total=len(requests), disable=(self.rank != 0))
+        adaptive_batch_size = None
         if self.batch_size == "auto":
             # using rolling window with maximum context
             print("Passed argument batch_size = auto. Detecting largest batch size")
@@ -1089,7 +1104,7 @@ class HFLM(LM):
             )
         else:
             raise ValueError(
-                f"Expected `kwargs` to be of type `dict` but got {kwargs}"
+                f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
             )
         if not until:
             until = [self.tok_decode(self.eot_token_id)]
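The _model_generate hunk above changes how sampling arguments are prepared before calling HF generate(). Below is a minimal, self-contained sketch of that normalization; the function name is hypothetical and only mirrors the merged logic, it is not part of the harness API.

    # Hypothetical stand-alone restatement of the merged kwarg handling.
    def normalize_generation_kwargs(generation_kwargs: dict) -> dict:
        # temperature defaults to 0.0 when the caller does not set it
        generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
        do_sample = generation_kwargs.get("do_sample", None)
        # greedy decoding with temperature 0.0: drop temperature so HF's
        # generate() does not warn about an unused sampling parameter
        if do_sample is False and generation_kwargs.get("temperature") == 0.0:
            generation_kwargs.pop("temperature")
        return generation_kwargs

    print(normalize_generation_kwargs({"do_sample": False}))  # {'do_sample': False}
    print(normalize_generation_kwargs({"temperature": 0.7}))  # {'temperature': 0.7}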
lm_eval/models/optimum_lm.py (new file, +69, -0)

from importlib.util import find_spec
from pathlib import Path

from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM


@register_model("openvino")
class OptimumLM(HFLM):
    """
    Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
    OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
    Intel® architectures using OpenVINO™ runtime.
    """

    def __init__(
        self,
        device="cpu",
        **kwargs,
    ) -> None:
        if "backend" in kwargs:
            # optimum currently only supports causal models
            assert (
                kwargs["backend"] == "causal"
            ), "Currently, only OVModelForCausalLM is supported."

        self.openvino_device = device

        super().__init__(
            device=self.openvino_device,
            backend=kwargs.get("backend", "causal"),
            **kwargs,
        )

    def _create_model(
        self,
        pretrained: str,
        revision="main",
        dtype="auto",
        trust_remote_code=False,
        **kwargs,
    ) -> None:
        if not find_spec("optimum"):
            raise Exception(
                "package `optimum` is not installed. Please install it via `pip install optimum[openvino]`"
            )
        else:
            from optimum.intel.openvino import OVModelForCausalLM

        model_kwargs = kwargs if kwargs else {}
        model_file = Path(pretrained) / "openvino_model.xml"
        if model_file.exists():
            export = False
        else:
            export = True
        kwargs["ov_config"] = {
            "PERFORMANCE_HINT": "LATENCY",
            "NUM_STREAMS": "1",
            "CACHE_DIR": "",
        }

        self._model = OVModelForCausalLM.from_pretrained(
            pretrained,
            revision=revision,
            trust_remote_code=trust_remote_code,
            export=export,
            device=self.openvino_device.upper(),
            **model_kwargs,
        )
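Since OptimumLM subclasses HFLM and is registered as "openvino", it can be constructed like any other harness model class. A minimal sketch, assuming `optimum[openvino]` is installed; the model path below is a placeholder, not something from this commit.

    from lm_eval.models.optimum_lm import OptimumLM

    # Loads ./my-openvino-model/openvino_model.xml if it exists, otherwise the
    # checkpoint is exported to OpenVINO IR first (the export=True path above).
    ov_lm = OptimumLM(pretrained="./my-openvino-model", device="cpu")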
lm_eval/models/vllm_causallms.py (+15, -8)

@@ -170,18 +170,12 @@ class VLLM(LM):
         stop: Optional[List[str]] = None,
         **kwargs,
     ):
-        if "do_sample" in kwargs.keys():
-            kwargs.pop("do_sample")
         if generate:
-            # hf defaults
-            kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
-            kwargs["spaces_between_special_tokens"] = kwargs.get("spaces_between_special_tokens", False)
+            kwargs = self.modify_gen_kwargs(kwargs)
             sampling_params = SamplingParams(max_tokens=max_tokens, stop=stop, **kwargs)
         else:
             sampling_params = SamplingParams(
-                temperature=0, prompt_logprobs=2, max_tokens=1
+                temperature=0, prompt_logprobs=1, max_tokens=1
             )
         if self.data_parallel_size > 1:
             requests = [list(x) for x in divide(requests, self.data_parallel_size)]
@@ -438,3 +432,16 @@ class VLLM(LM):
                     break

         return continuation_logprobs, is_greedy
+
+    @staticmethod
+    def modify_gen_kwargs(kwargs: dict) -> dict:
+        # sampling_params
+        do_sample = kwargs.pop("do_sample", None)
+        if do_sample is False or "temperature" not in kwargs:
+            kwargs["temperature"] = 0.0
+        # hf defaults
+        kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
+        kwargs["spaces_between_special_tokens"] = kwargs.get("spaces_between_special_tokens", False)
+        return kwargs
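For reference, the new modify_gen_kwargs static method can be exercised on its own; the snippet below simply restates the merged logic outside the class to show what it does to a caller's kwargs.

    def modify_gen_kwargs(kwargs: dict) -> dict:
        # do_sample is not a vLLM SamplingParams argument, so fold it into temperature
        do_sample = kwargs.pop("do_sample", None)
        if do_sample is False or "temperature" not in kwargs:
            kwargs["temperature"] = 0.0
        # hf defaults: keep special tokens in the decoded output, matching HF generate()
        kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
        kwargs["spaces_between_special_tokens"] = kwargs.get("spaces_between_special_tokens", False)
        return kwargs

    print(modify_gen_kwargs({"do_sample": False, "temperature": 0.8}))
    # {'temperature': 0.0, 'skip_special_tokens': False, 'spaces_between_special_tokens': False}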
lm_eval/prompts/__init__.py (+1, -1)

@@ -117,7 +117,7 @@ class PromptString:
         # TODO need a way to process doc_to_choice
         if "doc_to_choice" in self.prompt_string:
-            raise "Not yet implemented to accept doc_to_choice"
+            raise Exception("Not yet implemented to accept doc_to_choice")

         text_string = utils.apply_template(doc_to_text, doc)
         target_string = utils.apply_template(doc_to_target, doc)
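The one-line change above replaces a string `raise` with a real exception. Raising a bare string has been invalid since Python 3, so the old line would surface as a TypeError rather than the intended message, as this small check shows:

    try:
        raise "Not yet implemented to accept doc_to_choice"   # old behaviour
    except TypeError as err:
        print(err)  # "exceptions must derive from BaseException"

    # new behaviour: a real exception object carrying the message
    # raise Exception("Not yet implemented to accept doc_to_choice")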
lm_eval/tasks/__init__.py (+251, -92)

 import os
 import abc
-import yaml
 import collections

 from functools import partial
 from typing import List, Union, Dict

 from lm_eval import utils
-from lm_eval import prompts
-from lm_eval.api.task import Task, ConfigurableTask
+from lm_eval.api.task import TaskConfig, Task, ConfigurableTask

 import logging

-# # import python tasks
-# import squadv2.task
-# import scrolls.task
-# python_tasks = {
-#     "squadv2": squadv2.task.SQuAD2,
-#     "scrolls_quality": scrolls.task.QuALITY,
-#     "scrolls_narrativeqa": scrolls.task.NarrativeQA,
-#     "scrolls_contractnli": scrolls.task.ContractNLI,
-#     "scrolls_govreport": scrolls.task.GovReport,
-#     "scrolls_summscreenfd": scrolls.task.SummScreenFD,
-#     "scrolls_qmsum": scrolls.task.QMSum,
-# }
-
-GROUP_KEYS = ["group", "task", "weight_by_size"]
-PYTHON_TASK_KEYS = ["task", "class"]
+eval_logger = utils.eval_logger

-class TaskManager(abc.ABC):
+class TaskManager:
+    """TaskManager indexes all tasks from the default `lm_eval/tasks/`
+    and an optional directory if provided.
+    """

     def __init__(
         self,
         verbosity="INFO",

@@ -40,79 +24,132 @@ class TaskManager(abc.ABC):
         self.verbosity = verbosity
         self.include_path = include_path
-        self.logger = eval_logger.setLevel(getattr(logging, f"{verbosity}"))
+        self.logger = utils.eval_logger
+        self.logger.setLevel(getattr(logging, f"{verbosity}"))

-        self.ALL_TASKS = self.initialize_tasks(
+        self._task_index = self.initialize_tasks(
             include_path=include_path
         )
+        self._all_tasks = sorted(list(self._task_index.keys()))
+
+        self.task_group_map = collections.defaultdict(list)

-    def initialize_tasks(self, include_path=None):
+    def initialize_tasks(self, include_path: str = None):
+        """Creates an dictionary of tasks index.
+
+        :param include_path: str = None
+            An additional path to be searched for tasks
+
+        :return
+            Dictionary of task names as key and task metadata
+        """
         all_paths = [os.path.dirname(os.path.abspath(__file__)) + "/"]
         if include_path is not None:
             if isinstance(include_path, str):
                 include_path = [include_path]
             all_paths.extend(include_path)

-        ALL_TASKS = {}
+        task_index = {}
         for task_dir in all_paths:
             tasks = self._get_task_and_group(task_dir)
-            ALL_TASKS = {**tasks, **ALL_TASKS}
+            task_index = {**tasks, **task_index}

-        return ALL_TASKS
+        return task_index

     @property
     def all_tasks(self):
-        return sorted(list(self.ALL_TASKS.keys()))
+        return self._all_tasks
+
+    @property
+    def task_index(self):
+        return self._task_index
+
+    def match_tasks(self, task_list):
+        return utils.pattern_match(task_list, self.all_tasks)

     def _name_is_registered(self, name):
-        if name in self.ALL_TASKS:
+        if name in self.all_tasks:
             return True
         return False

     def _name_is_task(self, name):
-        if self._name_is_registered(name) and ("task" in self.ALL_TASKS[name]["type"]):
+        if self._name_is_registered(name) and ("task" in self.task_index[name]["type"]):
             return True
         return False

+    def _name_is_group(self, name):
+        if self._name_is_registered(name) and (self.task_index[name]["type"] == "group"):
+            return True
+        return False
+
     def _name_is_python_task(self, name):
-        if self._name_is_registered(name) and (self.ALL_TASKS[name]["type"] == "python_task"):
+        if self._name_is_registered(name) and (self.task_index[name]["type"] == "python_task"):
             return True
         return False

     def _config_is_task(self, config):
-        if set(config.keys()) <= set(GROUP_KEYS):
+        if ("task" in config) and isinstance(config["task"], str):
             return True
         return False

+    def _config_is_group(self, config):
+        if ("task" in config) and isinstance(config["task"], list):
+            return True
+        return False
+
     def _config_is_python_task(self, config):
-        if set(config.keys()) == set(PYTHON_TASK_KEYS):
+        if "class" in config:
             return True
         return False

     def _get_yaml_path(self, name):
-        assert name in self.ALL_TASKS
-        return self.ALL_TASKS[name]["yaml_path"]
+        assert name in self.task_index
+        return self.task_index[name]["yaml_path"]

     def _get_config(self, name):
-        assert name in self.ALL_TASKS
+        assert name in self.task_index
         yaml_path = self._get_yaml_path(name)
-        return utils.load_yaml_config("full", yaml_path)
+        if yaml_path == -1:
+            return {}
+        else:
+            return utils.load_yaml_config(yaml_path, mode="full")

     def _get_tasklist(self, name):
         assert self._name_is_task(name) == False
-        return self.ALL_TASKS[name]["task"]
+        return self.task_index[name]["task"]

+    def _process_alias(self, config, group=None):
+        # If the group is not the same as the original
+        # group which the group alias was intended for,
+        # Set the group_alias to None instead.
+        if ("group_alias" in config) and ("group" in config) and group is not None:
+            if config["group"] != group:
+                config["group_alias"] = None
+        return config
+
     def _load_individual_task_or_group(
         self,
         name_or_config: Union[str, dict] = None,
         parent_name: str = None,
-        update_config: dict = None
+        update_config: dict = None,
+        yaml_path: str = None,
     ) -> ConfigurableTask:
-        def load_task(config, task, group=None, is_python_class=False):
-            if is_python_class:
-                task_object = config["class"]()
-            else:
-                task_object = ConfigurableTask(config=config)
+        def load_task(config, task, group=None, yaml_path=None):
+            if "include" in config:
+                assert yaml_path is not None
+                config.update(
+                    utils.load_yaml_config(
+                        yaml_path,
+                        yaml_config={"include": config.pop("include")},
+                        mode="full",
+                    )
+                )
+            if self._config_is_python_task(config):
+                task_object = config["class"]()
+            else:
+                config = self._process_alias(config, group=group)
+                task_object = ConfigurableTask(config=config)
             if group is not None:
                 task_object = (group, task_object)

@@ -124,15 +161,26 @@
                 name_or_config = {"task": name_or_config, **update_config}
             elif self._name_is_task(name_or_config):
                 task_config = self._get_config(name_or_config)
-                is_python_class = False
-                if self._name_is_python_task(name_or_config):
-                    is_python_class = True
-                return load_task(task_config, task=name_or_config, group=parent_name, is_python_class=is_python_class)
+                return load_task(task_config, task=name_or_config, group=parent_name)
             else:
                 group_name = name_or_config
                 subtask_list = self._get_tasklist(name_or_config)
                 if subtask_list == -1:
-                    subtask_list = self._get_config(name_or_config)["task"]
+                    group_config = self._get_config(name_or_config)
+                    subtask_list = group_config["task"]
+
+                # This checks if we're at the root.
+                if parent_name is None:
+                    group_config = self._get_config(name_or_config)
+                    if set(group_config.keys()) > set(["task", "group"]):
+                        update_config = {
+                            k: v for k, v in group_config.items() if k not in ["task", "group"]
+                        }
+                    yaml_path = self._get_yaml_path(group_name)
+
+                    if (update_config is not None) and ("group_alias" in update_config):
+                        group_name = update_config["group_alias"]
+                        update_config.pop("group_alias")

         if isinstance(name_or_config, dict):

@@ -145,7 +193,8 @@
             if self._config_is_task(name_or_config):
                 name = name_or_config["task"]
                 # If the name is registered as a group
-                if self._name_is_task(name) is False:
+                # if self._name_is_task(name) is False:
+                if self._name_is_group(name):
                     group_name = name
                     update_config = {k: v for k, v in name_or_config.items() if k != "task"}
                     subtask_list = self._get_tasklist(name)

@@ -154,28 +203,49 @@
                 else:
                     if self._name_is_registered(name):
                         base_task_config = self._get_config(name)
+
+                        # Check if this is a duplicate.
+                        if parent_name is not None:
+                            name_or_config["group"] = parent_name
+                            num_duplicate = len(list(filter(lambda x: x.startswith(name), self.task_group_map[parent_name])))
+                            if num_duplicate > 0:
+                                name = f"{name}-{num_duplicate}"
+                            self.task_group_map[parent_name].append(name)
+
                         task_config = {
                             **base_task_config,
                             **name_or_config,
                         }
                     else:
                         task_config = name_or_config
-                    return load_task(task_config, task=name, group=parent_name)
+                    return load_task(task_config, task=name, group=parent_name, yaml_path=yaml_path)
             else:
                 group_name = name_or_config["group"]
                 subtask_list = name_or_config["task"]
-                # update_config = {k:v for k,v in name_or_config.items() if k != "task"}
+                if set(name_or_config.keys()) > set(["task", "group"]):
+                    update_config = {
+                        k: v for k, v in name_or_config.items() if k not in ["task", "group"]
+                    }

         all_subtasks = {}
-        if (parent_name is not None) and ((self._name_is_registered(group_name) is False) or (self._get_yaml_path(group_name) == -1)):
+        if parent_name is not None:
             all_subtasks = {group_name: (parent_name, None)}

-        fn = partial(self._load_individual_task_or_group, parent_name=group_name, update_config=update_config)
+        fn = partial(self._load_individual_task_or_group, parent_name=group_name, update_config=update_config, yaml_path=yaml_path)
         all_subtasks = {**all_subtasks, **dict(collections.ChainMap(*map(fn, subtask_list)))}
         return all_subtasks

     def load_task_or_group(self, task_list: Union[str, list] = None) -> dict:
+        """Loads a dictionary of task objects from a list
+
+        :param task_list: Union[str, list] = None
+            Single string or list of string of task names to be loaded
+
+        :return
+            Dictionary of task objects
+        """
         if isinstance(task_list, str):
             task_list = [task_list]

@@ -189,20 +259,43 @@
             )

         return all_loaded_tasks

+    def load_config(self, config: Dict):
+        return self._load_individual_task_or_group(config)
+
     def _get_task_and_group(self, task_dir: str):
+        """Creates an dictionary of tasks index with the following metadata,
+        - `type`, that can be either `task`, `python_task`, or `group`.
+            `task` refer to regular task configs, `python_task` are special
+            yaml files that only consists of `task` and `class` parameters.
+            `group` are group configs.
+        - `yaml_path`, path to the yaml file. If the entry is a `group` that
+            was configured through a task config, the yaml_path will be -1
+            and all subtasks will be listed in `task` (see below)
+        - `task`, reserved for entries with `type` as `group`. This will list
+            all subtasks. When a group config is created (as opposed to task
+            config having `group` parameter set), this will be set to -1 to
+            avoid recursive indexing. The whole list of subtasks will be loaded
+            at evaluation.
+
+        :param task_dir: str
+            A directory to check for tasks
+
+        :return
+            Dictionary of task names as key and task metadata
+        """
         tasks_and_groups = collections.defaultdict()
         for root, _, file_list in os.walk(task_dir):
             for f in file_list:
                 if f.endswith(".yaml"):
                     yaml_path = os.path.join(root, f)
-                    config = utils.load_yaml_config("simple", yaml_path)
+                    config = utils.load_yaml_config(yaml_path, mode="simple")
-                    if set(config.keys()) == set(PYTHON_TASK_KEYS):
+                    if self._config_is_python_task(config):
                         # This is a python class config
                         tasks_and_groups[config["task"]] = {
                             "type": "python_task",
                             "yaml_path": yaml_path,
                         }
-                    elif set(config.keys()) <= set(GROUP_KEYS):
+                    elif self._config_is_group(config):
                         # This is a group config
                         tasks_and_groups[config["group"]] = {
                             "type": "group",

@@ -213,7 +306,17 @@
                             # when called.
                             "yaml_path": yaml_path,
                         }
-                    else:
+                        # # Registered the level 1 tasks from a group config
+                        # for config in config["task"]:
+                        #     if isinstance(config, dict) and self._config_is_task(config):
+                        #         task = config["task"]
+                        #         tasks_and_groups[task] = {
+                        #             "type": "task",
+                        #             "yaml_path": yaml_path,
+                        #         }
+
+                    elif self._config_is_task(config):
                         # This is a task config
                         task = config["task"]
                         tasks_and_groups[task] = {

@@ -235,41 +338,97 @@
                         }
                     else:
                         tasks_and_groups[group]["task"].append(task)
+                else:
+                    self.logger.debug(f"File {f} in {root} could not be loaded")

         return tasks_and_groups


-# def check_prompt_config(
-#     config: Dict[str, str], yaml_path: str = None
-# ) -> List[Dict[str, str]]:
-#     all_configs = []
-#     if "use_prompt" in config:
-#         prompt_list = prompts.load_prompt_list(
-#             use_prompt=config["use_prompt"],
-#             dataset_name=config["dataset_path"],
-#             subset_name=config["dataset_name"] if "dataset_name" in config else None,
-#             yaml_path=yaml_path,
-#         )
-#         for idx, prompt_variation in enumerate(prompt_list):
-#             all_configs.append(
-#                 {
-#                     **config,
-#                     **{"use_prompt": prompt_variation},
-#                     **{
-#                         "task": "_".join(
-#                             [
-#                                 config["task"]
-#                                 if "task" in config
-#                                 else get_task_name_from_config(config),
-#                                 prompt_variation.split("/")[-1]
-#                                 if ".yaml" in prompt_variation
-#                                 else prompt_variation,
-#                             ]
-#                         )
-#                     },
-#                     **{"output_type": "generate_until"},
-#                 }
-#             )
-#     else:
-#         all_configs.append(config)
-#     return all_configs
+def include_path(task_dir):
+    logger = utils.eval_logger
+    logger.setLevel(getattr(logging, "INFO"))
+    logger.info(
+        "To still use tasks loaded from args.include_path,"
+        "see an example of the new TaskManager API in https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage"
+    )
+    return 0
+
+
+def initialize_tasks(verbosity="INFO"):
+    logger = utils.eval_logger
+    logger.setLevel(getattr(logging, f"{verbosity}"))
+    logger.info(
+        "lm_eval.tasks.initialize_tasks() is deprecated and no longer necessary. "
+        "It will be removed in v0.4.2 release. "
+        "TaskManager will instead be used."
+    )
+    return 0
+
+
+def get_task_name_from_config(task_config: Dict[str, str]) -> str:
+    if "task" in task_config:
+        return task_config["task"]
+    if "dataset_name" in task_config:
+        return "{dataset_path}_{dataset_name}".format(**task_config)
+    else:
+        return "{dataset_path}".format(**task_config)
+
+
+def get_task_name_from_object(task_object):
+    if hasattr(task_object, "config"):
+        return task_object._config["task"]
+
+    # TODO: scrap this
+    # this gives a mechanism for non-registered tasks to have a custom name anyways when reporting
+    return (
+        task_object.EVAL_HARNESS_NAME
+        if hasattr(task_object, "EVAL_HARNESS_NAME")
+        else type(task_object).__name__
+    )
+
+
+def get_task_dict(task_name_list: List[Union[str, Dict, Task]], task_manager: TaskManager = None):
+    """Creates a dictionary of task objects from either a name of task, config, or prepared Task object.
+
+    :param task_name_list: List[Union[str, Dict, Task]]
+        Name of model or LM object, see lm_eval.models.get_model
+    :param task_manager: TaskManager = None
+        A TaskManager object that stores indexed tasks. If not set,
+        task_manager will load one. This should be set by the user
+        if there are additional paths that want to be included
+        via `include_path`
+
+    :return
+        Dictionary of task objects
+    """
+    task_name_from_string_dict = {}
+    task_name_from_config_dict = {}
+    task_name_from_object_dict = {}
+
+    if isinstance(task_name_list, str):
+        task_name_list = [task_name_list]
+
+    string_task_name_list = [task for task in task_name_list if isinstance(task, str)]
+    others_task_name_list = [task for task in task_name_list if ~isinstance(task, str)]
+    if len(string_task_name_list) > 0:
+        if task_manager is None:
+            task_manager = TaskManager()
+
+        task_name_from_string_dict = task_manager.load_task_or_group(
+            string_task_name_list
+        )
+
+    for task_element in others_task_name_list:
+        if isinstance(task_element, dict):
+            task_name_from_config_dict = {
+                **task_name_from_config_dict,
+                **task_manager.load_config(config=task_element),
+            }
+
+        elif isinstance(task_element, Task):
+            task_name_from_object_dict = {
+                **task_name_from_object_dict,
+                get_task_name_from_object(task_element): task_element,
+            }
+
+    assert set(task_name_from_string_dict.keys()).isdisjoint(
+        set(task_name_from_object_dict.keys())
+    )
+    return {
+        **task_name_from_string_dict,
+        **task_name_from_config_dict,
+        **task_name_from_object_dict,
+    }
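The deprecation notices above point callers at the new TaskManager API. A minimal usage sketch, based only on the methods visible in this diff; the custom include path is a hypothetical example.

    from lm_eval.tasks import TaskManager, get_task_dict

    task_manager = TaskManager(verbosity="INFO", include_path="./my_custom_tasks")
    print(len(task_manager.all_tasks))            # all indexed task and group names
    print(task_manager.match_tasks(["arc_*"]))    # wildcard matching against the index

    # get_task_dict now accepts an optional TaskManager instead of relying on a global registry
    task_dict = get_task_dict(["arc_easy", "boolq"], task_manager=task_manager)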
lm_eval/tasks/bbh/_generate_configs.py (+2, -2)

@@ -28,7 +28,7 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     base_doc_to_text = "Q: {{input}}\nA:"
@@ -70,7 +70,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"/{task}.yaml"
         utils.eval_logger.info(f"Saving yaml for subset {task} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
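The change here, repeated across several generator scripts in this commit, passes encoding="utf-8" explicitly so the YAML round-trip does not depend on the platform's default locale encoding (for example cp1252 on Windows). A small sketch with a hypothetical file name:

    import yaml

    with open("example_task.yaml", "w", encoding="utf-8") as f:
        yaml.dump({"doc_to_text": "Q: {{input}}\nA:"}, f, allow_unicode=True)

    with open("example_task.yaml", encoding="utf-8") as f:
        print(yaml.full_load(f))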
lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml (+1, -0)

@@ -29,3 +29,4 @@ filter_list:
 num_fewshot: 0
 metadata:
   version: 2.0
+  num_fewshot: 3 # controls what is printed in n-shot
lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml (+1, -0)

@@ -20,3 +20,4 @@ generation_kwargs:
 num_fewshot: 0
 metadata:
   version: 1.0
+  num_fewshot: 3 # will be printed in results table
lm_eval/tasks/belebele/_generate_configs.py (+4, -4)

@@ -27,13 +27,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     def query():
@@ -42,7 +42,7 @@ if __name__ == "__main__":
     print(query())
     languages = [split["split"] for split in query()]

-    for lang in tqdm(languages):
+    for lang in tqdm([lang for lang in languages if "default" not in lang]):
         yaml_dict = {
             "include": base_yaml_name,
             "task": f"belebele_{args.task_prefix}_{lang}"
@@ -54,7 +54,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{lang}.yaml"
         logging.info(f"Saving yaml for subset {lang} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/belebele/belebele_default.yaml (deleted, +0, -4)

-"fewshot_split": "default"
-"include": "_default_template_yaml"
-"task": "belebele_default"
-"test_split": "default"
lm_eval/tasks/benchmarks/flan/_held_in_template_yaml (new file, +14, -0)

output_type: generate_until
test_split: null
doc_to_choice: null
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "</s>"
  do_sample: false
  temperature: 0.0
metadata:
  version: 1.0
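Each prompt variant in the new FLAN group files pulls these defaults in through its `include: _held_in_template_yaml` key and then overrides only what differs. A simplified stand-in for that merge is shown below (the real include resolution lives in utils.load_yaml_config; the prompt text is truncated for the sketch):

    template = {
        "output_type": "generate_until",
        "generation_kwargs": {"until": ["</s>"], "do_sample": False, "temperature": 0.0},
    }
    prompt_variant = {
        "task": "anli_r1",
        "task_alias": "prompt-0",
        "doc_to_text": "{{premise}}\n\n...",
        "doc_to_target": "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}",
    }

    merged = {**template, **prompt_variant}   # variant keys win over the included defaults
    print(merged["output_type"], merged["task_alias"])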
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml (new file, +331, -0)

group: flan_held_in
group_alias: Flan (Held-In)
task:
  # ANLI R1
  - group: anli_r1_flan
    group_alias: ANLI R1
    task:
      - task: anli_r1
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  # ANLI R2
  - group: anli_r2_flan
    group_alias: ANLI R2
    task:
      - task: anli_r2
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  # ANLI R3
  - group: anli_r3_flan
    group_alias: ANLI R3
    task:
      - task: anli_r3
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  # Arc Easy
  - group: arc_easy_flan
    group_alias: Arc Easy
    task:
      - task: arc_easy
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  # Arc Challenge
  - group: arc_challenge_flan
    group_alias: Arc Challenge
    task:
      - task: arc_challenge
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  # BoolQ
  - group: boolq_flan
    group_alias: BoolQ
    task:
      - task: boolq
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-9
        include: _held_in_template_yaml
        doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
  # RTE
  - group: rte_flan
    group_alias: RTE
    task:
      - task: rte
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{sentence2}}\"?\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\n\nQ with options: Can we draw the following conclusion?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\nDoes this next sentence follow, given the preceding text?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{sentence2}}"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nThe answer is"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true:\n\n{{sentence1}}\n\nSentence: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Question with options: can we draw the following hypothesis from the context?\n\nContext:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{sentence2}}\n\n{{sentence1}}\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
lm_eval/tasks/benchmarks/flan/flan_held_out.yaml (new file, +13, -0)

group: flan_held_out
task:
  # BBH
  - bbh_zeroshot
  - bbh_fewshot
  - bbh_cot_fewshot
  - bbh_cot_zeroshot
  # MMLU
  - mmlu
  - mmlu_flan_n_shot_generative
  - mmlu_flan_n_shot_loglikelihood
  - mmlu_flan_cot_zeroshot
  - mmlu_flan_cot_fewshot
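Both new group names can be requested like any other task. A sketch, assuming the 0.4.x Python entry point; the model choice is a placeholder for illustration, not something set by this commit:

    import lm_eval

    results = lm_eval.simple_evaluate(
        model="hf",
        model_args="pretrained=EleutherAI/pythia-70m",
        tasks=["flan_held_in"],   # group defined in flan_held_in.yaml above
        num_fewshot=0,
    )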
lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml (+0, -6)

@@ -5,19 +5,13 @@ task:
   - medqa_4options
   - task: mmlu_anatomy
     task_alias: "anatomy (mmlu)"
-    group_alias: null
   - task: mmlu_clinical_knowledge
     task_alias: "clinical_knowledge (mmlu)"
-    group_alias: null
   - task: mmlu_college_medicine
     task_alias: "college_medicine (mmlu)"
-    group_alias: null
   - task: mmlu_medical_genetics
     task_alias: "medical_genetics (mmlu)"
-    group_alias: null
   - task: mmlu_professional_medicine
     task_alias: "professional_medicine (mmlu)"
-    group_alias: null
   - task: mmlu_college_biology
     task_alias: "college_biology (mmlu)"
-    group_alias: null
lm_eval/tasks/bigbench/generate_tasks.py (+1, -1)

@@ -181,7 +181,7 @@ def main() -> None:
     for task in all_subtasks:
         file_name = f"{task}.yaml"
         try:
-            with open(f"{path}/{file_name}", "w") as f:
+            with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/blimp/generate_configs.py (+1, -1)

@@ -75,7 +75,7 @@ def main() -> None:
     for task in all_subtasks:
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/ceval/_generate_configs.py (+3, -3)

@@ -79,13 +79,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
@@ -107,7 +107,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/cmmlu/_generate_configs.py (+3, -3)

@@ -94,13 +94,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
@@ -122,7 +122,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/code_x_glue/code-text/bleu.py (+1, -1)

@@ -184,7 +184,7 @@ def splitPuncts(line):
 def computeMaps(predictions, goldfile):
     predictionMap: Dict[str, list] = {}
     goldMap: Dict[str, list] = {}
-    gf = open(goldfile, "r")
+    gf = open(goldfile, "r", encoding="utf-8")

     for row in predictions:
         cols = row.strip().split("\t")
lm_eval/tasks/csatqa/_generate_configs.py (+2, -2)

@@ -25,7 +25,7 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     for name in tqdm(SUBSETS):
@@ -39,7 +39,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
         eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,