Commit cb8889cc authored by lintangsutawika

merged with latest update from main

parents ec05e561 74119471
@@ -108,8 +108,8 @@ class HFLM(LM):
                assert not parallelize, "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`"
                self._model = pretrained
                self._device = self._model.device
                self._config = self._model.config
+               gpus = 0

            if tokenizer:
                assert isinstance(
...@@ -200,8 +200,9 @@ class HFLM(LM): ...@@ -200,8 +200,9 @@ class HFLM(LM):
) )
# access self._model through self.model property outside this method # access self._model through self.model property outside this method
self.model.eval() if isinstance(self.model, torch.nn.Module):
self.model.tie_weights() self.model.eval()
self.model.tie_weights()
if isinstance(pretrained, str) and (gpus >= 1 or str(self.device) == "mps"): if isinstance(pretrained, str) and (gpus >= 1 or str(self.device) == "mps"):
# TODO: can remove this whole snippet except in the mps case, perhaps? # TODO: can remove this whole snippet except in the mps case, perhaps?
@@ -238,6 +239,16 @@ class HFLM(LM):
            if self.config.model_type == "qwen":
                # Qwen's trust_remote_code tokenizer does not allow for adding special tokens
                self.tokenizer.pad_token = "<|endoftext|>"
+           elif (
+               self.tokenizer.__class__.__name__ == "RWKVWorldTokenizer"
+               or self.tokenizer.__class__.__name__ == "Rwkv5Tokenizer"
+           ):
+               # The RWKV world tokenizer does not allow for adding special tokens
+               # or setting the pad token (which is set as 0).
+               # The additional tokenizer name check is needed, as there exist rwkv4 models
+               # with a neox tokenizer.
+               # ---
+               # Note that the world tokenizer class name might change for the final
+               # huggingface merge: https://github.com/huggingface/transformers/pull/26963
+               assert self.tokenizer.pad_token_id == 0
            else:
                self.tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
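A consolidated sketch of the pad-token decision chain above (a standalone re-implementation for illustration, not the harness's own helper; the tokenizer class names come from the comment in the hunk):

    def resolve_pad_token(tokenizer, model_type: str) -> None:
        # Qwen's remote-code tokenizer cannot add special tokens; reuse its EOS marker.
        if model_type == "qwen":
            tokenizer.pad_token = "<|endoftext|>"
        # RWKV world tokenizers hard-code pad_token_id == 0 and reject new special tokens.
        elif tokenizer.__class__.__name__ in ("RWKVWorldTokenizer", "Rwkv5Tokenizer"):
            assert tokenizer.pad_token_id == 0
        # Everything else gets an explicit pad token.
        else:
            tokenizer.add_special_tokens({"pad_token": "<|pad|>"})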
@@ -361,7 +372,7 @@ class HFLM(LM):
    def _get_backend(
        self,
-       config: transformers.AutoConfig,
+       config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
        backend: Optional[Literal["default", "causal", "seq2seq"]] = "default",
        trust_remote_code: Optional[bool] = False,
    ) -> None:
@@ -602,8 +613,7 @@ class HFLM(LM):
                (batch_size, max_length), device=self.device
            ).long()
            for _ in range(5):
-               out = F.log_softmax(self._model_call(test_batch, **call_kwargs), dim=-1)
-               out = out  # Identity process so that it passes pre-commit
+               out = F.log_softmax(self._model_call(test_batch, **call_kwargs), dim=-1)  # noqa: F841

            return batch_size
@@ -705,10 +715,14 @@ class HFLM(LM):
        return self.model(inps).logits

    def _model_generate(self, context, max_length, stop, **generation_kwargs):
-       # we require users to pass do_sample=True explicitly
-       # for non-greedy gen. This should be reevaluated when considering beam search.
-       if "do_sample" not in generation_kwargs:
-           generation_kwargs["do_sample"] = False
+       # temperature = 0.0 if not set
+       # if do_sample is false and temp == 0.0:
+       # remove temperature, as do_sample=False takes care of this,
+       # and we don't want a warning from HF
+       generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
+       do_sample = generation_kwargs.get("do_sample", None)
+       if do_sample is False and generation_kwargs.get("temperature") == 0.0:
+           generation_kwargs.pop("temperature")
        # build stopping criteria
        stopping_criteria = stop_sequences_criteria(
            self.tokenizer, stop, context.shape[1], context.shape[0]
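The normalization above can be checked in isolation; a minimal sketch (a standalone re-implementation of just the lines added here, not the full method):

    def normalize_gen_kwargs(generation_kwargs: dict) -> dict:
        # default to greedy decoding when no temperature is given
        generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
        do_sample = generation_kwargs.get("do_sample", None)
        # do_sample=False already implies greedy decoding, so drop the
        # redundant temperature=0.0 to avoid a warning from HF transformers
        if do_sample is False and generation_kwargs.get("temperature") == 0.0:
            generation_kwargs.pop("temperature")
        return generation_kwargs

    assert normalize_gen_kwargs({}) == {"temperature": 0.0}
    assert normalize_gen_kwargs({"do_sample": False}) == {"do_sample": False}
    assert normalize_gen_kwargs({"do_sample": True, "temperature": 0.7}) == {"do_sample": True, "temperature": 0.7}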
@@ -1045,6 +1059,7 @@ class HFLM(LM):
                return -len(toks), x[0]

        pbar = tqdm(total=len(requests), disable=(self.rank != 0))
+       adaptive_batch_size = None
        if self.batch_size == "auto":
            # using rolling window with maximum context
            print("Passed argument batch_size = auto. Detecting largest batch size")
...@@ -1089,7 +1104,7 @@ class HFLM(LM): ...@@ -1089,7 +1104,7 @@ class HFLM(LM):
) )
else: else:
raise ValueError( raise ValueError(
f"Expected `kwargs` to be of type `dict` but got {kwargs}" f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
) )
if not until: if not until:
until = [self.tok_decode(self.eot_token_id)] until = [self.tok_decode(self.eot_token_id)]
......
from importlib.util import find_spec
from pathlib import Path

from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM


@register_model("openvino")
class OptimumLM(HFLM):
    """
    Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
    OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
    Intel® architectures using OpenVINO™ runtime.
    """

    def __init__(
        self,
        device="cpu",
        **kwargs,
    ) -> None:
        if "backend" in kwargs:
            # optimum currently only supports causal models
            assert (
                kwargs["backend"] == "causal"
            ), "Currently, only OVModelForCausalLM is supported."

        self.openvino_device = device

        super().__init__(
            device=self.openvino_device,
            # pop, so `backend` is not passed twice through **kwargs
            backend=kwargs.pop("backend", "causal"),
            **kwargs,
        )

    def _create_model(
        self,
        pretrained: str,
        revision="main",
        dtype="auto",
        trust_remote_code=False,
        **kwargs,
    ) -> None:
        if not find_spec("optimum"):
            raise Exception(
                "package `optimum` is not installed. Please install it via `pip install optimum[openvino]`"
            )
        else:
            from optimum.intel.openvino import OVModelForCausalLM

        model_kwargs = kwargs if kwargs else {}
        model_file = Path(pretrained) / "openvino_model.xml"
        if model_file.exists():
            export = False
        else:
            export = True
        # set on model_kwargs so the config is passed even when kwargs was empty
        model_kwargs["ov_config"] = {
            "PERFORMANCE_HINT": "LATENCY",
            "NUM_STREAMS": "1",
            "CACHE_DIR": "",
        }

        self._model = OVModelForCausalLM.from_pretrained(
            pretrained,
            revision=revision,
            trust_remote_code=trust_remote_code,
            export=export,
            device=self.openvino_device.upper(),
            **model_kwargs,
        )
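Since the class registers under the name `openvino`, it can be selected like any other backend. A hedged usage sketch via the Python API (the model path and task name are placeholders; assumes lm-eval v0.4.x and `pip install optimum[openvino]`):

    from lm_eval import simple_evaluate

    results = simple_evaluate(
        model="openvino",
        # a directory containing openvino_model.xml, or an HF model id to export on the fly
        model_args="pretrained=./exported-model-dir",
        tasks=["hellaswag"],
    )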
@@ -170,18 +170,12 @@ class VLLM(LM):
        stop: Optional[List[str]] = None,
        **kwargs,
    ):
-       if "do_sample" in kwargs.keys():
-           kwargs.pop("do_sample")
        if generate:
-           # hf defaults
-           kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
-           kwargs["spaces_between_special_tokens"] = kwargs.get(
-               "spaces_between_special_tokens", False
-           )
+           kwargs = self.modify_gen_kwargs(kwargs)
            sampling_params = SamplingParams(max_tokens=max_tokens, stop=stop, **kwargs)
        else:
            sampling_params = SamplingParams(
-               temperature=0, prompt_logprobs=2, max_tokens=1
+               temperature=0, prompt_logprobs=1, max_tokens=1
            )
        if self.data_parallel_size > 1:
            requests = [list(x) for x in divide(requests, self.data_parallel_size)]
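A note on the prompt_logprobs change: with prompt_logprobs=1, vLLM still reports the logprob of each actual prompt token (alongside the top-1 candidate), which is all the loglikelihood path needs to score continuations and check greediness, so requesting the top 2 was unnecessary. A hedged sketch of the scoring-side parameters (assumes vllm is installed):

    from vllm import SamplingParams

    # greedy, one dummy generated token; per-token logprobs come from the prompt side
    scoring_params = SamplingParams(temperature=0, prompt_logprobs=1, max_tokens=1)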
@@ -438,3 +432,16 @@ class VLLM(LM):
                break

        return continuation_logprobs, is_greedy
+
+   @staticmethod
+   def modify_gen_kwargs(kwargs: dict) -> dict:
+       # sampling_params
+       do_sample = kwargs.pop("do_sample", None)
+       if do_sample is False or "temperature" not in kwargs:
+           kwargs["temperature"] = 0.0
+       # hf defaults
+       kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
+       kwargs["spaces_between_special_tokens"] = kwargs.get(
+           "spaces_between_special_tokens", False
+       )
+       return kwargs
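Expected behavior of the helper, as a standalone check (the dicts below just trace the logic added above; assumes the VLLM class is importable):

    # do_sample=False forces greedy decoding even if a temperature was passed
    assert VLLM.modify_gen_kwargs({"do_sample": False, "temperature": 0.8}) == {
        "temperature": 0.0,
        "skip_special_tokens": False,
        "spaces_between_special_tokens": False,
    }
    # an explicit sampling temperature is preserved
    assert VLLM.modify_gen_kwargs({"temperature": 0.7}) == {
        "temperature": 0.7,
        "skip_special_tokens": False,
        "spaces_between_special_tokens": False,
    }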
@@ -117,7 +117,7 @@ class PromptString:
        # TODO need a way to process doc_to_choice
        if "doc_to_choice" in self.prompt_string:
-           raise "Not yet implemented to accept doc_to_choice"
+           raise Exception("Not yet implemented to accept doc_to_choice")

        text_string = utils.apply_template(doc_to_text, doc)
        target_string = utils.apply_template(doc_to_target, doc)
...
import os
import abc
-import yaml
import collections
from functools import partial
from typing import List, Union, Dict

from lm_eval import utils
-from lm_eval import prompts
-from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
+from lm_eval.api.task import Task, ConfigurableTask

import logging

-# # import python tasks
-# import squadv2.task
-# import scrolls.task

-# python_tasks = {
-#     "squadv2": squadv2.task.SQuAD2,
-#     "scrolls_quality": scrolls.task.QuALITY,
-#     "scrolls_narrativeqa": scrolls.task.NarrativeQA,
-#     "scrolls_contractnli": scrolls.task.ContractNLI,
-#     "scrolls_govreport": scrolls.task.GovReport,
-#     "scrolls_summscreenfd": scrolls.task.SummScreenFD,
-#     "scrolls_qmsum": scrolls.task.QMSum,
-# }

-eval_logger = utils.eval_logger
-
-GROUP_KEYS = ["group", "task", "weight_by_size"]
-PYTHON_TASK_KEYS = ["task", "class"]

-class TaskManager(abc.ABC):
+class TaskManager:
+   """TaskManager indexes all tasks from the default `lm_eval/tasks/`
+   and an optional directory if provided.
+   """
    def __init__(
        self,
        verbosity="INFO",
@@ -40,79 +24,132 @@ class TaskManager(abc.ABC):
        self.verbosity = verbosity
        self.include_path = include_path

-       self.logger = eval_logger.setLevel(getattr(logging, f"{verbosity}"))
+       self.logger = utils.eval_logger
+       self.logger.setLevel(getattr(logging, f"{verbosity}"))

-       self.ALL_TASKS = self.initialize_tasks(
+       self._task_index = self.initialize_tasks(
            include_path=include_path
        )
+       self._all_tasks = sorted(list(self._task_index.keys()))
+
+       self.task_group_map = collections.defaultdict(list)

-   def initialize_tasks(self, include_path=None):
+   def initialize_tasks(self, include_path: str = None):
+       """Creates a dictionary of the task index.
+
+       :param include_path: str = None
+           An additional path to be searched for tasks
+       :return
+           Dictionary with task names as keys and task metadata as values
+       """
        all_paths = [os.path.dirname(os.path.abspath(__file__)) + "/"]
        if include_path is not None:
            if isinstance(include_path, str):
                include_path = [include_path]
            all_paths.extend(include_path)

-       ALL_TASKS = {}
+       task_index = {}
        for task_dir in all_paths:
            tasks = self._get_task_and_group(task_dir)
-           ALL_TASKS = {**tasks, **ALL_TASKS}
+           task_index = {**tasks, **task_index}

-       return ALL_TASKS
+       return task_index

+   @property
    def all_tasks(self):
-       return sorted(list(self.ALL_TASKS.keys()))
+       return self._all_tasks
+
+   @property
+   def task_index(self):
+       return self._task_index
+
+   def match_tasks(self, task_list):
+       return utils.pattern_match(
+           task_list, self.all_tasks
+       )
    def _name_is_registered(self, name):
-       if name in self.ALL_TASKS:
+       if name in self.all_tasks:
            return True
        return False

    def _name_is_task(self, name):
-       if self._name_is_registered(name) and ("task" in self.ALL_TASKS[name]["type"]):
+       if self._name_is_registered(name) and ("task" in self.task_index[name]["type"]):
+           return True
+       return False
+
+   def _name_is_group(self, name):
+       if self._name_is_registered(name) and (self.task_index[name]["type"] == "group"):
            return True
        return False

    def _name_is_python_task(self, name):
-       if self._name_is_registered(name) and (self.ALL_TASKS[name]["type"] == "python_task"):
+       if self._name_is_registered(name) and (self.task_index[name]["type"] == "python_task"):
            return True
        return False

    def _config_is_task(self, config):
-       if set(config.keys()) <= set(GROUP_KEYS):
-           return False
-       return True
+       if ("task" in config) and isinstance(config["task"], str):
+           return True
+       return False
+
+   def _config_is_group(self, config):
+       if ("task" in config) and isinstance(config["task"], list):
+           return True
+       return False

    def _config_is_python_task(self, config):
-       if set(config.keys()) == set(PYTHON_TASK_KEYS):
+       if "class" in config:
            return True
        return False

    def _get_yaml_path(self, name):
-       assert name in self.ALL_TASKS
-       return self.ALL_TASKS[name]["yaml_path"]
+       assert name in self.task_index
+       return self.task_index[name]["yaml_path"]

    def _get_config(self, name):
-       assert name in self.ALL_TASKS
+       assert name in self.task_index
        yaml_path = self._get_yaml_path(name)
-       return utils.load_yaml_config("full", yaml_path)
+       if yaml_path == -1:
+           return {}
+       else:
+           return utils.load_yaml_config(yaml_path, mode="full")

    def _get_tasklist(self, name):
        assert self._name_is_task(name) == False
-       return self.ALL_TASKS[name]["task"]
+       return self.task_index[name]["task"]
+   def _process_alias(self, config, group=None):
+       # If the group is not the same as the original
+       # group for which the group alias was intended,
+       # set the group_alias to None instead.
+       if ("group_alias" in config) and ("group" in config) and group is not None:
+           if config["group"] != group:
+               config["group_alias"] = None
+       return config

    def _load_individual_task_or_group(
        self,
        name_or_config: Union[str, dict] = None,
        parent_name: str = None,
-       update_config: dict = None
+       update_config: dict = None,
+       yaml_path: str = None,
    ) -> ConfigurableTask:

-       def load_task(config, task, group=None, is_python_class=False):
-           if is_python_class:
+       def load_task(config, task, group=None, yaml_path=None):
+           if "include" in config:
+               assert yaml_path is not None
+               config.update(
+                   utils.load_yaml_config(
+                       yaml_path,
+                       yaml_config={"include": config.pop("include")},
+                       mode="full",
+                   )
+               )
+           if self._config_is_python_task(config):
                task_object = config["class"]()
            else:
+               config = self._process_alias(config, group=group)
                task_object = ConfigurableTask(config=config)
            if group is not None:
                task_object = (group, task_object)
@@ -124,15 +161,26 @@ class TaskManager(abc.ABC):
            name_or_config = {"task": name_or_config, **update_config}
        elif self._name_is_task(name_or_config):
            task_config = self._get_config(name_or_config)
-           is_python_class = False
-           if self._name_is_python_task(name_or_config):
-               is_python_class = True
-           return load_task(task_config, task=name_or_config, group=parent_name, is_python_class=is_python_class)
+           return load_task(task_config, task=name_or_config, group=parent_name)
        else:
            group_name = name_or_config
            subtask_list = self._get_tasklist(name_or_config)
            if subtask_list == -1:
-               subtask_list = self._get_config(name_or_config)["task"]
+               group_config = self._get_config(name_or_config)
+               subtask_list = group_config["task"]
+
+           # This checks if we're at the root.
+           if parent_name is None:
+               group_config = self._get_config(name_or_config)
+               if set(group_config.keys()) > set(["task", "group"]):
+                   update_config = {
+                       k: v for k, v in group_config.items() if k not in ["task", "group"]
+                   }
+               yaml_path = self._get_yaml_path(group_name)
+               if (update_config is not None) and ("group_alias" in update_config):
+                   group_name = update_config["group_alias"]
+                   update_config.pop("group_alias")
        if isinstance(name_or_config, dict):
@@ -145,7 +193,8 @@ class TaskManager(abc.ABC):
            if self._config_is_task(name_or_config):
                name = name_or_config["task"]
                # If the name is registered as a group
-               if self._name_is_task(name) is False:
+               # if self._name_is_task(name) is False:
+               if self._name_is_group(name):
                    group_name = name
                    update_config = {k: v for k, v in name_or_config.items() if k != "task"}
                    subtask_list = self._get_tasklist(name)
@@ -154,28 +203,49 @@ class TaskManager(abc.ABC):
                else:
                    if self._name_is_registered(name):
                        base_task_config = self._get_config(name)
+
+                       # Check if this is a duplicate.
+                       if parent_name is not None:
+                           name_or_config["group"] = parent_name
+                           num_duplicate = len(list(filter(lambda x: x.startswith(name), self.task_group_map[parent_name])))
+                           if num_duplicate > 0:
+                               name = f"{name}-{num_duplicate}"
+                           self.task_group_map[parent_name].append(name)
+
                        task_config = {
                            **base_task_config,
                            **name_or_config,
                        }
                    else:
                        task_config = name_or_config
-                   return load_task(task_config, task=name, group=parent_name)
+                   return load_task(task_config, task=name, group=parent_name, yaml_path=yaml_path)
            else:
                group_name = name_or_config["group"]
                subtask_list = name_or_config["task"]
+               # update_config = {k: v for k, v in name_or_config.items() if k != "task"}
+               if set(name_or_config.keys()) > set(["task", "group"]):
+                   update_config = {
+                       k: v for k, v in name_or_config.items() if k not in ["task", "group"]
+                   }

        all_subtasks = {}
-       if (parent_name is not None) and ((self._name_is_registered(group_name) is False) or (self._get_yaml_path(group_name) == -1)):
+       if parent_name is not None:
            all_subtasks = {group_name: (parent_name, None)}

-       fn = partial(self._load_individual_task_or_group, parent_name=group_name, update_config=update_config)
+       fn = partial(self._load_individual_task_or_group, parent_name=group_name, update_config=update_config, yaml_path=yaml_path)
        all_subtasks = {**all_subtasks, **dict(collections.ChainMap(*map(fn, subtask_list)))}
        return all_subtasks
    def load_task_or_group(self, task_list: Union[str, list] = None) -> dict:
+       """Loads a dictionary of task objects from a list.
+
+       :param task_list: Union[str, list] = None
+           A single task name or a list of task names to be loaded
+       :return
+           Dictionary of task objects
+       """
        if isinstance(task_list, str):
            task_list = [task_list]
@@ -189,20 +259,43 @@ class TaskManager(abc.ABC):
        )
        return all_loaded_tasks

+   def load_config(self, config: Dict):
+       return self._load_individual_task_or_group(config)
    def _get_task_and_group(self, task_dir: str):
+       """Creates a dictionary of the task index with the following metadata:
+
+       - `type`, which can be either `task`, `python_task`, or `group`.
+         `task` refers to regular task configs, `python_task` to special
+         yaml files that consist only of the `task` and `class` parameters,
+         and `group` to group configs.
+       - `yaml_path`, the path to the yaml file. If the entry is a `group`
+         that was configured through a task config, the yaml_path will be -1
+         and all subtasks will be listed in `task` (see below).
+       - `task`, reserved for entries whose `type` is `group`. This lists
+         all subtasks. When a group config is created (as opposed to a task
+         config having its `group` parameter set), this will be set to -1 to
+         avoid recursive indexing. The full list of subtasks is loaded
+         at evaluation time.
+
+       :param task_dir: str
+           A directory to check for tasks
+       :return
+           Dictionary with task names as keys and task metadata as values
+       """
        tasks_and_groups = collections.defaultdict()
        for root, _, file_list in os.walk(task_dir):
            for f in file_list:
                if f.endswith(".yaml"):
                    yaml_path = os.path.join(root, f)
-                   config = utils.load_yaml_config("simple", yaml_path)
-                   if set(config.keys()) == set(PYTHON_TASK_KEYS):
+                   config = utils.load_yaml_config(yaml_path, mode="simple")
+                   if self._config_is_python_task(config):
                        # This is a python class config
                        tasks_and_groups[config["task"]] = {
                            "type": "python_task",
                            "yaml_path": yaml_path,
                        }
-                   elif set(config.keys()) <= set(GROUP_KEYS):
+                   elif self._config_is_group(config):
                        # This is a group config
                        tasks_and_groups[config["group"]] = {
                            "type": "group",
@@ -213,7 +306,17 @@ class TaskManager(abc.ABC):
                            # when called.
                            "yaml_path": yaml_path,
                        }

+                       # else:
+                       #     # Register the level 1 tasks from a group config
+                       #     for config in config["task"]:
+                       #         if isinstance(config, dict) and self._config_is_task(config):
+                       #             task = config["task"]
+                       #             tasks_and_groups[task] = {
+                       #                 "type": "task",
+                       #                 "yaml_path": yaml_path,
+                       #             }

-                   else:
+                   elif self._config_is_task(config):
                        # This is a task config
                        task = config["task"]
                        tasks_and_groups[task] = {
@@ -235,41 +338,97 @@ class TaskManager(abc.ABC):
                        }
                    else:
                        tasks_and_groups[group]["task"].append(task)
+               else:
+                   self.logger.debug(f"File {f} in {root} could not be loaded")

        return tasks_and_groups
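For reference, the walk above produces an index mapping each name to a small metadata dict; an illustrative (hypothetical) example of its shape, following the docstring above:

    tasks_and_groups = {
        "hellaswag": {"type": "task", "yaml_path": "/.../tasks/hellaswag/hellaswag.yaml"},
        # group defined by its own group config file: subtasks are not indexed (task = -1)
        "mmlu": {"type": "group", "task": -1, "yaml_path": "/.../tasks/mmlu/_mmlu.yaml"},
        # group created implicitly via a task config's `group` field: no yaml of its own
        "some_group": {"type": "group", "task": ["task_a", "task_b"], "yaml_path": -1},
    }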
-# def check_prompt_config(
-#     config: Dict[str, str], yaml_path: str = None
-# ) -> List[Dict[str, str]]:
-#     all_configs = []
-#     if "use_prompt" in config:
-#         prompt_list = prompts.load_prompt_list(
-#             use_prompt=config["use_prompt"],
-#             dataset_name=config["dataset_path"],
-#             subset_name=config["dataset_name"] if "dataset_name" in config else None,
-#             yaml_path=yaml_path,
-#         )
-#         for idx, prompt_variation in enumerate(prompt_list):
-#             all_configs.append(
-#                 {
-#                     **config,
-#                     **{"use_prompt": prompt_variation},
-#                     **{
-#                         "task": "_".join(
-#                             [
-#                                 config["task"]
-#                                 if "task" in config
-#                                 else get_task_name_from_config(config),
-#                                 prompt_variation.split("/")[-1]
-#                                 if ".yaml" in prompt_variation
-#                                 else prompt_variation,
-#                             ]
-#                         )
-#                     },
-#                     **{"output_type": "generate_until"},
-#                 }
-#             )
-#     else:
-#         all_configs.append(config)
-#     return all_configs
+def include_path(task_dir):
+   logger = utils.eval_logger
+   logger.setLevel(getattr(logging, "INFO"))
+   logger.info(
+       "To still use tasks loaded from args.include_path, "
+       "see an example of the new TaskManager API in "
+       "https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage"
+   )
+   return 0
+
+def initialize_tasks(verbosity="INFO"):
+   logger = utils.eval_logger
+   logger.setLevel(getattr(logging, f"{verbosity}"))
+   logger.info(
+       "lm_eval.tasks.initialize_tasks() is deprecated and no longer necessary. "
+       "It will be removed in the v0.4.2 release. "
+       "TaskManager will be used instead."
+   )
+   return 0
+
+def get_task_name_from_config(task_config: Dict[str, str]) -> str:
+   if "task" in task_config:
+       return task_config["task"]
+   if "dataset_name" in task_config:
+       return "{dataset_path}_{dataset_name}".format(**task_config)
+   else:
+       return "{dataset_path}".format(**task_config)
+
+def get_task_name_from_object(task_object):
+   if hasattr(task_object, "config"):
+       return task_object._config["task"]
+
+   # TODO: scrap this
+   # this gives a mechanism for non-registered tasks to have a custom name anyways when reporting
+   return (
+       task_object.EVAL_HARNESS_NAME
+       if hasattr(task_object, "EVAL_HARNESS_NAME")
+       else type(task_object).__name__
+   )
def get_task_dict(task_name_list: List[Union[str, Dict, Task]], task_manager: TaskManager = None):
    """Creates a dictionary of task objects from a task name, config, or prepared Task object.

    :param task_name_list: List[Union[str, Dict, Task]]
        List of task names, task configs, or prepared Task objects to be loaded
    :param task_manager: TaskManager = None
        A TaskManager object that stores indexed tasks. If not set,
        task_manager will load one. This should be set by the user
        if additional paths need to be included via `include_path`.
    :return
        Dictionary of task objects
    """
    task_name_from_string_dict = {}
    task_name_from_config_dict = {}
    task_name_from_object_dict = {}

    if isinstance(task_name_list, str):
        task_name_list = [task_name_list]

    string_task_name_list = [task for task in task_name_list if isinstance(task, str)]
    # note: `not isinstance(...)`, since `~` on a bool is a bitwise-not and is always truthy
    others_task_name_list = [task for task in task_name_list if not isinstance(task, str)]
    if len(string_task_name_list) > 0:
        if task_manager is None:
            task_manager = TaskManager()

        task_name_from_string_dict = task_manager.load_task_or_group(string_task_name_list)

    for task_element in others_task_name_list:
        if isinstance(task_element, dict):
            task_name_from_config_dict = {
                **task_name_from_config_dict,
                **task_manager.load_config(config=task_element),
            }
        elif isinstance(task_element, Task):
            task_name_from_object_dict = {
                **task_name_from_object_dict,
                get_task_name_from_object(task_element): task_element,
            }

    assert set(task_name_from_string_dict.keys()).isdisjoint(
        set(task_name_from_object_dict.keys())
    )
    return {
        **task_name_from_string_dict,
        **task_name_from_config_dict,
        **task_name_from_object_dict,
    }
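A hedged end-to-end sketch of the new API (the task names and include path are placeholders; signatures follow the code above):

    from lm_eval.tasks import TaskManager, get_task_dict

    task_manager = TaskManager(verbosity="INFO", include_path="/path/to/custom/tasks")
    # resolve wildcard patterns against the index
    matched = task_manager.match_tasks(["hellaswag", "mmlu*"])
    # build the {task_name: Task} dictionary consumed by the evaluator
    task_dict = get_task_dict(matched, task_manager=task_manager)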
@@ -28,7 +28,7 @@ if __name__ == "__main__":
    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-   with open(args.base_yaml_path) as f:
+   with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    base_doc_to_text = "Q: {{input}}\nA:"
@@ -70,7 +70,7 @@ if __name__ == "__main__":
        file_save_path = args.save_prefix_path + f"/{task}.yaml"
        utils.eval_logger.info(f"Saving yaml for subset {task} to {file_save_path}")
-       with open(file_save_path, "w") as yaml_file:
+       with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
...
@@ -29,3 +29,4 @@ filter_list:
num_fewshot: 0
metadata:
  version: 2.0
+  num_fewshot: 3  # controls what is printed in the n-shot column
@@ -20,3 +20,4 @@ generation_kwargs:
num_fewshot: 0
metadata:
  version: 1.0
+  num_fewshot: 3  # will be printed in the results table
@@ -27,13 +27,13 @@ if __name__ == "__main__":
    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-   with open(args.base_yaml_path) as f:
+   with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    if args.cot_prompt_path is not None:
        import json

-       with open(args.cot_prompt_path) as f:
+       with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    def query():
@@ -42,7 +42,7 @@ if __name__ == "__main__":
    print(query())
    languages = [split["split"] for split in query()]

-   for lang in tqdm(languages):
+   for lang in tqdm([lang for lang in languages if "default" not in lang]):
        yaml_dict = {
            "include": base_yaml_name,
            "task": f"belebele_{args.task_prefix}_{lang}"
@@ -54,7 +54,7 @@ if __name__ == "__main__":
        file_save_path = args.save_prefix_path + f"_{lang}.yaml"
        logging.info(f"Saving yaml for subset {lang} to {file_save_path}")
-       with open(file_save_path, "w") as yaml_file:
+       with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
...
"fewshot_split": "default"
"include": "_default_template_yaml"
"task": "belebele_default"
"test_split": "default"
output_type: generate_until
test_split: null
doc_to_choice: null
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "</s>"
  do_sample: false
  temperature: 0.0
metadata:
  version: 1.0
group: flan_held_in
group_alias: Flan (Held-In)
task:
  # ANLI R1
  - group: anli_r1_flan
    group_alias: ANLI R1
    task:
      - task: anli_r1
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r1
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  # ANLI R2
  - group: anli_r2_flan
    group_alias: ANLI R2
    task:
      - task: anli_r2
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r2
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  # ANLI R3
  - group: anli_r3_flan
    group_alias: ANLI R3
    task:
      - task: anli_r3
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
      - task: anli_r3
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  # Arc Easy
  - group: arc_easy_flan
    group_alias: Arc Easy
    task:
      - task: arc_easy
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_easy
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  # Arc Challenge
  - group: arc_challenge_flan
    group_alias: Arc Challenge
    task:
      - task: arc_challenge
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
      - task: arc_challenge
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  # BoolQ
  - group: boolq_flan
    group_alias: BoolQ
    task:
      - task: boolq
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
      - task: boolq
        task_alias: prompt-9
        include: _held_in_template_yaml
        doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
        doc_to_target: "{{['no', 'yes'][label]}}"
  # RTE
  - group: rte_flan
    group_alias: RTE
    task:
      - task: rte
        task_alias: prompt-0
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{sentence2}}\"?\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-1
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-2
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\n\nQ with options: Can we draw the following conclusion?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-3
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\nDoes this next sentence follow, given the preceding text?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-4
        include: _held_in_template_yaml
        doc_to_text: "{{sentence1}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{sentence2}}"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-5
        include: _held_in_template_yaml
        doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nThe answer is"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-6
        include: _held_in_template_yaml
        doc_to_text: "Read the text and determine if the sentence is true:\n\n{{sentence1}}\n\nSentence: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-7
        include: _held_in_template_yaml
        doc_to_text: "Question with options: can we draw the following hypothesis from the context? \n\nContext:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
        doc_to_target: "{{['yes', 'no'][label]}}"
      - task: rte
        task_alias: prompt-8
        include: _held_in_template_yaml
        doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{sentence2}}\n\n{{sentence1}}\nOPTIONS:\n- yes\n- no"
        doc_to_target: "{{['yes', 'no'][label]}}"
group: flan_held_out
task:
  # BBH
  - bbh_zeroshot
  - bbh_fewshot
  - bbh_cot_fewshot
  - bbh_cot_zeroshot
  # MMLU
  - mmlu
  - mmlu_flan_n_shot_generative
  - mmlu_flan_n_shot_loglikelihood
  - mmlu_flan_cot_zeroshot
  - mmlu_flan_cot_fewshot
@@ -5,19 +5,13 @@ task:
  - medqa_4options
  - task: mmlu_anatomy
    task_alias: "anatomy (mmlu)"
-   group_alias: null
  - task: mmlu_clinical_knowledge
    task_alias: "clinical_knowledge (mmlu)"
-   group_alias: null
  - task: mmlu_college_medicine
    task_alias: "college_medicine (mmlu)"
-   group_alias: null
  - task: mmlu_medical_genetics
    task_alias: "medical_genetics (mmlu)"
-   group_alias: null
  - task: mmlu_professional_medicine
    task_alias: "professional_medicine (mmlu)"
-   group_alias: null
  - task: mmlu_college_biology
    task_alias: "college_biology (mmlu)"
-   group_alias: null
@@ -181,7 +181,7 @@ def main() -> None:
    for task in all_subtasks:
        file_name = f"{task}.yaml"
        try:
-           with open(f"{path}/{file_name}", "w") as f:
+           with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
                f.write("# Generated by utils.py\n")
                yaml.dump(
                    {
...
@@ -75,7 +75,7 @@ def main() -> None:
    for task in all_subtasks:
        file_name = f"{task}.yaml"
        try:
-           with open(f"{file_name}", "w") as f:
+           with open(f"{file_name}", "w", encoding="utf-8") as f:
                f.write("# Generated by utils.py\n")
                yaml.dump(
                    {
...
@@ -79,13 +79,13 @@ if __name__ == "__main__":
    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-   with open(args.base_yaml_path) as f:
+   with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    if args.cot_prompt_path is not None:
        import json

-       with open(args.cot_prompt_path) as f:
+       with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
@@ -107,7 +107,7 @@ if __name__ == "__main__":
        file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
        eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-       with open(file_save_path, "w") as yaml_file:
+       with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
...
@@ -94,13 +94,13 @@ if __name__ == "__main__":
    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-   with open(args.base_yaml_path) as f:
+   with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    if args.cot_prompt_path is not None:
        import json

-       with open(args.cot_prompt_path) as f:
+       with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
@@ -122,7 +122,7 @@ if __name__ == "__main__":
        file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
        eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-       with open(file_save_path, "w") as yaml_file:
+       with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
...
@@ -184,7 +184,7 @@ def splitPuncts(line):
def computeMaps(predictions, goldfile):
    predictionMap: Dict[str, list] = {}
    goldMap: Dict[str, list] = {}
-   gf = open(goldfile, "r")
+   gf = open(goldfile, "r", encoding="utf-8")

    for row in predictions:
        cols = row.strip().split("\t")
...
@@ -25,7 +25,7 @@ if __name__ == "__main__":
    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-   with open(args.base_yaml_path) as f:
+   with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    for name in tqdm(SUBSETS):
@@ -39,7 +39,7 @@ if __name__ == "__main__":
        file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
        eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")
-       with open(file_save_path, "w") as yaml_file:
+       with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
...