Commit f66fc06f authored by haileyschoelkopf

fix merge conflicts

parents b13753cd d714fc95
@@ -108,8 +108,8 @@ class HFLM(LM):
assert not parallelize, "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`"
self._model = pretrained
self._device = self._model.device
self._config = self._model.config
gpus = 0
if tokenizer:
assert isinstance(
@@ -200,8 +200,9 @@ class HFLM(LM):
)
# access self._model through self.model property outside this method
self.model.eval()
self.model.tie_weights()
if isinstance(self.model, torch.nn.Module):
self.model.eval()
self.model.tie_weights()
if isinstance(pretrained, str) and (gpus >= 1 or str(self.device) == "mps"):
# TODO: can remove this whole snippet except in the mps case, perhaps?
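# --- Editor's sketch (not part of the commit): minimal usage of the new
# --- pre-initialized-model path guarded by the assertion above; the model id
# --- is an example, HFLM's import path matches the import used later in this diff.
import transformers
from lm_eval.models.huggingface import HFLM

pre_model = transformers.AutoModelForCausalLM.from_pretrained("gpt2")
pre_tok = transformers.AutoTokenizer.from_pretrained("gpt2")
lm = HFLM(pretrained=pre_model, tokenizer=pre_tok)  # parallelize=True would trip the assert above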
@@ -238,6 +239,16 @@ class HFLM(LM):
if self.config.model_type == "qwen":
# Qwen's trust_remote_code tokenizer does not allow for adding special tokens
self.tokenizer.pad_token = "<|endoftext|>"
elif (
self.tokenizer.__class__.__name__ == "RWKVWorldTokenizer"
or self.tokenizer.__class__.__name__ == "Rwkv5Tokenizer"
):
# The RWKV world tokenizer does not allow adding special tokens / setting the pad token (which is fixed at 0)
# The additional tokenizer name check is needed, as there exist rwkv4 models with a neox tokenizer
# ---
# Note that the world tokenizer class name might change once the final huggingface merge lands
# https://github.com/huggingface/transformers/pull/26963
assert self.tokenizer.pad_token_id == 0
else:
self.tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
@@ -361,7 +372,7 @@ class HFLM(LM):
def _get_backend(
self,
config: transformers.AutoConfig,
config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
backend: Optional[Literal["default", "causal", "seq2seq"]] = "default",
trust_remote_code: Optional[bool] = False,
) -> None:
@@ -602,8 +613,7 @@ class HFLM(LM):
(batch_size, max_length), device=self.device
).long()
for _ in range(5):
out = F.log_softmax(self._model_call(test_batch, **call_kwargs), dim=-1)
out = out # Identity process so that it passes pre-commit
out = F.log_softmax(self._model_call(test_batch, **call_kwargs), dim=-1) # noqa: F841
return batch_size
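# Editor's sketch of the probe pattern behind auto batch sizing. Assumes (from
# elided context, not shown in this hunk) that the forward pass is wrapped with
# accelerate's find_executable_batch_size, which halves batch_size on CUDA OOM
# until a size fits; the OOM is simulated here so the toy runs anywhere.
from accelerate import find_executable_batch_size

@find_executable_batch_size(starting_batch_size=512)
def probe(batch_size):
    if batch_size > 64:  # stand-in for a real CUDA out-of-memory error
        raise RuntimeError("CUDA out of memory.")
    return batch_size

print(probe())  # -> 64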
@@ -705,10 +715,14 @@ class HFLM(LM):
return self.model(inps).logits
def _model_generate(self, context, max_length, stop, **generation_kwargs):
# we require users to pass do_sample=True explicitly
# for non-greedy gen. This should be reevaluated when considering beam search.
if "do_sample" not in generation_kwargs:
generation_kwargs["do_sample"] = False
# temperature = 0.0 if not set
# if do_sample is false and temp==0.0:
# remove temperature, as do_sample=False takes care of this
# and we don't want a warning from HF
generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
do_sample = generation_kwargs.get("do_sample", None)
if do_sample is False and generation_kwargs.get("temperature") == 0.0:
generation_kwargs.pop("temperature")
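# Editor's recap of the normalization above as a standalone helper (the
# function name is invented; the behavior mirrors the lines just shown):
def normalize_gen_kwargs(generation_kwargs: dict) -> dict:
    generation_kwargs.setdefault("do_sample", False)  # greedy unless explicitly opted in
    generation_kwargs.setdefault("temperature", 0.0)
    if generation_kwargs["do_sample"] is False and generation_kwargs.get("temperature") == 0.0:
        generation_kwargs.pop("temperature")  # do_sample=False already means greedy; avoids the HF warning
    return generation_kwargs

assert normalize_gen_kwargs({}) == {"do_sample": False}
assert normalize_gen_kwargs({"temperature": 0.7}) == {"do_sample": False, "temperature": 0.7}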
# build stopping criteria
stopping_criteria = stop_sequences_criteria(
self.tokenizer, stop, context.shape[1], context.shape[0]
@@ -1045,6 +1059,7 @@ class HFLM(LM):
return -len(toks), x[0]
pbar = tqdm(total=len(requests), disable=(self.rank != 0))
adaptive_batch_size = None
if self.batch_size == "auto":
# using rolling window with maximum context
print("Passed argument batch_size = auto. Detecting largest batch size")
@@ -1089,7 +1104,7 @@ class HFLM(LM):
)
else:
raise ValueError(
f"Expected `kwargs` to be of type `dict` but got {kwargs}"
f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
)
if not until:
until = [self.tok_decode(self.eot_token_id)]
......
from importlib.util import find_spec
from pathlib import Path
from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM
@register_model("openvino")
class OptimumLM(HFLM):
"""
Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
Intel® architectures using OpenVINO™ runtime.
"""
def __init__(
self,
device="cpu",
**kwargs,
) -> None:
if "backend" in kwargs:
# optimum currently only supports causal models
assert (
kwargs["backend"] == "causal"
), "Currently, only OVModelForCausalLM is supported."
self.openvino_device = device
super().__init__(
device=self.openvino_device,
backend=kwargs.get("backend", "causal"),
**kwargs,
)
def _create_model(
self,
pretrained: str,
revision="main",
dtype="auto",
trust_remote_code=False,
**kwargs,
) -> None:
if not find_spec("optimum"):
raise Exception(
"package `optimum` is not installed. Please install it via `pip install optimum[openvino]`"
)
else:
from optimum.intel.openvino import OVModelForCausalLM
model_kwargs = kwargs if kwargs else {}
model_file = Path(pretrained) / "openvino_model.xml"
if model_file.exists():
export = False
else:
export = True
kwargs["ov_config"] = {
"PERFORMANCE_HINT": "LATENCY",
"NUM_STREAMS": "1",
"CACHE_DIR": "",
}
self._model = OVModelForCausalLM.from_pretrained(
pretrained,
revision=revision,
trust_remote_code=trust_remote_code,
export=export,
device=self.openvino_device.upper(),
**model_kwargs,
)
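# Editor's usage sketch (module path and model id are assumptions, not taken
# from this diff): load or export an OpenVINO IR model through the new backend.
from lm_eval.models.optimum_lm import OptimumLM

lm = OptimumLM(pretrained="gpt2", device="cpu")  # exports to IR when no openvino_model.xml is found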
@@ -170,18 +170,12 @@ class VLLM(LM):
stop: Optional[List[str]] = None,
**kwargs,
):
if "do_sample" in kwargs.keys():
kwargs.pop("do_sample")
if generate:
# hf defaults
kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
kwargs["spaces_between_special_tokens"] = kwargs.get(
"spaces_between_special_tokens", False
)
kwargs = self.modify_gen_kwargs(kwargs)
sampling_params = SamplingParams(max_tokens=max_tokens, stop=stop, **kwargs)
else:
sampling_params = SamplingParams(
temperature=0, prompt_logprobs=2, max_tokens=1
temperature=0, prompt_logprobs=1, max_tokens=1
)
if self.data_parallel_size > 1:
requests = [list(x) for x in divide(requests, self.data_parallel_size)]
@@ -438,3 +432,16 @@ class VLLM(LM):
break
return continuation_logprobs, is_greedy
@staticmethod
def modify_gen_kwargs(kwargs: dict) -> dict:
# sampling_params
do_sample = kwargs.pop("do_sample", None)
if do_sample is False or "temperature" not in kwargs:
kwargs["temperature"] = 0.0
# hf defaults
kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
kwargs["spaces_between_special_tokens"] = kwargs.get(
"spaces_between_special_tokens", False
)
return kwargs
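# Editor's worked example of the mapping above (VLLM is the class this
# staticmethod is defined on): HF-style kwargs become vLLM SamplingParams
# defaults, with do_sample=False forcing greedy decoding via temperature=0.0.
assert VLLM.modify_gen_kwargs({"do_sample": False, "temperature": 0.8}) == {
    "temperature": 0.0,
    "skip_special_tokens": False,
    "spaces_between_special_tokens": False,
}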
@@ -117,7 +117,7 @@ class PromptString:
# TODO need a way to process doc_to_choice
if "doc_to_choice" in self.prompt_string:
raise "Not yet implemented to accept doc_to_choice"
raise Exception("Not yet implemented to accept doc_to_choice")
text_string = utils.apply_template(doc_to_text, doc)
target_string = utils.apply_template(doc_to_target, doc)
......
import os
import yaml
import abc
import collections
from functools import partial
from typing import List, Union, Dict
from lm_eval import utils
from lm_eval import prompts
from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
from lm_eval.api.registry import (
register_task,
register_group,
TASK_REGISTRY,
GROUP_REGISTRY,
ALL_TASKS,
)
from lm_eval.api.task import Task, ConfigurableTask
import logging
# import python tasks
from .squadv2.task import SQuAD2
from .scrolls.task import (
QuALITY,
NarrativeQA,
ContractNLI,
GovReport,
SummScreenFD,
QMSum,
)
eval_logger = utils.eval_logger
def register_configurable_task(config: Dict[str, str]) -> int:
SubClass = type(
config["task"] + "ConfigurableTask",
(ConfigurableTask,),
{"CONFIG": TaskConfig(**config)},
)
if "task" in config:
task_name = "{}".format(config["task"])
register_task(task_name)(SubClass)
class TaskManager:
"""TaskManager indexes all tasks from the default `lm_eval/tasks/`
and an optional directory if provided.
if "group" in config:
if config["group"] == config["task"]:
raise ValueError("task and group name cannot be the same")
elif type(config["group"]) == str:
group_name = [config["group"]]
else:
group_name = config["group"]
"""
def __init__(
self,
verbosity="INFO",
include_path=None
) -> None:
self.verbosity = verbosity
self.include_path = include_path
self.logger = utils.eval_logger
self.logger.setLevel(getattr(logging, f"{verbosity}"))
self._task_index = self.initialize_tasks(
include_path=include_path
)
self._all_tasks = sorted(list(self._task_index.keys()))
for group in group_name:
register_group(group)(SubClass)
self.task_group_map = collections.defaultdict(list)
return 0
def initialize_tasks(self, include_path: str = None):
"""Creates an dictionary of tasks index.
:param include_path: str = None
An additional path to be searched for tasks
:return
Dictionary of task names as key and task metadata
"""
all_paths = [os.path.dirname(os.path.abspath(__file__)) + "/"]
if include_path is not None:
if isinstance(include_path, str):
include_path = [include_path]
all_paths.extend(include_path)
task_index = {}
for task_dir in all_paths:
tasks = self._get_task_and_group(task_dir)
task_index = {**tasks, **task_index}
def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -> int:
group = config["group"]
all_task_list = config["task"]
config_list = [task for task in all_task_list if type(task) != str]
task_list = [task for task in all_task_list if type(task) == str]
for task_config in config_list:
base_config = {}
task_name_config = {}
if "task" in task_config:
task_name = task_config["task"]
if task_name in ALL_TASKS:
task_obj = get_task_dict(task_name)[task_name]
if type(task_obj) == tuple:
_, task_obj = task_obj
if task_obj is not None:
base_config = task_obj._config.to_dict(keep_callable=True)
task_name_config["task"] = f"{group}_{task_name}"
task_config = utils.load_yaml_config(yaml_path, task_config)
var_configs = check_prompt_config(
{
**base_config,
**task_config,
**{"group": group},
**task_name_config,
},
yaml_path=os.path.dirname(yaml_path),
return task_index
@property
def all_tasks(self):
return self._all_tasks
@property
def task_index(self):
return self._task_index
def match_tasks(self, task_list):
return utils.pattern_match(
task_list, self.all_tasks
)
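# Editor's usage sketch of the TaskManager API above (run outside the class,
# against the default task directory shipped with the harness):
tm = TaskManager(verbosity="INFO")
print(len(tm.all_tasks))          # sorted names of every indexed task and group
print(tm.match_tasks(["arc_*"]))  # wildcard matching against that index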
for config in var_configs:
register_configurable_task(config)
task_names = utils.pattern_match(task_list, ALL_TASKS)
for task in task_names:
if (task in TASK_REGISTRY) or (task in GROUP_REGISTRY):
if group in GROUP_REGISTRY:
GROUP_REGISTRY[group].append(task)
def _name_is_registered(self, name):
if name in self.all_tasks:
return True
return False
def _name_is_task(self, name):
if self._name_is_registered(name) and ("task" in self.task_index[name]["type"]):
return True
return False
def _name_is_group(self, name):
if self._name_is_registered(name) and (self.task_index[name]["type"] == "group"):
return True
return False
def _name_is_python_task(self, name):
if self._name_is_registered(name) and (self.task_index[name]["type"] == "python_task"):
return True
return False
def _config_is_task(self, config):
if ("task" in config) and isinstance(config["task"], str):
return True
return False
def _config_is_group(self, config):
if ("task" in config) and isinstance(config["task"], list):
return True
return False
def _config_is_python_task(self, config):
if "class" in config:
return True
return False
def _get_yaml_path(self, name):
assert name in self.task_index
return self.task_index[name]["yaml_path"]
def _get_config(self, name):
assert name in self.task_index
yaml_path = self._get_yaml_path(name)
if yaml_path == -1:
return {}
else:
return utils.load_yaml_config(yaml_path, mode="full")
def _get_tasklist(self, name):
assert not self._name_is_task(name)
return self.task_index[name]["task"]
def _process_alias(self, config, group=None):
# If the group is not the same as the original
# group which the group alias was intended for,
# Set the group_alias to None instead.
if ("group_alias" in config) and ("group" in config) and group is not None:
if config["group"] != group:
config["group_alias"] = None
return config
def _load_individual_task_or_group(
self,
name_or_config: Union[str, dict] = None,
parent_name: str = None,
update_config: dict = None,
yaml_path: str = None,
) -> ConfigurableTask:
def load_task(config, task, group=None, yaml_path=None):
if "include" in config:
assert yaml_path is not None
config.update(
utils.load_yaml_config(
yaml_path,
yaml_config={"include": config.pop("include")},
mode="full",
)
)
if self._config_is_python_task(config):
task_object = config["class"]()
else:
config = self._process_alias(config, group=group)
task_object = ConfigurableTask(config=config)
if group is not None:
task_object = (group, task_object)
return {task: task_object}
if isinstance(name_or_config, str):
if update_config is not None:
# Process name_or_config as a dict instead
name_or_config = {"task": name_or_config, **update_config}
elif self._name_is_task(name_or_config):
task_config = self._get_config(name_or_config)
return load_task(task_config, task=name_or_config, group=parent_name)
else:
GROUP_REGISTRY[group] = [task]
ALL_TASKS.add(group)
group_name = name_or_config
subtask_list = self._get_tasklist(name_or_config)
if subtask_list == -1:
group_config = self._get_config(name_or_config)
subtask_list = group_config["task"]
# This checks if we're at the root.
if parent_name is None:
group_config = self._get_config(name_or_config)
if set(group_config.keys()) > set(["task", "group"]):
update_config = {
k:v for k,v in group_config.items() if k not in ["task", "group"]
}
yaml_path = self._get_yaml_path(group_name)
return 0
if (update_config is not None) and ("group_alias" in update_config):
group_name = update_config["group_alias"]
update_config.pop("group_alias")
if isinstance(name_or_config, dict):
def check_prompt_config(
config: Dict[str, str], yaml_path: str = None
) -> List[Dict[str, str]]:
all_configs = []
if "use_prompt" in config:
prompt_list = prompts.load_prompt_list(
use_prompt=config["use_prompt"],
dataset_name=config["dataset_path"],
subset_name=config["dataset_name"] if "dataset_name" in config else None,
yaml_path=yaml_path,
)
for idx, prompt_variation in enumerate(prompt_list):
all_configs.append(
{
**config,
**{"use_prompt": prompt_variation},
**{
"task": "_".join(
[
config["task"]
if "task" in config
else get_task_name_from_config(config),
prompt_variation.split("/")[-1]
if ".yaml" in prompt_variation
else prompt_variation,
]
)
},
**{"output_type": "generate_until"},
if update_config is not None:
name_or_config={
**name_or_config,
**update_config,
}
)
else:
all_configs.append(config)
return all_configs
def get_task_name_from_config(task_config: Dict[str, str]) -> str:
if "dataset_name" in task_config:
return "{dataset_path}_{dataset_name}".format(**task_config)
else:
return "{dataset_path}".format(**task_config)
if self._config_is_task(name_or_config):
name = name_or_config["task"]
# If the name is registered as a group
# if self._name_is_task(name) is False:
if self._name_is_group(name):
group_name = name
update_config = {k:v for k,v in name_or_config.items() if k != "task"}
subtask_list = self._get_tasklist(name)
if subtask_list == -1:
subtask_list = self._get_config(name)["task"]
else:
if self._name_is_registered(name):
base_task_config = self._get_config(name)
# Check if this is a duplicate.
if parent_name is not None:
name_or_config["group"] = parent_name
num_duplicate = len(list(filter(lambda x: x.startswith(name), self.task_group_map[parent_name])))
if num_duplicate > 0:
name = f"{name}-{num_duplicate}"
self.task_group_map[parent_name].append(name)
task_config={
**base_task_config,
**name_or_config,
}
else:
task_config = name_or_config
return load_task(task_config, task=name, group=parent_name, yaml_path=yaml_path)
else:
group_name = name_or_config["group"]
subtask_list = name_or_config["task"]
# update_config = {k:v for k,v in name_or_config.items() if k != "task"}
if set(name_or_config.keys()) > set(["task", "group"]):
update_config = {
k:v for k,v in name_or_config.items() if k not in ["task", "group"]
}
all_subtasks = {}
if (parent_name is not None):
all_subtasks = {group_name: (parent_name, None)}
def include_task_folder(task_dir: str, register_task: bool = True) -> None:
"""
Walks `task_dir` and registers task configs when `register_task` is True, or group configs when it is False.
"""
fn = partial(self._load_individual_task_or_group, parent_name=group_name, update_config=update_config, yaml_path=yaml_path)
all_subtasks = {**all_subtasks, **dict(collections.ChainMap(*map(fn, subtask_list)))}
return all_subtasks
# Track whether any tasks failed during loading
import_fail = False
for root, subdirs, file_list in os.walk(task_dir):
# if (subdirs == [] or subdirs == ["__pycache__"]) and (len(file_list) > 0):
for f in file_list:
if f.endswith(".yaml"):
yaml_path = os.path.join(root, f)
try:
config = utils.load_yaml_config(yaml_path)
if "task" not in config:
continue
all_configs = check_prompt_config(
config, yaml_path=os.path.dirname(yaml_path)
)
for config in all_configs:
if register_task:
if type(config["task"]) == str:
register_configurable_task(config)
else:
if type(config["task"]) == list:
register_configurable_group(config, yaml_path)
# Log this silently and show it only when
# the user defines the appropriate verbosity.
except (ImportError, ModuleNotFoundError) as e:
import_fail = True
eval_logger.debug(
f"{yaml_path}: {e}. Config will not be added to registry."
)
except Exception as error:
import traceback
eval_logger.warning(
"Unexpected error loading config in\n"
f" {yaml_path}\n"
" Config will not be added to registry\n"
f" Error: {error}\n"
f" Traceback: {traceback.format_exc()}"
)
if import_fail:
eval_logger.warning(
"Some tasks could not be loaded due to missing dependencies."
" Run with `--verbosity DEBUG` for full details."
)
return 0
def load_task_or_group(self, task_list: Union[str, list] = None) -> dict:
"""Loads a dictionary of task objects from a list
:param task_list: Union[str, list] = None
Single string or list of strings of task names to be loaded
def include_path(task_dir):
include_task_folder(task_dir)
# Register Benchmarks after all tasks have been added
include_task_folder(task_dir, register_task=False)
return 0
:return
Dictionary of task objects
"""
if isinstance(task_list, str):
task_list = [task_list]
all_loaded_tasks = dict(
collections.ChainMap(
*map(
self._load_individual_task_or_group,
task_list
)
)
)
return all_loaded_tasks
def load_config(self, config: Dict):
return self._load_individual_task_or_group(config)
def _get_task_and_group(self, task_dir: str):
"""Creates an dictionary of tasks index with the following metadata,
- `type`, that can be either `task`, `python_task`, or `group`.
`task` refer to regular task configs, `python_task` are special
yaml files that only consists of `task` and `class` parameters.
`group` are group configs.
- `yaml_path`, path to the yaml file. If the entry is a `group` that
was configured through a task config, the yaml_path will be -1
and all subtasks will be listed in `task` (see below)
- `task`, reserved for entries with `type` as `group`. This will list
all subtasks. When a group config is created (as opposed to task
config having `group` parameter set), this will be set to -1 to
avoid recursive indexing. The whole list of subtasks will be loaded
at evaluation.
:param task_dir: str
A directory to check for tasks
:return
Dictionary of task names as key and task metadata
"""
tasks_and_groups = collections.defaultdict()
for root, _, file_list in os.walk(task_dir):
for f in file_list:
if f.endswith(".yaml"):
yaml_path = os.path.join(root, f)
config = utils.load_yaml_config(yaml_path, mode="simple")
if self._config_is_python_task(config):
# This is a python class config
tasks_and_groups[config["task"]] = {
"type": "python_task",
"yaml_path": yaml_path,
}
elif self._config_is_group(config):
# This is a group config
tasks_and_groups[config["group"]] = {
"type": "group",
"task": -1, # This signals that
# we don't need to know
# the task list for indexing
# as it can be loaded
# when called.
"yaml_path": yaml_path,
}
def initialize_tasks(verbosity="INFO"):
eval_logger.setLevel(getattr(logging, f"{verbosity}"))
# # Registered the level 1 tasks from a group config
# for config in config["task"]:
# if isinstance(config, dict) and self._config_is_task(config):
# task = config["task"]
# tasks_and_groups[task] = {
# "type": "task",
# "yaml_path": yaml_path,
# }
elif self._config_is_task(config):
# This is a task config
task = config["task"]
tasks_and_groups[task] = {
"type": "task",
"yaml_path": yaml_path,
}
task_dir = os.path.dirname(os.path.abspath(__file__)) + "/"
include_path(task_dir)
if "group" in config:
groups = config["group"]
if isinstance(config["group"], str):
groups = [groups]
for group in groups:
if group not in tasks_and_groups:
tasks_and_groups[group] = {
"type": "group",
"task": [task],
"yaml_path": -1,
}
else:
tasks_and_groups[group]["task"].append(task)
else:
self.logger.debug(f"File {f} in {root} could not be loaded")
return tasks_and_groups
def include_path(task_dir):
logger = utils.eval_logger
logger.setLevel(getattr(logging, "INFO"))
logger.info(
"To still use tasks loaded from args.include_path,"
"see an example of the new TaskManager API in https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage"
)
return 0
def get_task(task_name, config):
try:
return TASK_REGISTRY[task_name](config=config)
except KeyError:
eval_logger.info("Available tasks:")
eval_logger.info(list(TASK_REGISTRY) + list(GROUP_REGISTRY))
raise KeyError(f"Missing task {task_name}")
def initialize_tasks(verbosity="INFO"):
logger = utils.eval_logger
logger.setLevel(getattr(logging, f"{verbosity}"))
logger.info(
"lm_eval.tasks.initialize_tasks() is deprecated and no longer necessary. "
"It will be removed in v0.4.2 release. "
"TaskManager will instead be used."
)
return 0
def get_task_name_from_config(task_config: Dict[str, str]) -> str:
if "task" in task_config:
return task_config["task"]
if "dataset_name" in task_config:
return "{dataset_path}_{dataset_name}".format(**task_config)
else:
return "{dataset_path}".format(**task_config)
def get_task_name_from_object(task_object):
for name, class_ in TASK_REGISTRY.items():
if class_ is task_object:
return name
if hasattr(task_object, "config"):
return task_object._config["task"]
# TODO: scrap this
# this gives a mechanism for non-registered tasks to have a custom name anyways when reporting
@@ -234,54 +382,40 @@ def get_task_name_from_object(task_object):
else type(task_object).__name__
)
def get_task_dict(task_name_list: List[Union[str, Dict, Task]], task_manager: TaskManager = None):
"""Creates a dictionary of task objects from either a name of task, config, or prepared Task object.
# TODO: pass num_fewshot and other cmdline overrides in a better way
def get_task_dict(task_name_list: List[Union[str, Dict, Task]], **kwargs):
config = {**kwargs}
:param task_name_list: List[Union[str, Dict, Task]]
List of task names (str), task config dicts, or instantiated Task objects to load
:param task_manager: TaskManager = None
A TaskManager object that stores the indexed tasks. If not set,
one will be created internally. Users should set this themselves
if additional task paths need to be included
via `include_path`
task_name_from_registry_dict = {}
:return
Dictionary of task objects
"""
task_name_from_string_dict = {}
task_name_from_config_dict = {}
task_name_from_object_dict = {}
if type(task_name_list) != list:
if isinstance(task_name_list, str):
task_name_list = [task_name_list]
for task_element in task_name_list:
if isinstance(task_element, str):
if task_element in GROUP_REGISTRY:
group_name = task_element
for task_name in GROUP_REGISTRY[task_element]:
if task_name not in task_name_from_registry_dict:
task_obj = get_task_dict(task_name)
if task_name in task_obj.keys():
task_dict = {
task_name: (group_name, task_obj[task_name]),
}
else:
task_dict = {
task_name: (group_name, None),
**task_obj,
}
string_task_name_list = [task for task in task_name_list if isinstance(task, str)]
others_task_name_list = [task for task in task_name_list if not isinstance(task, str)]
if len(string_task_name_list) > 0:
if task_manager is None:
task_manager = TaskManager()
task_name_from_registry_dict = {
**task_name_from_registry_dict,
**task_dict,
}
else:
task_name = task_element
if task_name not in task_name_from_registry_dict:
task_name_from_registry_dict = {
**task_name_from_registry_dict,
task_name: get_task(task_name=task_element, config=config),
}
task_name_from_string_dict = task_manager.load_task_or_group(string_task_name_list)
elif isinstance(task_element, dict):
task_element.update(config)
for task_element in others_task_name_list:
if isinstance(task_element, dict):
task_name_from_config_dict = {
**task_name_from_config_dict,
get_task_name_from_config(task_element): ConfigurableTask(
config=task_element
),
**task_manager.load_config(config=task_element),
}
elif isinstance(task_element, Task):
@@ -290,11 +424,11 @@ def get_task_dict(task_name_list: List[Union[str, Dict, Task]], **kwargs):
get_task_name_from_object(task_element): task_element,
}
assert set(task_name_from_registry_dict.keys()).isdisjoint(
assert set(task_name_from_string_dict.keys()).isdisjoint(
set(task_name_from_object_dict.keys())
)
return {
**task_name_from_registry_dict,
**task_name_from_string_dict,
**task_name_from_config_dict,
**task_name_from_object_dict,
}
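# Editor's usage sketch of the updated entry point (task name is an example):
tm = TaskManager()
task_dict = get_task_dict(["arc_easy"], task_manager=tm)  # str entries resolved via the manager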
group:
- ai2_arc
task: arc_easy
dataset_path: ai2_arc
dataset_path: allenai/ai2_arc
dataset_name: ARC-Easy
output_type: multiple_choice
training_split: train
......
@@ -28,7 +28,7 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
base_doc_to_text = "Q: {{input}}\nA:"
@@ -70,7 +70,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"/{task}.yaml"
utils.eval_logger.info(f"Saving yaml for subset {task} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
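# Editor's note on the pattern applied throughout these scripts: passing
# encoding="utf-8" keeps YAML reads and writes consistent on platforms where
# open() otherwise defaults to a locale code page (e.g. Windows).
with open("example.yaml", "w", encoding="utf-8") as yaml_file:
    yaml_file.write("task: arc_easy\n")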
@@ -28,3 +28,4 @@ filter_list:
num_fewshot: 0
metadata:
version: 2.0
num_fewshot: 3 # controls what is printed in n-shot
@@ -19,3 +19,4 @@ generation_kwargs:
num_fewshot: 0
metadata:
version: 1.0
num_fewshot: 3 # will be printed in results table
@@ -27,13 +27,13 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
if args.cot_prompt_path is not None:
import json
with open(args.cot_prompt_path) as f:
with open(args.cot_prompt_path, encoding="utf-8") as f:
cot_file = json.load(f)
def query():
@@ -42,7 +42,7 @@ if __name__ == "__main__":
print(query())
languages = [split["split"] for split in query()]
for lang in tqdm(languages):
for lang in tqdm([lang for lang in languages if "default" not in lang]):
yaml_dict = {
"include": base_yaml_name,
"task": f"belebele_{args.task_prefix}_{lang}"
@@ -54,7 +54,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{lang}.yaml"
logging.info(f"Saving yaml for subset {lang} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
"fewshot_split": "default"
"include": "_default_template_yaml"
"task": "belebele_default"
"test_split": "default"
output_type: generate_until
validation_split: validation
test_split: null
doc_to_choice: null
metric_list:
- metric: exact_match
aggregation: mean
......
group: flan_anli
task:
- include: yaml_templates/held_in_template_yaml
task: anli_r1
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r1
- include: yaml_templates/held_in_template_yaml
task: anli_r2
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r2
- include: yaml_templates/held_in_template_yaml
task: anli_r3
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r3
group: flan_arc
task:
- include: yaml_templates/held_in_template_yaml
task: arc_easy
dataset_path: ai2_arc
dataset_name: ARC-Easy
use_prompt: prompt_templates/arc.yaml:*
validation_split: validation
- include: yaml_templates/held_in_template_yaml
task: arc_challenge
dataset_path: ai2_arc
dataset_name: ARC-Challenge
use_prompt: prompt_templates/arc.yaml:*
validation_split: validation
group: flan_boolq
task:
- include: yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: boolq
use_prompt: prompt_templates/boolq.yaml:*
validation_split: validation
group: flan_cot
task:
- include: yaml_templates/cot_template_yaml
dataset_path: gsm8k
dataset_name: boolq
use_prompt: promptsource:*
validation_split: validation
- include: yaml_templates/cot_template_yaml
dataset_path: EleutherAI/asdiv
use_prompt: promptsource:*
validation_split: validation
group: flan_held_in
group_alias: Flan (Held-In)
task:
- flan_boolq
- flan_rte
- flan_anli
- flan_arc
# ANLI R1
- group: anli_r1_flan
group_alias: ANLI R1
task:
- task: anli_r1
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
# ANLI R2
- group: anli_r2_flan
group_alias: ANLI R2
task:
- task: anli_r2
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
# ANLI R3
- group: anli_r3_flan
group_alias: ANLI R3
task:
- task: anli_r3
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
# Arc Easy
- group: arc_easy_flan
group_alias: Arc Easy
task:
- task: arc_easy
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
# Arc Challenge
- group: arc_challenge_flan
group_alias: Arc Challenge
task:
- task: arc_challenge
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
# BoolQ
- group: boolq_flan
group_alias: BoolQ
task:
- task: boolq
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
task_alias: prompt-9
include: _held_in_template_yaml
doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
# RTE
- group: rte_flan
group_alias: RTE
task:
- task: rte
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{sentence2}}\"?\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\n\nQ with options: Can we draw the following conclusion?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\nDoes this next sentence follow, given the preceding text?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{sentence2}}"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nThe answer is"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true:\n\n{{sentence1}}\n\nSentence: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Question with options: can we draw the following hypothesis from the context? \n\nContext:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{sentence2}}\n\n{{sentence1}}\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
group: flan_held_in
task:
- include: flan/yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: boolq
use_prompt: flan/prompt_templates/boolq.yaml:*
validation_split: validation
- include: flan/yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: rte
use_prompt: flan/prompt_templates/rte.yaml:*
validation_split: validation
- include: flan/yaml_templates/held_in_template_yaml
task: anli_r1
dataset_path: anli
use_prompt: flan/prompt_templates/anli.yaml:*
validation_split: dev_r1
- include: flan/yaml_templates/held_in_template_yaml
task: anli_r2
dataset_path: anli
use_prompt: flan/prompt_templates/anli.yaml:*
validation_split: dev_r2
- include: flan/yaml_templates/held_in_template_yaml
task: anli_r3
dataset_path: anli
use_prompt: flan/prompt_templates/anli.yaml:*
validation_split: dev_r3
- include: flan/yaml_templates/held_in_template_yaml
task: arc_easy
dataset_path: ai2_arc
dataset_name: ARC-Easy
use_prompt: flan/prompt_templates/arc.yaml:*
validation_split: validation
- include: flan/yaml_templates/held_in_template_yaml
task: arc_challenge
dataset_path: ai2_arc
dataset_name: ARC-Challenge
use_prompt: flan/prompt_templates/arc.yaml:*
validation_split: validation
group: flan_held_out
task:
# BBH
- bbh_flan_zeroshot
- bbh_flan_fewshot
- bbh_flan_cot_fewshot
- bbh_flan_cot_zeroshot
- bbh_zeroshot
- bbh_fewshot
- bbh_cot_fewshot
- bbh_cot_zeroshot
# MMLU
- mmlu
- mmlu_flan_n_shot_generative
......
group: flan_rte
task:
- include: yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: rte
use_prompt: prompt_templates/rte.yaml:*
validation_split: validation