"vscode:/vscode.git/clone" did not exist on "475481b406e164ee821b4e761b63b9d5c7c061a9"
Commit e5306ea6 authored by lintangsutawika's avatar lintangsutawika

added script to build benchmarks from promptsource

parent 3713ec52
import os
import argparse

import yaml

from lm_eval import utils
from promptsource.templates import DatasetTemplates


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark", required=True)
    parser.add_argument("--model_args", default="")
    return parser.parse_args()


def main():
    args = parse_args()
    path = args.benchmark
    yaml_path = ""

    # The benchmark file is a YAML list of {dataset_path, dataset_name} entries.
    with open(path) as file:
        TASK_LIST = yaml.full_load(file)

    # Base config shared by every generated per-prompt task config.
    with open(os.path.join(yaml_path, "promptsource_template.yaml")) as file:
        yaml_dict = yaml.full_load(file)

    config_list = []
    for task in TASK_LIST:
        dataset_name = task["dataset_path"]
        subset_name = task.get("dataset_name")

        # Load every promptsource template for this dataset (and subset, if any).
        if subset_name is None:
            prompts = DatasetTemplates(dataset_name=dataset_name)
        else:
            prompts = DatasetTemplates(
                dataset_name=dataset_name, subset_name=subset_name
            )

        # One task config per prompt template.
        for prompt_name in prompts.all_template_names:
            config_dict = {
                "include": "promptsource_template.yaml",
                "use_prompts": prompts[prompt_name],
                **yaml_dict,
            }
            config_list.append(config_dict)

    return config_list
- dataset_path: "super_glue" # Coreference Resolution
  dataset_name: "wsc.fixed"
- dataset_path: "winogrande" # Coreference Resolution
  dataset_name: "winogrande_xl"
- dataset_path: "super_glue" # Natural Language Inference
  dataset_name: "cb"
- dataset_path: "super_glue" # Natural Language Inference
  dataset_name: "rte"
- dataset_path: "anli" # Natural Language Inference
  dataset_name: null
- dataset_path: "super_glue" # Sentence Completion
  dataset_name: "copa"
- dataset_path: "hellaswag" # Sentence Completion
  dataset_name: null
- dataset_path: "super_glue" # Word Sense Disambiguation
  dataset_name: "wic"
import os
import yaml
import argparse

from tqdm import tqdm

from promptsource.templates import DatasetTemplates

from lm_eval import utils

# from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger

# from lm_eval.tasks import include_task_folder


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark_name", required=True)
    parser.add_argument("--benchmark_path", required=True)
    parser.add_argument("--task_save_path", default="lm_eval/tasks/")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    # The benchmark file is a YAML list of {dataset_path, dataset_name} entries.
    with open(args.benchmark_path) as file:
        TASK_LIST = yaml.full_load(file)

    for task in tqdm(TASK_LIST):
        eval_logger.info(f"Processing {task}")

        dataset_name = task["dataset_path"]
        if "dataset_name" in task:
            subset_name = task["dataset_name"]
        else:
            subset_name = None

        # One output directory per dataset (or dataset/subset).
        if subset_name is None:
            file_name = f"promptsource_{dataset_name}"
            file_path = os.path.join(args.task_save_path, f"{dataset_name}")
        else:
            file_name = f"promptsource_{dataset_name}_{subset_name}"
            file_path = os.path.join(
                args.task_save_path, f"{dataset_name}/{subset_name}"
            )
        os.makedirs(file_path, exist_ok=True)

        # Load every promptsource template for this dataset (and subset, if any).
        if subset_name is None:
            prompts = DatasetTemplates(dataset_name=dataset_name)
        else:
            prompts = DatasetTemplates(
                dataset_name=dataset_name, subset_name=subset_name
            )

        # Write one small task config per prompt template.
        for idx, prompt_name in enumerate(prompts.all_template_names):
            full_file_name = (
                file_name + f"_{idx}.yml"
            )  # .format(prompt_name.replace(" ", "_").lower())
            config_dict = {
                "group": args.benchmark_name,
                "include": "promptsource_template.yaml",
                "use_prompts": f"promptsource:{prompt_name}",
            }

            file_save_path = os.path.join(file_path, full_file_name)
            eval_logger.info(f"Save to {file_save_path}")
            with open(file_save_path, "w") as yaml_file:
                yaml.dump(config_dict, yaml_file)
    # return config_dict
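A usage sketch for the script above (the script path and benchmark name below are hypothetical, not taken from this commit; only the flags and output layout follow the code):

# python scripts/build_benchmark.py \
#     --benchmark_name t0_eval \
#     --benchmark_path scripts/benchmark_list.yaml \
#     --task_save_path lm_eval/tasks/
#
# For the super_glue / wsc.fixed entry this writes files such as
# lm_eval/tasks/super_glue/wsc.fixed/promptsource_super_glue_wsc.fixed_0.yml,
# each containing a config of the form:
#
# group: t0_eval
# include: promptsource_template.yaml
# use_prompts: promptsource:<template name>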