gaoqiong / lm-evaluation-harness · Commit e5306ea6

Authored Jul 03, 2023 by lintangsutawika

added script to build benchmarks from promptsource

Parent: 3713ec52
Showing 3 changed files with 88 additions and 40 deletions:

lm_eval/tasks/benchmarks/build_promptsource_tasks.py   +0  -40
lm_eval/tasks/benchmarks/t0_eval.yml                    +16 -0
scripts/build_benchmark.py                              +72 -0
lm_eval/tasks/benchmarks/build_promptsource_tasks.py (deleted, 100644 → 0)

import os
import argparse

from lm_eval import utils
from promptsource.templates import DatasetTemplates


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark", required=True)
    parser.add_argument("--model_args", default="")


def main():
    args = parse_args()

    path = args.benchmark
    yaml_path = ""
    with open(path) as file:
        TASK_LIST = file.readlines()

    for dataset_name, subset_name in TASK_LIST:

        if subset_name is None:
            prompts = DatasetTemplates(dataset_name=dataset_name)
        else:
            prompts = DatasetTemplates(
                dataset_name=dataset_name, subset_name=subset_name
            )

        with open(os.path.join(yaml_path, "promptsource_template.yaml")) as file:
            yaml_dict = file.readline()

        for prompt_name in prompts.all_template_names:

            config_dict = {
                "include": "promptsource_template.yaml",
                "use_prompts": prompts[prompt_name],
                **yaml_dict,
            }

    return config_dict
lm_eval/tasks/benchmarks/t0_eval.yml (new file, 0 → 100644)

- dataset_path: "super_glue" # Coreference Resolution
  dataset_name: "wsc.fixed"
- dataset_path: "winogrande" # Coreference Resolution
  dataset_name: "winogrande_xl"
- dataset_path: "super_glue" # Natural Language Inference
  dataset_name: "cb"
- dataset_path: "super_glue" # Natural Language Inference
  dataset_name: "rte"
- dataset_path: "anli" # Natural Language Inference
  dataset_name: null
- dataset_path: "super_glue" # Sentence Completion
  dataset_name: "copa"
- dataset_path: "hellaswag" # Natural Language Inference
  dataset_name: null
- dataset_path: "super_glue" # Word Sense Disambiguation
  dataset_name: "wic"
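
Each entry pairs a Hugging Face dataset path with an optional config name (null when the dataset has no config); the build script below resolves the promptsource templates for every entry. A minimal sketch of that lookup for the super_glue/cb entry, assuming promptsource is installed (the returned template names depend on the installed promptsource version):

from promptsource.templates import DatasetTemplates

# Fetch the prompt templates registered for super_glue / cb, as the build
# script does for each entry listed in t0_eval.yml.
prompts = DatasetTemplates(dataset_name="super_glue", subset_name="cb")
print(prompts.all_template_names)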
scripts/build_benchmark.py (new file, 0 → 100644)

import os
import yaml
import argparse

from tqdm import tqdm

from promptsource.templates import DatasetTemplates

from lm_eval import utils

# from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger

# from lm_eval.tasks import include_task_folder


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark_name", required=True)
    parser.add_argument("--benchmark_path", required=True)
    parser.add_argument("--task_save_path", default="lm_eval/tasks/")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    with open(args.benchmark_path) as file:
        TASK_LIST = yaml.full_load(file)

    for task in tqdm(TASK_LIST):
        eval_logger.info(f"Processing {task}")

        dataset_name = task["dataset_path"]
        if "dataset_name" in task:
            subset_name = task["dataset_name"]
        else:
            subset_name = None

        if subset_name is None:
            file_name = f"promptsource_{dataset_name}"
            file_path = os.path.join(args.task_save_path, f"{dataset_name}")
        else:
            file_name = f"promptsource_{dataset_name}_{subset_name}"
            file_path = os.path.join(args.task_save_path, f"{dataset_name}/{subset_name}")

        os.makedirs(file_path, exist_ok=True)

        if subset_name is None:
            prompts = DatasetTemplates(dataset_name=dataset_name)
        else:
            prompts = DatasetTemplates(dataset_name=dataset_name, subset_name=subset_name)

        for idx, prompt_name in enumerate(prompts.all_template_names):
            full_file_name = file_name + f"_{idx}.yml"
            # .format(prompt_name.replace(" ", "_").lower())
            config_dict = {
                "group": args.benchmark_name,
                "include": "promptsource_template.yaml",
                "use_prompts": f"promptsource:{prompt_name}",
            }

            file_save_path = os.path.join(file_path, full_file_name)
            eval_logger.info(f"Save to {file_save_path}")
            with open(file_save_path, "w") as yaml_file:
                yaml.dump(config_dict, yaml_file)
        # return config_dict
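
For orientation, a hedged sketch of the per-prompt config this script writes, assuming it is run from the repository root as python scripts/build_benchmark.py --benchmark_name t0_eval --benchmark_path lm_eval/tasks/benchmarks/t0_eval.yml; the prompt name below is a placeholder, not something taken from this commit:

import yaml

# Reproduce the config_dict the script dumps for one prompt of the
# super_glue/cb entry; "<prompt name>" stands in for a real promptsource
# template name.
config_dict = {
    "group": "t0_eval",
    "include": "promptsource_template.yaml",
    "use_prompts": "promptsource:<prompt name>",
}
print(yaml.dump(config_dict))
# The script would save this content as
# lm_eval/tasks/super_glue/cb/promptsource_super_glue_cb_0.yml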