gaoqiong / lm-evaluation-harness

Commit e795efcf, authored Sep 04, 2023 by lintangsutawika

updates

parent c8b76a3d
Showing 5 changed files with 75 additions and 38 deletions.
lm_eval/tasks/bbh/README.md (+1, -1)
lm_eval/tasks/bbh/_generate_configs.py (+72, -20)
lm_eval/tasks/bbh/_template_yaml (+0, -15)
lm_eval/tasks/bbh/flan_cot_fewshot/_flan_cot_fewshot_template_yaml (+1, -1)
lm_eval/tasks/bbh/flan_cot_zeroshot/_flan_cot_zeroshot_template_yaml (+1, -1)
lm_eval/tasks/bbh/README.md

@@ -25,7 +25,7 @@ Homepage: https://github.com/suzgunmirac/BIG-Bench-Hard
 #### Groups
-- `bbh`
+- `bbh_flan_zeroshot`
 #### Tasks
...
lm_eval/tasks/bbh/_generate_configs.py
"""
Take in a YAML, and output all other splits with this YAML
"""
import
os
import
re
import
yaml
import
inspect
import
datasets
import
requests
import
argparse
import
datasets
from
tqdm
import
tqdm
from
lm_eval
import
utils
from
lm_eval.logger
import
eval_logger
def
parse_args
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--base_yaml_path"
,
required
=
True
)
parser
.
add_argument
(
"--save_prefix_path"
,
default
=
"flan_zeroshot"
)
parser
.
add_argument
(
"--cot"
,
default
=
False
)
parser
.
add_argument
(
"--fewshot"
,
default
=
False
)
parser
.
add_argument
(
"--task_prefix"
,
default
=
""
)
return
parser
.
parse_args
()
if
__name__
==
"__main__"
:
args
=
parse_args
()
def
main
()
->
None
:
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name
=
os
.
path
.
split
(
args
.
base_yaml_path
)[
-
1
]
with
open
(
args
.
base_yaml_path
)
as
f
:
base_yaml
=
yaml
.
full_load
(
f
)
base_doc_to_text
=
"Q: {{input}}
\n
A:"
answer_regex
=
re
.
compile
(
"(?<=answer is )(.*)(?=.)"
)
dataset_path
=
"lukaemon/bbh"
for
task
in
tqdm
(
datasets
.
get_dataset_infos
(
dataset_path
).
keys
()):
file_name
=
f
"
{
task
}
.yaml"
try
:
with
open
(
f
"
{
file_name
}
"
,
"w"
)
as
f
:
f
.
write
(
"# Generated by _generate_configs.py
\n
"
)
yaml
.
dump
(
{
"include"
:
"_template_yaml"
,
"task"
:
f
"
{
dataset_path
.
split
(
'/'
)[
-
1
]
}
_
{
task
}
"
,
"dataset_name"
:
task
,
},
f
,
)
except
FileExistsError
:
pass
resp
=
requests
.
get
(
f
"https://raw.githubusercontent.com/suzgunmirac/BIG-Bench-Hard/main/cot-prompts/
{
task
}
.txt"
).
content
.
decode
(
'utf-8'
)
prompt
=
resp
.
split
(
"
\n
-----
\n
"
)[
-
1
]
description
,
*
few_shot
=
prompt
.
split
(
"
\n\n
Q:"
)
prefix_doc_to_text
=
""
if
args
.
fewshot
:
if
args
.
cot
:
prefix_doc_to_text
=
" "
.
join
(
few_shot
)
else
:
for
shot
in
few_shot
:
shot
=
"Q:"
+
shot
try
:
answer
=
answer_regex
.
search
(
shot
)[
0
]
except
:
print
(
"task"
,
task
)
print
(
shot
)
example
=
shot
.
split
(
"Let
\'
s think step by step."
)[
0
]
prefix_doc_to_text
+=
f
"
{
example
}{
answer
}
\n\n
"
doc_to_text
=
prefix_doc_to_text
+
base_doc_to_text
if
args
.
cot
:
doc_to_text
=
doc_to_text
+
" Let's think step by step.
\n
"
yaml_dict
=
{
"include"
:
"_template_yaml"
,
"task"
:
f
"bbh_
{
args
.
task_prefix
}
_
{
task
}
"
,
"dataset_name"
:
task
,
"description"
:
description
+
"
\n\n
"
,
"doc_to_text"
:
doc_to_text
,
}
file_save_path
=
args
.
save_prefix_path
+
f
"/
{
task
}
.yaml"
eval_logger
.
info
(
f
"Saving yaml for subset
{
task
}
to
{
file_save_path
}
"
)
with
open
(
file_save_path
,
"w"
)
as
yaml_file
:
yaml
.
dump
(
yaml_dict
,
yaml_file
,
width
=
float
(
"inf"
),
allow_unicode
=
True
,
default_style
=
'"'
)
if
__name__
==
"__main__"
:
main
()
# https://raw.githubusercontent.com/suzgunmirac/BIG-Bench-Hard/main/cot-prompts/boolean_expressions.txt
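For reference, here is a minimal sketch (not part of this commit) of how the script above turns a BIG-Bench-Hard cot-prompts file into a few-shot doc_to_text prefix. The resp string is a made-up stand-in for a downloaded {task}.txt; only the parsing and regex steps mirror the loop in _generate_configs.py.

import re

# Made-up stand-in for the raw cot-prompts file: a header, a "-----" separator,
# a task description, then worked examples separated by blank lines.
resp = (
    "header text\n"
    "-----\n"
    "Evaluate the result of a random Boolean expression.\n\n"
    "Q: not ( True ) and ( True ) is\n"
    "A: Let's think step by step.\n"
    "not ( True ) is False. False and True is False. So the answer is False.\n\n"
    "Q: True and not not ( not False ) is\n"
    "A: Let's think step by step.\n"
    "not not ( not False ) is True, and True and True is True. So the answer is True."
)

answer_regex = re.compile("(?<=answer is )(.*)(?=.)")
base_doc_to_text = "Q: {{input}}\nA:"

prompt = resp.split("\n-----\n")[-1]
description, *few_shot = prompt.split("\n\nQ:")

# Non-CoT few-shot branch: keep only question plus final answer, dropping the rationale.
prefix_doc_to_text = ""
for shot in few_shot:
    shot = "Q:" + shot
    answer = answer_regex.search(shot)[0]
    example = shot.split("Let's think step by step.")[0]
    prefix_doc_to_text += f"{example}{answer}\n\n"

print(prefix_doc_to_text + base_doc_to_text)

Running this prints two "Q: ... / A: <answer>" examples followed by the "Q: {{input}} / A:" slot that each evaluation document is rendered into.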
lm_eval/tasks/bbh/_template_yaml (deleted, 100644 → 0)
group: bbh
dataset_path: lukaemon/bbh
output_type: greedy_until
test_split: test
doc_to_text: "Q: {{input}}\nA:"
doc_to_target: "{{target}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "</s>"
  do_sample: false
  temperature: 0.0
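With this shared template gone, the per-task configs written by _generate_configs.py carry the task-specific fields themselves. As an illustration only (not taken from the commit, values are placeholders), this is roughly what the generator's final yaml.dump call emits for one task, using the same dump options as the script:

import yaml

# Placeholder values standing in for what _generate_configs.py computes per task.
yaml_dict = {
    "include": "_template_yaml",  # placeholder for whichever base template the config includes
    "task": "bbh_flan_zeroshot_boolean_expressions",
    "dataset_name": "boolean_expressions",
    "description": "Evaluate the result of a random Boolean expression.\n\n",
    "doc_to_text": "Q: {{input}}\nA:",
}

# Same dump options as the script: never wrap long prompt strings,
# keep unicode, and force double-quoted scalars.
print(
    yaml.dump(
        yaml_dict,
        width=float("inf"),
        allow_unicode=True,
        default_style='"',
    )
)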
lm_eval/tasks/bbh/flan_cot_fewshot/_flan_cot_fewshot_template_yaml

-group: bbh_flan_fewshot
+group: bbh_flan_cot_fewshot
 dataset_path: lukaemon/bbh
 output_type: greedy_until
 test_split: test
...
lm_eval/tasks/bbh/flan_cot_zeroshot/_flan_cot_zeroshot_template_yaml

-group: bbh_flan_zeroshot
+group: bbh_flan_cot_zeroshot
 dataset_path: lukaemon/bbh
 output_type: greedy_until
 test_split: test
...
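The two group renames above (bbh_flan_fewshot → bbh_flan_cot_fewshot, bbh_flan_zeroshot → bbh_flan_cot_zeroshot) line up with the generator's --cot and --fewshot switches. A small sketch (not from the commit, placeholder strings only) of how those flags change the assembled doc_to_text:

# Placeholders standing in for the parsed cot-prompts content.
base_doc_to_text = "Q: {{input}}\nA:"
few_shot = [" <worked example 1>", " <worked example 2>"]

# --cot without --fewshot (bbh_flan_cot_zeroshot): no prefix, just ask for reasoning.
cot_zeroshot = base_doc_to_text + " Let's think step by step.\n"

# --cot with --fewshot (bbh_flan_cot_fewshot): keep the worked examples verbatim as the prefix.
cot_fewshot = " ".join(few_shot) + base_doc_to_text + " Let's think step by step.\n"

print(repr(cot_zeroshot))
print(repr(cot_fewshot))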