Update generate_tasks.py and the template YAML files

1a77b4d5 · haileyschoelkopf · 605787a9 · 1a77b4d5 · 1a77b4d5 · 1a77b4d5
Commit 1a77b4d5 authored Oct 03, 2023 by haileyschoelkopf
3 changed files
--- a/lm_eval/tasks/bigbench/generate_tasks.py
+++ b/lm_eval/tasks/bigbench/generate_tasks.py
@@ -185,7 +185,7 @@ def main() -> None:
                        {
                            "include": f"../{task_type}",
                            "task": "bigbench_" + task + "_{}".format(task_type.split("_template_yaml")[0]),
-                            "dataset_name": task,
+                            "dataset_name": task + "_zero_shot", # zero-shot version of the dataset
                        },
                        f,
                        width=float("inf"), allow_unicode=True

--- a/lm_eval/tasks/bigbench/greedy_until_template_yaml
+++ b/lm_eval/tasks/bigbench/greedy_until_template_yaml
 group: bigbench
-dataset_path: bigbench
+dataset_path: hails/bigbench
 output_type: greedy_until
-training_split: train
+test_split: default
-validation_split: validation
 doc_to_text: inputs
 doc_to_target: "{{targets[0]}}"
 generation_kwargs:

--- a/lm_eval/tasks/bigbench/multiple_choice_template_yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice_template_yaml
 group: bigbench
-dataset_path: bigbench
+dataset_path: hails/bigbench
+dataset_kwargs:
+  num_shots: 0 # TODO: the number of shots should be controlled through this setting, not through the typical methods
+  # subtask_name: null
 output_type: multiple_choice
-training_split: train
+test_split: default
-validation_split: validation
 doc_to_text: inputs
 doc_to_target: "{{multiple_choice_targets.index(targets[0])}}"
 doc_to_choice: "{{multiple_choice_targets}}"
 metric_list:
  - metric: acc
+  # TODO: add Brier score and other metrics