Merge branch 'main' into inverse-scaling-tasks

60c9c170 · haileyschoelkopf · 4b2d565b · b4cd85d4 · 60c9c170 · 4b2d565b
Commit 60c9c170 authored May 29, 2024 by haileyschoelkopf
20 changed files
--- a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml
 # Generated by utils.py
 dataset_name: code_line_description_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_code_line_description_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
-# Generated by utils.py
-dataset_name: codenames_zero_shot
-include: ../multiple_choice_template_yaml
-task: bigbench_codenames_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/color.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/color.yaml
 # Generated by utils.py
 dataset_name: color_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_color_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
 # Generated by utils.py
 dataset_name: common_morpheme_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_common_morpheme_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
 # Generated by utils.py
 dataset_name: conceptual_combinations_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_conceptual_combinations_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
-# Generated by utils.py
-dataset_name: conlang_translation_zero_shot
-include: ../multiple_choice_template_yaml
-task: bigbench_conlang_translation_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
 # Generated by utils.py
 dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
 # Generated by utils.py
 dataset_name: crash_blossom_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_crash_blossom_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
 # Generated by utils.py
 dataset_name: crass_ai_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_crass_ai_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
 # Generated by utils.py
 dataset_name: cryobiology_spanish_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cryobiology_spanish_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
-# Generated by utils.py
-dataset_name: cryptonite_zero_shot
-include: ../multiple_choice_template_yaml
-task: bigbench_cryptonite_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
 # Generated by utils.py
 dataset_name: cs_algorithms_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cs_algorithms_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
 # Generated by utils.py
 dataset_name: dark_humor_detection_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_dark_humor_detection_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml
 # Generated by utils.py
 dataset_name: date_understanding_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_date_understanding_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml
 # Generated by utils.py
 dataset_name: disambiguation_qa_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_disambiguation_qa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml
 # Generated by utils.py
 dataset_name: discourse_marker_prediction_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_discourse_marker_prediction_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml
-# Generated by utils.py
-dataset_name: disfl_qa_zero_shot
-include: ../multiple_choice_template_yaml
-task: bigbench_disfl_qa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
 # Generated by utils.py
 dataset_name: dyck_languages_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_dyck_languages_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml
 # Generated by utils.py
 dataset_name: elementary_math_qa_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_elementary_math_qa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
 # Generated by utils.py
 dataset_name: emoji_movie_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_emoji_movie_multiple_choice