Merge remote-tracking branch 'origin/big-refactor' into calibration

e1ae8a2f · Herbie Bradley · 50e99bd7 · 30936bc7 · e1ae8a2f · e1ae8a2f
Commit e1ae8a2f authored Nov 26, 2023 by Herbie Bradley
20 changed files
--- a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
+# Generated by utils.py
+dataset_name: codenames_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_codenames_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/color.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/color.yaml
+# Generated by utils.py
+dataset_name: color_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_color_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
+# Generated by utils.py
+dataset_name: common_morpheme_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_common_morpheme_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
+# Generated by utils.py
+dataset_name: conceptual_combinations_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_conceptual_combinations_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
+# Generated by utils.py
+dataset_name: conlang_translation_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_conlang_translation_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
+# Generated by utils.py
+dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
+# Generated by utils.py
+dataset_name: crash_blossom_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_crash_blossom_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
+# Generated by utils.py
+dataset_name: crass_ai_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_crass_ai_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
+# Generated by utils.py
+dataset_name: cryobiology_spanish_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cryobiology_spanish_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
+# Generated by utils.py
+dataset_name: cryptonite_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cryptonite_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
+# Generated by utils.py
+dataset_name: cs_algorithms_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cs_algorithms_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
+# Generated by utils.py
+dataset_name: dark_humor_detection_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_dark_humor_detection_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml
+# Generated by utils.py
+dataset_name: date_understanding_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_date_understanding_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml
+# Generated by utils.py
+dataset_name: disambiguation_qa_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_disambiguation_qa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml
+# Generated by utils.py
+dataset_name: discourse_marker_prediction_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_discourse_marker_prediction_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml
+# Generated by utils.py
+dataset_name: disfl_qa_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_disfl_qa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
+# Generated by utils.py
+dataset_name: dyck_languages_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_dyck_languages_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml
+# Generated by utils.py
+dataset_name: elementary_math_qa_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_elementary_math_qa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
+# Generated by utils.py
+dataset_name: emoji_movie_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_emoji_movie_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml
+# Generated by utils.py
+dataset_name: emojis_emotion_prediction_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_emojis_emotion_prediction_multiple_choice