Merge pull request #852 from EleutherAI/bigbench

[Refactor] bigbench

Merge pull request #852 from EleutherAI/bigbench
[Refactor] bigbench
7f2a9cbd · Hailey Schoelkopf · GitHub · c4fa6fb5 · 51a43a76 · 7f2a9cbd
Unverified Commit 7f2a9cbd authored Oct 04, 2023 by Hailey Schoelkopf Committed by GitHub Oct 04, 2023
20 changed files
--- a/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml
+# Generated by utils.py
+dataset_name: bridging_anaphora_resolution_barqa_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_bridging_anaphora_resolution_barqa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml
+# Generated by utils.py
+dataset_name: causal_judgment_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_causal_judgment_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml
+# Generated by utils.py
+dataset_name: cause_and_effect_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cause_and_effect_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml
+# Generated by utils.py
+dataset_name: checkmate_in_one_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_checkmate_in_one_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml
+# Generated by utils.py
+dataset_name: chess_state_tracking_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_chess_state_tracking_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml
+# Generated by utils.py
+dataset_name: chinese_remainder_theorem_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_chinese_remainder_theorem_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml
+# Generated by utils.py
+dataset_name: cifar10_classification_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cifar10_classification_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml
+# Generated by utils.py
+dataset_name: code_line_description_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_code_line_description_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
+# Generated by utils.py
+dataset_name: codenames_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_codenames_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/color.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/color.yaml
+# Generated by utils.py
+dataset_name: color_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_color_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
+# Generated by utils.py
+dataset_name: common_morpheme_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_common_morpheme_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
+# Generated by utils.py
+dataset_name: conceptual_combinations_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_conceptual_combinations_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
+# Generated by utils.py
+dataset_name: conlang_translation_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_conlang_translation_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
+# Generated by utils.py
+dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
+# Generated by utils.py
+dataset_name: crash_blossom_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_crash_blossom_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
+# Generated by utils.py
+dataset_name: crass_ai_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_crass_ai_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
+# Generated by utils.py
+dataset_name: cryobiology_spanish_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cryobiology_spanish_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
+# Generated by utils.py
+dataset_name: cryptonite_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cryptonite_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
+# Generated by utils.py
+dataset_name: cs_algorithms_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_cs_algorithms_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
+# Generated by utils.py
+dataset_name: dark_humor_detection_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_dark_humor_detection_multiple_choice