Merge remote-tracking branch 'origin/big-refactor' into calibration

e1ae8a2f · Herbie Bradley · 50e99bd7 · 30936bc7 · e1ae8a2f · e1ae8a2f
Commit e1ae8a2f authored Nov 26, 2023 by Herbie Bradley
20 changed files
--- a/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml
+# Generated by utils.py
+dataset_name: presuppositions_as_nli_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_presuppositions_as_nli_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml
+# Generated by utils.py
+dataset_name: qa_wikidata_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_qa_wikidata_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml
+# Generated by utils.py
+dataset_name: question_selection_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_question_selection_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml
+# Generated by utils.py
+dataset_name: real_or_fake_text_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_real_or_fake_text_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml
+# Generated by utils.py
+dataset_name: reasoning_about_colored_objects_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_reasoning_about_colored_objects_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml
+# Generated by utils.py
+dataset_name: repeat_copy_logic_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_repeat_copy_logic_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml
+# Generated by utils.py
+dataset_name: rephrase_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_rephrase_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml
+# Generated by utils.py
+dataset_name: riddle_sense_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_riddle_sense_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml
+# Generated by utils.py
+dataset_name: ruin_names_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_ruin_names_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml
+# Generated by utils.py
+dataset_name: salient_translation_error_detection_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_salient_translation_error_detection_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml
+# Generated by utils.py
+dataset_name: scientific_press_release_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_scientific_press_release_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml
+# Generated by utils.py
+dataset_name: semantic_parsing_in_context_sparc_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_semantic_parsing_in_context_sparc_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml
+# Generated by utils.py
+dataset_name: semantic_parsing_spider_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_semantic_parsing_spider_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml
+# Generated by utils.py
+dataset_name: sentence_ambiguity_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_sentence_ambiguity_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml
+# Generated by utils.py
+dataset_name: similarities_abstraction_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_similarities_abstraction_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml
+# Generated by utils.py
+dataset_name: simp_turing_concept_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simp_turing_concept_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml
+# Generated by utils.py
+dataset_name: simple_arithmetic_json_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_arithmetic_json_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml
+# Generated by utils.py
+dataset_name: simple_arithmetic_json_multiple_choice_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_arithmetic_json_multiple_choice_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml
+# Generated by utils.py
+dataset_name: simple_arithmetic_json_subtasks_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_arithmetic_json_subtasks_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml
+# Generated by utils.py
+dataset_name: simple_arithmetic_multiple_targets_json_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_arithmetic_multiple_targets_json_multiple_choice