Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into flan-benchmark

Merge branch 'big-refactor' of...
Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into flan-benchmark
337492ad · lintangsutawika · 3d2ee4d4 · 4824a832 · 337492ad · 337492ad
Commit 337492ad authored Oct 05, 2023 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml
+# Generated by utils.py
+dataset_name: simple_arithmetic_multiple_targets_json_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_arithmetic_multiple_targets_json_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml
+# Generated by utils.py
+dataset_name: simple_ethical_questions_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_ethical_questions_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml
+# Generated by utils.py
+dataset_name: simple_text_editing_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_simple_text_editing_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml
+# Generated by utils.py
+dataset_name: snarks_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_snarks_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml
+# Generated by utils.py
+dataset_name: social_iqa_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_social_iqa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml
+# Generated by utils.py
+dataset_name: social_support_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_social_support_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml
+# Generated by utils.py
+dataset_name: sports_understanding_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_sports_understanding_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml
+# Generated by utils.py
+dataset_name: strange_stories_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_strange_stories_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml
+# Generated by utils.py
+dataset_name: strategyqa_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_strategyqa_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml
+# Generated by utils.py
+dataset_name: sufficient_information_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_sufficient_information_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml
+# Generated by utils.py
+dataset_name: suicide_risk_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_suicide_risk_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml
+# Generated by utils.py
+dataset_name: swahili_english_proverbs_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_swahili_english_proverbs_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml
+# Generated by utils.py
+dataset_name: swedish_to_german_proverbs_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_swedish_to_german_proverbs_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml
+# Generated by utils.py
+dataset_name: symbol_interpretation_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_symbol_interpretation_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml
+# Generated by utils.py
+dataset_name: temporal_sequences_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_temporal_sequences_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/tense.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/tense.yaml
+# Generated by utils.py
+dataset_name: tense_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_tense_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml
+# Generated by utils.py
+dataset_name: timedial_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_timedial_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml
+# Generated by utils.py
+dataset_name: topical_chat_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_topical_chat_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml
+# Generated by utils.py
+dataset_name: tracking_shuffled_objects_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_tracking_shuffled_objects_multiple_choice
--- a/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml
+++ b/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml
+# Generated by utils.py
+dataset_name: understanding_fables_zero_shot
+include: ../multiple_choice_template_yaml
+task: bigbench_understanding_fables_multiple_choice