Commit 88486e57 authored by lintangsutawika
Browse files

Merge branch 'group-agg-rework' of...

Merge branch 'group-agg-rework' of https://github.com/EleutherAI/lm-evaluation-harness into multiprompt
parents 5971f2ca ba73d131
# Leaderboard MMLU-Pro: 5-shot multiple-choice task.
# Indentation restored: `sampler` must nest under `fewshot_config`, and the
# aggregation fields must nest under the metric-list entry.
dataset_path: TIGER-Lab/MMLU-Pro # a copy of `cais/leaderboard_mmlu` with no auxiliary_train split
task: leaderboard_mmlu_pro
test_split: test
fewshot_split: validation
fewshot_config:
  sampler: first_n
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_choice: !function utils.doc_to_choice
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
num_fewshot: 5
metadata:
  version: 0.1
import string
def doc_to_text(doc):
    """Render an MMLU-Pro doc as a lettered multiple-choice prompt.

    Produces the question, one "X. option" line per option (A, B, C, ...),
    and a trailing "Answer:" cue for the model to complete.
    """
    lines = [doc["question"]]
    for letter, option in zip(string.ascii_uppercase, doc["options"]):
        lines.append(f"{letter}. {option}")
    lines.append("Answer:")
    return "\n".join(lines)
def doc_to_choice(doc):
    """Return one uppercase letter label ("A", "B", ...) per option."""
    return list(string.ascii_uppercase[: len(doc["options"])])
# Leaderboard MuSR group template: shared settings for the three MuSR
# subtasks. Indentation restored: the aggregation fields must nest under
# the metric-list entry.
group: leaderboard_musr
task:
  - leaderboard_musr_murder_mysteries
  - leaderboard_musr_object_placements
  - leaderboard_musr_team_allocation
dataset_path: TAUR-Lab/MuSR
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_target: "{{answer_choice}}"
doc_to_choice: "{{choices}}"
metric_list:
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Murder-mysteries split of MuSR; all shared settings come from the template.
include: "_template_yaml"
task: leaderboard_musr_murder_mysteries
test_split: murder_mysteries
# Object-placements split of MuSR; all shared settings come from the template.
include: "_template_yaml"
task: leaderboard_musr_object_placements
test_split: object_placements
# Team-allocation split of MuSR; all shared settings come from the template.
include: "_template_yaml"
task: leaderboard_musr_team_allocation
test_split: team_allocation
import ast
def doc_to_choice(doc):
    """Decode the doc's answer choices.

    The dataset stores choices as a string-encoded Python list; parse it
    safely (no arbitrary code execution) via ast.literal_eval.
    """
    encoded_choices = doc["choices"]
    return ast.literal_eval(encoded_choices)
# Prompt skeleton: narrative, question, numbered choices, then an answer cue.
DOC_TO_TEXT = "{narrative}\n\n{question}\n\n{choices}\nAnswer:"


def doc_to_text(doc):
    """Format a MuSR doc as narrative, question, numbered choices, and cue.

    Choices are stored as a string-encoded Python list; each is rendered as
    "<1-based index> - <choice>" on its own line.
    """
    numbered = "".join(
        f"{idx} - {option}\n"
        for idx, option in enumerate(ast.literal_eval(doc["choices"]), start=1)
    )
    return DOC_TO_TEXT.format(
        narrative=doc["narrative"], question=doc["question"], choices=numbered
    )
# NOTE(review): the bare `group:` key parses as null — this looks like residue
# of the group -> tag rename in this merge (the rendered diff shows both keys);
# confirm whether `group:` should be dropped entirely.
group:
tag:
- math_word_problems
task: mathqa
dataset_path: math_qa
......
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
......@@ -109,10 +110,9 @@ if __name__ == "__main__":
yaml_dict = {
"include": base_yaml_name,
"group": f"mmlu_{args.task_prefix}_{category}"
"tag": f"mmlu_{args.task_prefix}_{category}"
if args.task_prefix != ""
else f"mmlu_{category}",
"group_alias": category.replace("_", " "),
"task": f"mmlu_{args.task_prefix}_{subject}"
if args.task_prefix != ""
else f"mmlu_{subject}",
......
......@@ -9,3 +9,5 @@ doc_to_choice: "{{choices}}"
doc_to_target: "{{answer}}"
metadata:
version: 0.0
dataset_kwargs:
trust_remote_code: true
# NOTE(review): this block appears to be merge-diff residue rendered without
# +/- markers — the flat four-entry task list AND the nested per-category
# subgroup entries (each with its own aggregate_metric_list) are both present,
# and nesting indentation has been stripped. Confirm against the post-merge
# file which form of `task:` should be kept before relying on this config.
group: mmlu_continuation
group_alias: mmlu (continuation)
task:
- mmlu_continuation_stem
- mmlu_continuation_other
- mmlu_continuation_social_sciences
- mmlu_continuation_humanities
# NOTE(review): subgroup entries below presumably replace the flat list above.
- group: stem
task:
- mmlu_continuation_stem
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_continuation_other
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_continuation_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_continuation_humanities
aggregate_metric_list:
- metric: acc
weight_by_size: True
# Top-level aggregation across the whole mmlu_continuation group.
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 1
"dataset_name": "abstract_algebra"
"description": "The following are questions (with answers) about abstract\
\ algebra.\n\n"
"group": "mmlu_continuation_stem"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_abstract_algebra"
"dataset_name": "anatomy"
"description": "The following are questions (with answers) about anatomy.\n\
\n"
"group": "mmlu_continuation_stem"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_anatomy"
"dataset_name": "astronomy"
"description": "The following are questions (with answers) about astronomy.\n\
\n"
"group": "mmlu_continuation_stem"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_astronomy"
"dataset_name": "business_ethics"
"description": "The following are questions (with answers) about business\
\ ethics.\n\n"
"group": "mmlu_continuation_other"
"tag": "mmlu_continuation_other"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_business_ethics"
"dataset_name": "clinical_knowledge"
"description": "The following are questions (with answers) about clinical\
\ knowledge.\n\n"
"group": "mmlu_continuation_other"
"tag": "mmlu_continuation_other"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_clinical_knowledge"
"dataset_name": "college_biology"
"description": "The following are questions (with answers) about college\
\ biology.\n\n"
"group": "mmlu_continuation_stem"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_college_biology"
"dataset_name": "college_chemistry"
"description": "The following are questions (with answers) about college\
\ chemistry.\n\n"
"group": "mmlu_continuation_stem"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_college_chemistry"
"dataset_name": "college_computer_science"
"description": "The following are questions (with answers) about college\
\ computer science.\n\n"
"group": "mmlu_continuation_stem"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_college_computer_science"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment