add many explicit group configs

51519e40 · haileyschoelkopf · 44a602ab · 51519e40 · 51519e40 · 51519e40
Commit 51519e40 authored Jun 25, 2024 by haileyschoelkopf
13 changed files
--- a/lm_eval/tasks/agieval/lsat-rc.yaml
+++ b/lm_eval/tasks/agieval/lsat-rc.yaml
 include: aqua-rat.yaml
-group:
-  - agieval
-  - agieval_nous
-  - agieval_en
 task: agieval_lsat_rc
 dataset_path: hails/agieval-lsat-rc
--- a/lm_eval/tasks/agieval/math.yaml
+++ b/lm_eval/tasks/agieval/math.yaml
-group:
-  - agieval
-  - agieval_en
 task: agieval_math
 dataset_path: hails/agieval-math
 dataset_name: null

--- a/lm_eval/tasks/agieval/sat-en-without-passage.yaml
+++ b/lm_eval/tasks/agieval/sat-en-without-passage.yaml
 include: aqua-rat.yaml
-group:
-  - agieval
-  - agieval_nous
-  - agieval_en
 task: agieval_sat_en_without_passage
 dataset_path: hails/agieval-sat-en-without-passage
--- a/lm_eval/tasks/agieval/sat-en.yaml
+++ b/lm_eval/tasks/agieval/sat-en.yaml
 include: aqua-rat.yaml
-group:
-  - agieval
-  - agieval_nous
-  - agieval_en
 task: agieval_sat_en
 dataset_path: hails/agieval-sat-en
--- a/lm_eval/tasks/agieval/sat-math.yaml
+++ b/lm_eval/tasks/agieval/sat-math.yaml
 include: aqua-rat.yaml
-group:
-  - agieval
-  - agieval_nous
-  - agieval_en
 task: agieval_sat_math
 dataset_path: hails/agieval-sat-math
--- a/lm_eval/tasks/bbh/cot_fewshot/_bbh.yaml
+++ b/lm_eval/tasks/bbh/cot_fewshot/_bbh.yaml
@@ -27,7 +27,7 @@ task:
  - bbh_cot_fewshot_tracking_shuffled_objects_three_objects
  - bbh_cot_fewshot_web_of_lies
  - bbh_cot_fewshot_word_sorting
-aggregate_metric:
+aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    weight_by_size: true

--- a/lm_eval/tasks/bbh/cot_fewshot/_bbh_cot_fewshot.yaml
+++ b/lm_eval/tasks/bbh/cot_fewshot/_bbh_cot_fewshot.yaml
@@ -27,7 +27,7 @@ task:
  - bbh_cot_fewshot_tracking_shuffled_objects_three_objects
  - bbh_cot_fewshot_web_of_lies
  - bbh_cot_fewshot_word_sorting
-aggregate_metric:
+aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    weight_by_size: true

--- a/lm_eval/tasks/bbh/cot_zeroshot/_bbh_cot_zeroshot.yaml
+++ b/lm_eval/tasks/bbh/cot_zeroshot/_bbh_cot_zeroshot.yaml
@@ -27,7 +27,7 @@ task:
  - bbh_cot_zeroshot_tracking_shuffled_objects_three_objects
  - bbh_cot_zeroshot_web_of_lies
  - bbh_cot_zeroshot_word_sorting
-aggregate_metric:
+aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    weight_by_size: true

--- a/lm_eval/tasks/bbh/fewshot/_bbh_fewshot.yaml
+++ b/lm_eval/tasks/bbh/fewshot/_bbh_fewshot.yaml
@@ -27,7 +27,7 @@ task:
  - bbh_fewshot_tracking_shuffled_objects_three_objects
  - bbh_fewshot_web_of_lies
  - bbh_fewshot_word_sorting
-aggregate_metric:
+aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    weight_by_size: true

--- a/lm_eval/tasks/bbh/zeroshot/_bbh_zeroshot.yaml
+++ b/lm_eval/tasks/bbh/zeroshot/_bbh_zeroshot.yaml
@@ -27,7 +27,7 @@ task:
  - bbh_zeroshot_tracking_shuffled_objects_three_objects
  - bbh_zeroshot_web_of_lies
  - bbh_zeroshot_word_sorting
-aggregate_metric:
+aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    weight_by_size: true

--- a/lm_eval/tasks/storycloze/storycloze_2016.yaml
+++ b/lm_eval/tasks/storycloze/storycloze_2016.yaml
-group: storycloze
+tag: storycloze
 task: storycloze_2016
 dataset_path: story_cloze
 dataset_name: 2016

--- a/lm_eval/tasks/storycloze/storycloze_2018.yaml
+++ b/lm_eval/tasks/storycloze/storycloze_2018.yaml
-group: storycloze
+tag: storycloze
 task: storycloze_2018
 dataset_path: story_cloze
 dataset_name: 2018

--- a/lm_eval/tasks/super_glue/README.md
+++ b/lm_eval/tasks/super_glue/README.md
@@ -26,10 +26,14 @@ Homepage: https://super.gluebenchmark.com/
 }
 ```
-### Groups and Tasks
+### Groups, Tags, and Tasks
 #### Groups
+None.
+#### Tags
 * `super-glue-lm-eval-v1`: SuperGLUE eval adapted from LM Eval V1
 * `super-glue-t5-prompt`: SuperGLUE prompt and evaluation that matches the T5 paper (when run with accelerate, this will error if record is included).