Commit 51519e40 authored by haileyschoelkopf
Browse files

add many explicit group configs

parent 44a602ab
include: aqua-rat.yaml include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_rc task: agieval_lsat_rc
dataset_path: hails/agieval-lsat-rc dataset_path: hails/agieval-lsat-rc
group:
- agieval
- agieval_en
task: agieval_math task: agieval_math
dataset_path: hails/agieval-math dataset_path: hails/agieval-math
dataset_name: null dataset_name: null
......
include: aqua-rat.yaml include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_en_without_passage task: agieval_sat_en_without_passage
dataset_path: hails/agieval-sat-en-without-passage dataset_path: hails/agieval-sat-en-without-passage
include: aqua-rat.yaml include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_en task: agieval_sat_en
dataset_path: hails/agieval-sat-en dataset_path: hails/agieval-sat-en
include: aqua-rat.yaml include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_math task: agieval_sat_math
dataset_path: hails/agieval-sat-math dataset_path: hails/agieval-sat-math
...@@ -27,7 +27,7 @@ task: ...@@ -27,7 +27,7 @@ task:
- bbh_cot_fewshot_tracking_shuffled_objects_three_objects - bbh_cot_fewshot_tracking_shuffled_objects_three_objects
- bbh_cot_fewshot_web_of_lies - bbh_cot_fewshot_web_of_lies
- bbh_cot_fewshot_word_sorting - bbh_cot_fewshot_word_sorting
aggregate_metric: aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: true weight_by_size: true
......
...@@ -27,7 +27,7 @@ task: ...@@ -27,7 +27,7 @@ task:
- bbh_cot_fewshot_tracking_shuffled_objects_three_objects - bbh_cot_fewshot_tracking_shuffled_objects_three_objects
- bbh_cot_fewshot_web_of_lies - bbh_cot_fewshot_web_of_lies
- bbh_cot_fewshot_word_sorting - bbh_cot_fewshot_word_sorting
aggregate_metric: aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: true weight_by_size: true
......
...@@ -27,7 +27,7 @@ task: ...@@ -27,7 +27,7 @@ task:
- bbh_cot_zeroshot_tracking_shuffled_objects_three_objects - bbh_cot_zeroshot_tracking_shuffled_objects_three_objects
- bbh_cot_zeroshot_web_of_lies - bbh_cot_zeroshot_web_of_lies
- bbh_cot_zeroshot_word_sorting - bbh_cot_zeroshot_word_sorting
aggregate_metric: aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: true weight_by_size: true
......
...@@ -27,7 +27,7 @@ task: ...@@ -27,7 +27,7 @@ task:
- bbh_fewshot_tracking_shuffled_objects_three_objects - bbh_fewshot_tracking_shuffled_objects_three_objects
- bbh_fewshot_web_of_lies - bbh_fewshot_web_of_lies
- bbh_fewshot_word_sorting - bbh_fewshot_word_sorting
aggregate_metric: aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: true weight_by_size: true
......
...@@ -27,7 +27,7 @@ task: ...@@ -27,7 +27,7 @@ task:
- bbh_zeroshot_tracking_shuffled_objects_three_objects - bbh_zeroshot_tracking_shuffled_objects_three_objects
- bbh_zeroshot_web_of_lies - bbh_zeroshot_web_of_lies
- bbh_zeroshot_word_sorting - bbh_zeroshot_word_sorting
aggregate_metric: aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: true weight_by_size: true
......
group: storycloze tag: storycloze
task: storycloze_2016 task: storycloze_2016
dataset_path: story_cloze dataset_path: story_cloze
dataset_name: 2016 dataset_name: 2016
......
group: storycloze tag: storycloze
task: storycloze_2018 task: storycloze_2018
dataset_path: story_cloze dataset_path: story_cloze
dataset_name: 2018 dataset_name: 2018
......
...@@ -26,10 +26,14 @@ Homepage: https://super.gluebenchmark.com/ ...@@ -26,10 +26,14 @@ Homepage: https://super.gluebenchmark.com/
} }
``` ```
### Groups and Tasks ### Groups, Tags, and Tasks
#### Groups #### Groups
None.
#### Tags
* `super-glue-lm-eval-v1`: SuperGLUE eval adapted from LM Eval V1 * `super-glue-lm-eval-v1`: SuperGLUE eval adapted from LM Eval V1
* `super-glue-t5-prompt`: SuperGLUE prompt and evaluation that matches the T5 paper (if using accelerate, will error if record is included.) * `super-glue-t5-prompt`: SuperGLUE prompt and evaluation that matches the T5 paper (if using accelerate, will error if record is included.)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment