Commit 9a30374c authored by lintangsutawika
Browse files

update all mmlu variants

parent e6b1581f
# Group definition for the continuation-style MMLU variant.
#
# Each subject category is declared as an inline subgroup under `task`
# (instead of a flat list of category names), so every subgroup carries its
# own `aggregate_metric` / `weight_by_size` settings.
# NOTE(review): exact semantics of these two flags are defined by the
# consuming harness, not by this file — confirm against its group schema.
group: mmlu_continuation
group_alias: mmlu (continuation)
task:
  - group: stem
    task:
      - mmlu_continuation_stem
    aggregate_metric: True
    weight_by_size: True
  - group: other
    task:
      - mmlu_continuation_other
    aggregate_metric: True
    weight_by_size: True
  - group: social sciences
    task:
      - mmlu_continuation_social_sciences
    aggregate_metric: True
    weight_by_size: True
  - group: humanities
    task:
      - mmlu_continuation_humanities
    aggregate_metric: True
    weight_by_size: True
# Settings for the top-level `mmlu_continuation` group itself.
aggregate_metric: True
weight_by_size: True
# Group definition for the FLAN-style few-shot chain-of-thought MMLU variant.
#
# Each subject category is declared as an inline subgroup under `task`, and
# every subgroup carries its own `aggregate_metric` / `weight_by_size`
# settings. The top-level settings are plain keys on the group (no
# `group_config:` wrapper).
# NOTE(review): confirm the harness reads these flags at the top level rather
# than under `group_config`.
group: mmlu_flan_cot_fewshot
group_alias: mmlu (flan style, fewshot cot)
task:
  - group: stem
    task:
      - mmlu_flan_cot_fewshot_stem
    aggregate_metric: True
    weight_by_size: True
  - group: other
    task:
      - mmlu_flan_cot_fewshot_other
    aggregate_metric: True
    weight_by_size: True
  - group: social sciences
    task:
      - mmlu_flan_cot_fewshot_social_sciences
    aggregate_metric: True
    weight_by_size: True
  - group: humanities
    task:
      - mmlu_flan_cot_fewshot_humanities
    aggregate_metric: True
    weight_by_size: True
# Settings for the top-level `mmlu_flan_cot_fewshot` group itself.
aggregate_metric: True
weight_by_size: True
# Group definition for the FLAN-style zero-shot chain-of-thought MMLU variant.
#
# Each subject category is declared as an inline subgroup under `task`, and
# every subgroup carries its own `aggregate_metric` / `weight_by_size`
# settings. The top-level settings are plain keys on the group (no
# `group_config:` wrapper).
# NOTE(review): confirm the harness reads these flags at the top level rather
# than under `group_config`.
group: mmlu_flan_cot_zeroshot
group_alias: mmlu (flan style, zeroshot cot)
task:
  - group: stem
    task:
      - mmlu_flan_cot_zeroshot_stem
    aggregate_metric: True
    weight_by_size: True
  - group: other
    task:
      - mmlu_flan_cot_zeroshot_other
    aggregate_metric: True
    weight_by_size: True
  - group: social sciences
    task:
      - mmlu_flan_cot_zeroshot_social_sciences
    aggregate_metric: True
    weight_by_size: True
  - group: humanities
    task:
      - mmlu_flan_cot_zeroshot_humanities
    aggregate_metric: True
    weight_by_size: True
# Settings for the top-level `mmlu_flan_cot_zeroshot` group itself.
aggregate_metric: True
weight_by_size: True
# Group definition for the FLAN-style n-shot generative MMLU variant.
#
# Each subject category is declared as an inline subgroup under `task`, and
# every subgroup carries its own `aggregate_metric` / `weight_by_size`
# settings. The top-level settings are plain keys on the group (no
# `group_config:` wrapper).
# NOTE(review): confirm the harness reads these flags at the top level rather
# than under `group_config`.
group: mmlu_flan_n_shot_generative
group_alias: mmlu (flan style, generative)
task:
  - group: stem
    task:
      - mmlu_flan_n_shot_generative_stem
    aggregate_metric: True
    weight_by_size: True
  - group: other
    task:
      - mmlu_flan_n_shot_generative_other
    aggregate_metric: True
    weight_by_size: True
  - group: social sciences
    task:
      - mmlu_flan_n_shot_generative_social_sciences
    aggregate_metric: True
    weight_by_size: True
  - group: humanities
    task:
      - mmlu_flan_n_shot_generative_humanities
    aggregate_metric: True
    weight_by_size: True
# Settings for the top-level `mmlu_flan_n_shot_generative` group itself.
aggregate_metric: True
weight_by_size: True
group: mmlu_flan_n_shot_loglikelihood
group_alias: mmlu (flan style)
group_alias: mmlu (flan style, loglikelihood)
task:
- group: stem
task:
......@@ -16,7 +16,7 @@ task:
- mmlu_flan_n_shot_loglikelihood_social_sciences
aggregate_metric: True
weight_by_size: True
- group: social sciences
- group: humanities
task:
- mmlu_flan_n_shot_loglikelihood_humanities
aggregate_metric: True
......
# Group definition for the generative MMLU variant.
#
# Each subject category is declared as an inline subgroup under `task`
# (instead of a flat list of category names), so every subgroup carries its
# own `aggregate_metric` / `weight_by_size` settings.
# NOTE(review): exact semantics of these two flags are defined by the
# consuming harness, not by this file — confirm against its group schema.
group: mmlu_generative
group_alias: mmlu (generative)
task:
  - group: stem
    task:
      - mmlu_stem_generative
    aggregate_metric: True
    weight_by_size: True
  - group: other
    task:
      - mmlu_other_generative
    aggregate_metric: True
    weight_by_size: True
  - group: social sciences
    task:
      - mmlu_social_sciences_generative
    aggregate_metric: True
    weight_by_size: True
  - group: humanities
    task:
      - mmlu_humanities_generative
    aggregate_metric: True
    weight_by_size: True
# Settings for the top-level `mmlu_generative` group itself.
aggregate_metric: True
weight_by_size: True
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment