Commit 9a30374c authored by lintangsutawika
Browse files

update all mmlu variants

parent e6b1581f
group: mmlu_continuation group: mmlu_continuation
group_alias: mmlu (continuation)
task: task:
- mmlu_continuation_stem - group: stem
- mmlu_continuation_other task:
- mmlu_continuation_social_sciences - mmlu_continuation_stem
- mmlu_continuation_humanities aggregate_metric: True
weight_by_size: True
- group: other
task:
- mmlu_continuation_other
aggregate_metric: True
weight_by_size: True
- group: social sciences
task:
- mmlu_continuation_social_sciences
aggregate_metric: True
weight_by_size: True
- group: humanities
task:
- mmlu_continuation_humanities
aggregate_metric: True
weight_by_size: True
aggregate_metric: True
weight_by_size: True
group: mmlu_flan_cot_fewshot group: mmlu_flan_cot_fewshot
group_alias: mmlu (flan style, fewshot cot)
task: task:
- mmlu_flan_cot_fewshot_stem - group: stem
- mmlu_flan_cot_fewshot_other task:
- mmlu_flan_cot_fewshot_social_sciences - mmlu_flan_cot_fewshot_stem
- mmlu_flan_cot_fewshot_humanities aggregate_metric: True
group_config: weight_by_size: True
aggregate_metric: True - group: other
weight_by_size: True task:
- mmlu_flan_cot_fewshot_other
aggregate_metric: True
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_cot_fewshot_social_sciences
aggregate_metric: True
weight_by_size: True
- group: humanities
task:
- mmlu_flan_cot_fewshot_humanities
aggregate_metric: True
weight_by_size: True
aggregate_metric: True
weight_by_size: True
group: mmlu_flan_cot_zeroshot group: mmlu_flan_cot_zeroshot
group_alias: mmlu (flan style, zeroshot cot)
task: task:
- mmlu_flan_cot_zeroshot_stem - group: stem
- mmlu_flan_cot_zeroshot_other task:
- mmlu_flan_cot_zeroshot_social_sciences - mmlu_flan_cot_zeroshot_stem
- mmlu_flan_cot_zeroshot_humanities aggregate_metric: True
group_config: weight_by_size: True
aggregate_metric: True - group: other
weight_by_size: True task:
- mmlu_flan_cot_zeroshot_other
aggregate_metric: True
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_cot_zeroshot_social_sciences
aggregate_metric: True
weight_by_size: True
- group: humanities
task:
- mmlu_flan_cot_zeroshot_humanities
aggregate_metric: True
weight_by_size: True
aggregate_metric: True
weight_by_size: True
group: mmlu_flan_n_shot_generative group: mmlu_flan_n_shot_generative
group_alias: mmlu (flan style, generative)
task: task:
- mmlu_flan_n_shot_generative_stem - group: stem
- mmlu_flan_n_shot_generative_other task:
- mmlu_flan_n_shot_generative_social_sciences - mmlu_flan_n_shot_generative_stem
- mmlu_flan_n_shot_generative_humanities aggregate_metric: True
group_config: weight_by_size: True
aggregate_metric: True - group: other
weight_by_size: True task:
- mmlu_flan_n_shot_generative_other
aggregate_metric: True
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_n_shot_generative_social_sciences
aggregate_metric: True
weight_by_size: True
- group: humanities
task:
- mmlu_flan_n_shot_generative_humanities
aggregate_metric: True
weight_by_size: True
aggregate_metric: True
weight_by_size: True
group: mmlu_flan_n_shot_loglikelihood group: mmlu_flan_n_shot_loglikelihood
group_alias: mmlu (flan style) group_alias: mmlu (flan style, loglikelihood)
task: task:
- group: stem - group: stem
task: task:
...@@ -16,7 +16,7 @@ task: ...@@ -16,7 +16,7 @@ task:
- mmlu_flan_n_shot_loglikelihood_social_sciences - mmlu_flan_n_shot_loglikelihood_social_sciences
aggregate_metric: True aggregate_metric: True
weight_by_size: True weight_by_size: True
- group: social sciences - group: humanities
task: task:
- mmlu_flan_n_shot_loglikelihood_humanities - mmlu_flan_n_shot_loglikelihood_humanities
aggregate_metric: True aggregate_metric: True
......
group: mmlu_generative group: mmlu_generative
group_alias: mmlu (generative)
task: task:
- mmlu_stem_generative - group: stem
- mmlu_other_generative task:
- mmlu_social_sciences_generative - mmlu_stem_generative
- mmlu_humanities_generative aggregate_metric: True
weight_by_size: True
- group: other
task:
- mmlu_other_generative
aggregate_metric: True
weight_by_size: True
- group: social sciences
task:
- mmlu_social_sciences_generative
aggregate_metric: True
weight_by_size: True
- group: humanities
task:
- mmlu_humanities_generative
aggregate_metric: True
weight_by_size: True
aggregate_metric: True
weight_by_size: True
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment