Commit 5b64fb58 authored by lintangsutawika
Browse files

update aggregate_metric arg

parent 9fa3b3f4
...@@ -4,8 +4,9 @@ task: ...@@ -4,8 +4,9 @@ task:
# ANLI R1 # ANLI R1
- group: anli_r1_flan - group: anli_r1_flan
group_alias: ANLI R1 group_alias: ANLI R1
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: anli_r1 - task: anli_r1
task_alias: prompt-0 task_alias: prompt-0
...@@ -55,8 +56,9 @@ task: ...@@ -55,8 +56,9 @@ task:
# ANLI R2 # ANLI R2
- group: anli_r2_flan - group: anli_r2_flan
group_alias: ANLI R2 group_alias: ANLI R2
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: anli_r2 - task: anli_r2
task_alias: prompt-0 task_alias: prompt-0
...@@ -106,8 +108,9 @@ task: ...@@ -106,8 +108,9 @@ task:
# ANLI R3 # ANLI R3
- group: anli_r3_flan - group: anli_r3_flan
group_alias: ANLI R3 group_alias: ANLI R3
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: anli_r3 - task: anli_r3
task_alias: prompt-0 task_alias: prompt-0
...@@ -157,8 +160,9 @@ task: ...@@ -157,8 +160,9 @@ task:
# Arc Easy # Arc Easy
- group: arc_easy_flan - group: arc_easy_flan
group_alias: Arc Easy group_alias: Arc Easy
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: arc_easy - task: arc_easy
task_alias: prompt-0 task_alias: prompt-0
...@@ -198,8 +202,9 @@ task: ...@@ -198,8 +202,9 @@ task:
# Arc Challenge # Arc Challenge
- group: arc_challenge_flan - group: arc_challenge_flan
group_alias: Arc Challenge group_alias: Arc Challenge
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: arc_challenge - task: arc_challenge
task_alias: prompt-0 task_alias: prompt-0
...@@ -239,8 +244,9 @@ task: ...@@ -239,8 +244,9 @@ task:
# BoolQ # BoolQ
- group: boolq_flan - group: boolq_flan
group_alias: BoolQ group_alias: BoolQ
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: boolq - task: boolq
task_alias: prompt-0 task_alias: prompt-0
...@@ -295,8 +301,9 @@ task: ...@@ -295,8 +301,9 @@ task:
# RTE # RTE
- group: rte_flan - group: rte_flan
group_alias: RTE group_alias: RTE
group_config: aggregate_metric:
aggregate_metric: True - metric: acc
weight_by_size: True
task: task:
- task: rte - task: rte
task_alias: prompt-0 task_alias: prompt-0
......
...@@ -4,24 +4,29 @@ task: ...@@ -4,24 +4,29 @@ task:
- group: stem - group: stem
task: task:
- mmlu_continuation_stem - mmlu_continuation_stem
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: other - group: other
task: task:
- mmlu_continuation_other - mmlu_continuation_other
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: social sciences - group: social sciences
task: task:
- mmlu_continuation_social_sciences - mmlu_continuation_social_sciences
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: humanities - group: humanities
task: task:
- mmlu_continuation_humanities - mmlu_continuation_humanities
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata: metadata:
version: 1 version: 1
...@@ -4,7 +4,8 @@ task: ...@@ -4,7 +4,8 @@ task:
- mmlu_other - mmlu_other
- mmlu_social_sciences - mmlu_social_sciences
- mmlu_humanities - mmlu_humanities
aggregate_metric: True aggregate_metric:
weight_by_size: True - metric: acc
weight_by_size: True
metadata: metadata:
version: 1 version: 1
...@@ -2,6 +2,8 @@ group: mmlu_humanities ...@@ -2,6 +2,8 @@ group: mmlu_humanities
group_alias: humanities group_alias: humanities
task: task:
- mmlu_humanities_tasks - mmlu_humanities_tasks
aggregate_metric: True aggregate_metric:
weight_by_size: True - metric: acc
version: 1 weight_by_size: True
metadata:
version: 1
...@@ -2,6 +2,8 @@ group: mmlu_other ...@@ -2,6 +2,8 @@ group: mmlu_other
group_alias: other group_alias: other
task: task:
- mmlu_other_tasks - mmlu_other_tasks
aggregate_metric: True aggregate_metric:
weight_by_size: True - metric: acc
version: 1 weight_by_size: True
metadata:
version: 1
\ No newline at end of file
...@@ -2,6 +2,8 @@ group: mmlu_social_sciences ...@@ -2,6 +2,8 @@ group: mmlu_social_sciences
group_alias: social sciences group_alias: social sciences
task: task:
- mmlu_social_sciences_tasks - mmlu_social_sciences_tasks
aggregate_metric: True aggregate_metric:
weight_by_size: True - metric: acc
version: 1 weight_by_size: True
metadata:
version: 1
\ No newline at end of file
...@@ -2,6 +2,8 @@ group: mmlu_stem ...@@ -2,6 +2,8 @@ group: mmlu_stem
group_alias: stem group_alias: stem
task: task:
- mmlu_stem_tasks - mmlu_stem_tasks
aggregate_metric: True aggregate_metric:
weight_by_size: True - metric: acc
version: 1 weight_by_size: True
metadata:
version: 1
\ No newline at end of file
...@@ -4,24 +4,29 @@ task: ...@@ -4,24 +4,29 @@ task:
- group: stem - group: stem
task: task:
- mmlu_flan_cot_fewshot_stem - mmlu_flan_cot_fewshot_stem
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: other - group: other
task: task:
- mmlu_flan_cot_fewshot_other - mmlu_flan_cot_fewshot_other
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: social sciences - group: social sciences
task: task:
- mmlu_flan_cot_fewshot_social_sciences - mmlu_flan_cot_fewshot_social_sciences
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: humanities - group: humanities
task: task:
- mmlu_flan_cot_fewshot_humanities - mmlu_flan_cot_fewshot_humanities
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata: metadata:
version: 1 version: 1
...@@ -4,24 +4,29 @@ task: ...@@ -4,24 +4,29 @@ task:
- group: stem - group: stem
task: task:
- mmlu_flan_cot_zeroshot_stem - mmlu_flan_cot_zeroshot_stem
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: other - group: other
task: task:
- mmlu_flan_cot_zeroshot_other - mmlu_flan_cot_zeroshot_other
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: social sciences - group: social sciences
task: task:
- mmlu_flan_cot_zeroshot_social_sciences - mmlu_flan_cot_zeroshot_social_sciences
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: humanities - group: humanities
task: task:
- mmlu_flan_cot_zeroshot_humanities - mmlu_flan_cot_zeroshot_humanities
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata: metadata:
version: 1 version: 1
...@@ -4,24 +4,29 @@ task: ...@@ -4,24 +4,29 @@ task:
- group: stem - group: stem
task: task:
- mmlu_flan_n_shot_generative_stem - mmlu_flan_n_shot_generative_stem
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: other - group: other
task: task:
- mmlu_flan_n_shot_generative_other - mmlu_flan_n_shot_generative_other
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: social sciences - group: social sciences
task: task:
- mmlu_flan_n_shot_generative_social_sciences - mmlu_flan_n_shot_generative_social_sciences
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: humanities - group: humanities
task: task:
- mmlu_flan_n_shot_generative_humanities - mmlu_flan_n_shot_generative_humanities
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata: metadata:
version: 1 version: 1
...@@ -4,24 +4,29 @@ task: ...@@ -4,24 +4,29 @@ task:
- group: stem - group: stem
task: task:
- mmlu_flan_n_shot_loglikelihood_stem - mmlu_flan_n_shot_loglikelihood_stem
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: other - group: other
task: task:
- mmlu_flan_n_shot_loglikelihood_other - mmlu_flan_n_shot_loglikelihood_other
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: social sciences - group: social sciences
task: task:
- mmlu_flan_n_shot_loglikelihood_social_sciences - mmlu_flan_n_shot_loglikelihood_social_sciences
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: humanities - group: humanities
task: task:
- mmlu_flan_n_shot_loglikelihood_humanities - mmlu_flan_n_shot_loglikelihood_humanities
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata: metadata:
version: 1 version: 1
...@@ -4,24 +4,29 @@ task: ...@@ -4,24 +4,29 @@ task:
- group: stem - group: stem
task: task:
- mmlu_stem_generative - mmlu_stem_generative
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: other - group: other
task: task:
- mmlu_other_generative - mmlu_other_generative
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: social sciences - group: social sciences
task: task:
- mmlu_social_sciences_generative - mmlu_social_sciences_generative
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
- group: humanities - group: humanities
task: task:
- mmlu_humanities_generative - mmlu_humanities_generative
aggregate_metric: True aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata: metadata:
version: 1 version: 1
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment