Commit 5b64fb58 authored by lintangsutawika
Browse files

update aggregate_metric arg

parent 9fa3b3f4
......@@ -4,8 +4,9 @@ task:
# ANLI R1
- group: anli_r1_flan
group_alias: ANLI R1
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: anli_r1
task_alias: prompt-0
......@@ -55,8 +56,9 @@ task:
# ANLI R2
- group: anli_r2_flan
group_alias: ANLI R2
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: anli_r2
task_alias: prompt-0
......@@ -106,8 +108,9 @@ task:
# ANLI R3
- group: anli_r3_flan
group_alias: ANLI R3
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: anli_r3
task_alias: prompt-0
......@@ -157,8 +160,9 @@ task:
# Arc Easy
- group: arc_easy_flan
group_alias: Arc Easy
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: arc_easy
task_alias: prompt-0
......@@ -198,8 +202,9 @@ task:
# Arc Challenge
- group: arc_challenge_flan
group_alias: Arc Challenge
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: arc_challenge
task_alias: prompt-0
......@@ -239,8 +244,9 @@ task:
# BoolQ
- group: boolq_flan
group_alias: BoolQ
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: boolq
task_alias: prompt-0
......@@ -295,8 +301,9 @@ task:
# RTE
- group: rte_flan
group_alias: RTE
group_config:
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
task:
- task: rte
task_alias: prompt-0
......
......@@ -4,24 +4,29 @@ task:
- group: stem
task:
- mmlu_continuation_stem
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_continuation_other
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_continuation_social_sciences
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_continuation_humanities
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata:
version: 1
......@@ -4,7 +4,8 @@ task:
- mmlu_other
- mmlu_social_sciences
- mmlu_humanities
aggregate_metric: True
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
metadata:
version: 1
......@@ -2,6 +2,8 @@ group: mmlu_humanities
group_alias: humanities
task:
- mmlu_humanities_tasks
aggregate_metric: True
weight_by_size: True
version: 1
aggregate_metric:
- metric: acc
weight_by_size: True
metadata:
version: 1
......@@ -2,6 +2,8 @@ group: mmlu_other
group_alias: other
task:
- mmlu_other_tasks
aggregate_metric: True
weight_by_size: True
version: 1
aggregate_metric:
- metric: acc
weight_by_size: True
metadata:
version: 1
\ No newline at end of file
......@@ -2,6 +2,8 @@ group: mmlu_social_sciences
group_alias: social sciences
task:
- mmlu_social_sciences_tasks
aggregate_metric: True
weight_by_size: True
version: 1
aggregate_metric:
- metric: acc
weight_by_size: True
metadata:
version: 1
\ No newline at end of file
......@@ -2,6 +2,8 @@ group: mmlu_stem
group_alias: stem
task:
- mmlu_stem_tasks
aggregate_metric: True
weight_by_size: True
version: 1
aggregate_metric:
- metric: acc
weight_by_size: True
metadata:
version: 1
\ No newline at end of file
......@@ -4,24 +4,29 @@ task:
- group: stem
task:
- mmlu_flan_cot_fewshot_stem
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_flan_cot_fewshot_other
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_cot_fewshot_social_sciences
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_flan_cot_fewshot_humanities
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata:
version: 1
......@@ -4,24 +4,29 @@ task:
- group: stem
task:
- mmlu_flan_cot_zeroshot_stem
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_flan_cot_zeroshot_other
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_cot_zeroshot_social_sciences
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_flan_cot_zeroshot_humanities
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata:
version: 1
......@@ -4,24 +4,29 @@ task:
- group: stem
task:
- mmlu_flan_n_shot_generative_stem
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_flan_n_shot_generative_other
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_n_shot_generative_social_sciences
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_flan_n_shot_generative_humanities
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata:
version: 1
......@@ -4,24 +4,29 @@ task:
- group: stem
task:
- mmlu_flan_n_shot_loglikelihood_stem
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_flan_n_shot_loglikelihood_other
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_n_shot_loglikelihood_social_sciences
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_flan_n_shot_loglikelihood_humanities
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata:
version: 1
......@@ -4,24 +4,29 @@ task:
- group: stem
task:
- mmlu_stem_generative
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_other_generative
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_social_sciences_generative
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_humanities_generative
aggregate_metric: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric:
- metric: acc
weight_by_size: True
aggregate_metric: True
weight_by_size: True
metadata:
version: 1
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment