# Group definition: bundles the four MMLU category subtasks under the single
# group name `mmlu`.
group: mmlu

# Members of the group. Each entry is a name that must be defined elsewhere
# (not visible in this file).
task:
  - mmlu_stem
  - mmlu_other
  - mmlu_social_sciences
  - mmlu_humanities

# How member results are combined into a group-level score.
# NOTE(review): the accepted key name depends on the consumer's schema; some
# lm-evaluation-harness versions appear to expect `aggregate_metric_list` --
# confirm against the loader in use before renaming.
aggregate_metric:
  - metric: acc
    # Presumably weights each member's `acc` by its number of examples rather
    # than taking a plain mean of the members -- TODO confirm with consumer.
    weight_by_size: true

metadata:
  # Kept as an unquoted integer, as in the original.
  version: 1