# Group definition for `mmlu_llama`: bundles the four category-level
# sub-tasks listed under `task` and declares how their scores are combined.
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# single line; confirm it against the consumer's schema.
group: mmlu_llama

# Members of the group, one entry per category.
task:
  - mmlu_llama_stem
  - mmlu_llama_other
  - mmlu_llama_social_sciences
  - mmlu_llama_humanities

# One aggregation rule: `exact_match` combined with `mean`.
# NOTE(review): `weight_by_size` and `filter_list` are assumed to belong to
# this same list entry (they follow `metric`/`aggregation` directly) — confirm.
aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    # Presumably weights each member by its number of examples — verify
    # against the consumer's documentation.
    weight_by_size: true
    filter_list: [strict_match]

metadata:
  version: 1