# Group definition: bundles the five English hrm8k sub-tasks under the single
# group name `hrm8k_en` and declares how their scores are combined.
group: hrm8k_en

# Member tasks, referenced by name; their definitions live outside this file.
task:
  - hrm8k_gsm8k_en
  - hrm8k_ksm_en
  - hrm8k_math_en
  - hrm8k_mmmlu_en
  - hrm8k_omni_math_en

# Report exact_match, combined with a mean; larger values are better.
# NOTE(review): confirm the consumer reads `metric_list` (rather than a
# group-specific aggregate key) at group level.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true

metadata:
  # Left unquoted on purpose: parses as the float 1.0, same as before.
  version: 1.0