# Group definition: bundles the five hrm8k_* subtasks under the single
# group name `hrm8k` and declares how their scores are reported.
group: hrm8k

# Member tasks of the group. Each entry is a task name that must be defined
# elsewhere (not visible in this file).
task:
  - hrm8k_gsm8k
  - hrm8k_ksm
  - hrm8k_math
  - hrm8k_mmmlu
  - hrm8k_omni_math

# Metric reported for the group: exact_match, combined with a mean, where a
# larger value is the better result.
# NOTE(review): some evaluation harnesses expect group-level metrics under a
# differently named key (e.g. `aggregate_metric_list`) - confirm that
# `metric_list` is the key the consumer of this file reads.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true

metadata:
  # Left as an unquoted number so the parsed type stays the same as before
  # (a YAML float). NOTE(review): quote it if the consumer wants a string.
  version: 1.0