# Task configuration for `aime25`: a free-form generation task over the
# `math-ai/aime25` dataset, scored by exact match.
tag:
  - math_word_problems
task: aime25
dataset_path: math-ai/aime25
# dataset_name: null
output_type: generate_until

# Every split points at `test`.
# NOTE(review): presumably the dataset ships only a test split — confirm.
training_split: test
fewshot_split: test
test_split: test

# Jinja-style templates; `problem` and `answer` are fields of each dataset row.
# Double quotes are required so that `\n` is read as a real newline.
doc_to_text: "Question: {{problem}}\nAnswer:"
doc_to_target: "{{answer}}"

# `!function` is a custom tag resolved by the consuming loader; it refers to
# `process_results` in a `utils` module that is not part of this file.
process_results: !function utils.process_results

metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true

generation_kwargs:
  # Stop strings: generation ends at the first occurrence of any of these.
  until:
    - "Question:"
    # NOTE(review): this entry was an empty string, which is a substring of
    # every output and can stop generation immediately. Restored to `</s>` on
    # the assumption that the tag was stripped by a markup sanitizer — confirm.
    - "</s>"
    - "<|im_end|>"
    - "<|eot_id|>"
  # Sampling disabled, with temperature set to zero.
  do_sample: false
  temperature: 0.0
  max_gen_toks: 32768

repeats: 1
num_fewshot: 0

metadata:
  version: 0.0