# Evaluation task configuration for the `aime` task, which reads its
# documents from the `gneubig/aime-1983-2024` dataset.
# NOTE(review): the key names match the lm-evaluation-harness task schema;
# confirm against the tool that actually consumes this file.
tag:
  - math_word_problems
task: aime
dataset_path: gneubig/aime-1983-2024
# dataset_name: null
output_type: generate_until

# Training, few-shot and test all point at the same `train` split.
training_split: train
fewshot_split: train
test_split: train

# Prompt and target templates. Double quotes are required on the prompt so
# that `\n` is read as a newline escape rather than two literal characters.
doc_to_text: "Question: {{Question}}\nAnswer:"
doc_to_target: "{{Answer}}"

metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true

generation_kwargs:
  until:
    - "Question:"
    # NOTE(review): this entry was an empty string, which is not a usable
    # stop sequence. It looks like `</s>` that was removed by tag stripping;
    # confirm that `</s>` is the intended end-of-sequence marker.
    - "</s>"
    - "<|im_end|>"
    - "<|eot_id|>"
  do_sample: false
  temperature: 0.0
  max_gen_toks: 32768

repeats: 1
num_fewshot: 0

metadata:
  version: 0.0