# Task definition for `mgsm_chat` (tag: llama3).
# Prompts and reference answers are read from the columns of the
# Llama-3.2-3B-Instruct-evals MGSM "details" dataset named below.
# NOTE(review): the key layout looks like an lm-evaluation-harness task
# config - confirm against the consuming loader.
tag: llama3
task: mgsm_chat

# Dataset location and the split that is evaluated.
dataset_path: meta-llama/Llama-3.2-3B-Instruct-evals
dataset_name: Llama-3.2-3B-Instruct-evals__mgsm__details
output_type: generate_until
test_split: latest

# Prompt template: takes the first entry of `input_final_prompts`, removes
# the chat-template marker strings listed in the replace() calls, then trims
# surrounding whitespace.
# The folded scalar (>-) joins the lines below with single spaces, so the
# resulting template is one line.
# NOTE(review): Jinja's `replace` filter does plain substring replacement,
# so '^user' only removes a literal caret followed by "user" - confirm that
# this is what is intended.
doc_to_text: >-
  {{ input_final_prompts
  |first
  |replace('<|eot_id|><|start_header_id|>assistant<|end_header_id|>', '')
  |replace('<|start_header_id|>', '')
  |replace('<|end_header_id|>', '')
  |replace('<|eot_id|>', '')
  |replace('^user', '')
  |trim }}
doc_to_target: input_correct_responses

# Scoring is delegated to `process_results_mgsm` in the task's `utils` module
# (resolved through the custom `!function` tag; not visible from this file).
process_results: !function utils.process_results_mgsm

# Generation settings: sampling disabled, no stop sequences listed,
# at most 2048 generated tokens.
generation_kwargs:
  until: []
  do_sample: false
  temperature: 0.0
  max_gen_toks: 2048

metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true

filter_list:
  # strict-match: requires a localized "Answer" label followed by ": " and
  # captures the number after it (optional leading minus, then digits,
  # dots and commas).
  - name: strict-match
    filter:
      - function: regex
        regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (\\-?[0-9\\.\\,]+)"
      - function: take_first

  # flexible-extract: accepts either the labelled answer (two or more
  # characters drawn from `$`, digits, dots, commas, with an optional minus)
  # or any bare integer in the text.
  # NOTE(review): `group_select: -1` presumably picks the last match -
  # confirm against the regex filter implementation.
  # NOTE(review): `take_first` appears twice in this chain; the second one
  # looks redundant - confirm before removing.
  - name: flexible-extract
    filter:
      - function: regex
        group_select: -1
        regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (-?[$0-9.,]{2,})|(-?[0-9]+)"
      - function: take_first
      - function: remove_whitespace
      - function: take_first

metadata:
  version: 0.0