# Task definition for `llama_gpqa`: a zero-shot, free-form generation
# (`generate_until`) multiple-choice evaluation on the `gpqa_main` configuration
# of the `Idavidrein/gpqa` dataset.
# NOTE(review): the key names look like the lm-evaluation-harness task schema;
# confirm against the consuming tool before relying on the notes below.
task: llama_gpqa
dataset_path: Idavidrein/gpqa
dataset_name: gpqa_main
output_type: generate_until
# The `train` split is the one used as the evaluation split.
test_split: train

# Prompt template. `{{Question}}` and `{{choice1}}`..`{{choice4}}` are template
# placeholders filled from each document (presumably the fields produced by
# `utils.process_docs` - confirm). The string is kept in double-quoted form so
# the `\n` and `\"` escapes stay exactly as originally written.
doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{Question}}\nA. {{choice1}}\nB. {{choice2}}\nC. {{choice3}}\nD. {{choice4}}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D."
# `!function` is an application-specific YAML tag, so this file needs a loader
# that defines it. `utils.process_docs` is not part of this file.
process_docs: !function utils.process_docs
# The gold label is read from each document's `answer` field.
doc_to_target: answer
# Presumably pre-filled as the start of the model's response, so that the
# generated continuation begins with the answer letter - TODO confirm.
gen_prefix: "The best answer is"

# Decoding settings: stop at the first newline, generate at most 96 tokens,
# no sampling (temperature 0).
generation_kwargs:
  until:
    - "\n"
  max_gen_toks: 96
  do_sample: false
  temperature: 0

# Post-processing applied to each generation before scoring, in listed order.
filter_list:
  - name: exact_match
    filter:
      - function: multi_choice_regex
        group_select: 0
        ignore_case: true
        ignore_punctuation: true
        # Captures a single letter.
        # NOTE(review): the class is A-Z although the prompt only offers A-D.
        # Left unchanged because narrowing it would change reported scores.
        regex_pattern: '([A-Z])'
      - function: remove_whitespace
      - function: take_first

# Scoring: exact string match (punctuation ignored), averaged over documents.
metric_list:
  - metric: exact_match
    ignore_punctuation: true
    aggregation: mean
    higher_is_better: true

num_fewshot: 0

metadata:
  # NOTE(review): parsed as the float 1.0, not a string. Left as-is because
  # the consumer's expected type is not visible here.
  version: 1.0

dataset_kwargs:
  # NOTE(review): presumably forwarded to the dataset loader. If so, this lets
  # loading code shipped with the dataset repository run locally, so keep it
  # only while that repository is trusted.
  trust_remote_code: true