# Multiple-choice evaluation task over the `Idavidrein/gpqa` dataset.
# Each document is rendered as a four-option (A-D) question and scored
# with length-normalised accuracy (`acc_norm`).

dataset_path: Idavidrein/gpqa
output_type: multiple_choice

# `!function` is a custom tag resolved by the consuming loader; the
# referenced `utils.process_docs` is not visible from this file.
process_docs: !function utils.process_docs

# The Hugging Face dataset only has a `train` split, so it is used for
# both the training and the validation split. There is no test split.
training_split: train
validation_split: train
test_split: null

# Prompt template. Kept double-quoted because it relies on `\n` escapes
# and a significant trailing space after "Answer:".
doc_to_text: "What is the correct answer to this question:{{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nAnswer: "
# NOTE(review): `answer` is presumably a field produced by
# `utils.process_docs` that selects one of the choices below - confirm.
doc_to_target: answer
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]

num_fewshot: 0

metric_list:
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  version: 1.0

# NOTE(review): placed at top level (sibling of `metadata`); the original
# single-line form did not show its nesting - confirm.
fewshot_config:
  sampler: first_n