# gpqa.yaml

# Task definition for `llama_gpqa`: a zero-shot, four-option multiple-choice
# prompt whose generated answer letter is scored with exact match.
task: llama_gpqa
metadata:
  version: 1.0

# --- Dataset -----------------------------------------------------------------
dataset_path: Idavidrein/gpqa
dataset_name: gpqa_main
dataset_kwargs:
  trust_remote_code: true
# The dataset's `train` split is the one used as the test split here.
test_split: train
# Documents are passed through `process_docs` from the sibling `utils` module.
# NOTE(review): presumably this is what adds the choice1..choice4 and `answer`
# fields referenced below -- confirm against utils.py.
process_docs: !function utils.process_docs

# --- Prompt ------------------------------------------------------------------
num_fewshot: 0
# Literal block scalar: line breaks are kept as written and the final newline
# is stripped (`|-`), so the value has no trailing newline.
doc_to_text: |-
  Given the following question and four candidate answers (A, B, C and D), choose the best answer.
  Question: {{Question}}
  A. {{choice1}}
  B. {{choice2}}
  C. {{choice3}}
  D. {{choice4}}
  Your response should end with "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of A, B, C or D.
doc_to_target: answer
# Same wording as the closing phrase the prompt asks the response to end with.
gen_prefix: 'The best answer is'

# --- Generation --------------------------------------------------------------
output_type: generate_until
generation_kwargs:
  # Sampling is off and temperature is 0.
  do_sample: false
  temperature: 0
  max_gen_toks: 96
  # Double quotes are required so that \n is an actual newline character.
  until:
    - "\n"

# --- Answer extraction and scoring -------------------------------------------
filter_list:
  - name: exact_match
    # Steps are listed in the order they are declared; do not reorder.
    filter:
      - function: multi_choice_regex
        regex_pattern: '([A-Z])'
        group_select: 0
        ignore_case: true
        ignore_punctuation: true
      - function: remove_whitespace
      - function: take_first
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_punctuation: true