# Dataset source and task identity.
dataset_path: Idavidrein/gpqa
tag: gpqa
output_type: generate_until
# Per-document preprocessing hook, referenced through the custom !function
# tag. Declared exactly once: duplicate mapping keys are invalid YAML and
# only work by accident on parsers that silently keep the last value.
process_docs: !function utils.process_docs
# The Hugging Face dataset only has a train split, so it is reused for
# validation and no test split is declared.
training_split: train
validation_split: train
test_split: null
# Prompt template. Written as a literal block scalar with the trailing
# newline stripped (|-), so every line break below is part of the prompt and
# the text ends right after "C or D".
doc_to_text: |-
  Given the following question and four candidate answers (A, B, C and D), choose the best answer.

  Question: {{Question}}
  Choices:
  A. {{choice1}}
  B. {{choice2}}
  C. {{choice3}}
  D. {{choice4}}
  Please reason step by step and conclude with:
  The answer is [the_answer_letter].
  where the [the_answer_letter] is one of A, B, C or D
# Double quotes are required here so that \n is read as a real newline.
# NOTE(review): presumably prepended to the model's generation to open a
# reasoning section - confirm against the harness documentation.
gen_prefix: "<think>\n"
# Name of the document field holding the reference answer.
doc_to_target: answer
filter_list:
  - name: "strict-match"
    filter:
      # Capture the answer letter only where it follows the "The answer is"
      # phrase that the prompt asks the model to conclude with. Optional
      # colon/whitespace and an opening "(", "[" or "*" are tolerated, and
      # \b rejects capitals that merely start a word (e.g. "Carbon").
      # A bare ([A-D]) would instead match any capital A-D in the whole
      # generation, so trailing text could override the real answer.
      # Single quotes keep the backslashes literal for the regex engine.
      - function: "regex"
        regex_pattern: '[Tt]he answer is[\s:]*[\(\[\*]*([A-D])\b'
        # -1 selects the last match, so phrases restated during the
        # reasoning are superseded by the final conclusion.
        group_select: -1
      - function: "take_first"
# Zero-shot: no in-context examples are requested.
num_fewshot: 0
generation_kwargs:
  # Token budget for a single generation.
  max_gen_toks: 32768
  # No explicit stop sequences are configured.
  until: []
  # Sampling configuration.
  do_sample: true
  temperature: 0.6
  top_p: 0.95
# Scoring: exact match against the target, averaged over documents.
metric_list:
  - metric: exact_match
    # Comparison flags for the match.
    ignore_case: true
    ignore_punctuation: true
    # How per-document results are combined and interpreted.
    aggregation: mean
    higher_is_better: true
# Task configuration version.
metadata:
  version: 1.0
