# Shared settings for the `mmlu_flan` group: a free-form generation setup for
# four-choice questions, scored by exact string match against the answer letter.
# NOTE(review): key names follow the consuming evaluation harness's task schema,
# which is not visible from this file — confirm against that tool's docs.

# Group label attached to this configuration.
group: mmlu_flan
# Dataset identifier (looks like a Hugging Face Hub `org/name` id — TODO confirm).
dataset_path: cais/mmlu
# Split names: `test` is evaluated, `dev` supplies the few-shot examples.
# The validation split is intentionally left disabled below.
# validation_split: validation
test_split: test
fewshot_split: dev
# Prompt template. The active form prints the stripped question, then one
# lettered choice per line ("A. ...", "B. ...", ...), and ends with "Answer:"
# (no trailing space). The commented-out line is an alternative "(A) ... (B) ..."
# single-line layout that pairs with the commented-out target further down.
# doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: "
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
# Presumably selects free-form generation rather than likelihood scoring —
# verify the accepted values for the harness version in use.
output_type: greedy_until
# Gold answer: `answer` indexes into the letter list, so the target is a bare
# letter. The commented-out variant yields the parenthesised form instead.
# doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
# Scoring: exact string match, averaged over documents; higher is better.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Case and punctuation normalisation options are left disabled here.
    # ignore_case: true
    # ignore_punctuation: true
# Generation settings. `until` lists "</s>" (presumably a stop string that ends
# generation — TODO confirm). Sampling options below are left commented out.
generation_kwargs:
  until:
    - "</s>"
#   do_sample: false
#   temperature: 0.0