dataset_path: sjyuxyz/MMLU-Pro-with-subset
validation_split: validation
fewshot_split: dev
output_type: generate_until
doc_to_text: "{% set alphabet = 'ABCDEFGHIJ' %}Q: {{ question.strip() }}\n{% for index in range(options|length) %}({{ alphabet[index] }}) {{ options[index] }} {% endfor %}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)', '(E)', '(F)', '(G)', '(H)', '(I)', '(J)'][answer_index]}}"
filter_list:
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
generation_kwargs:
  until:
    - "</s>"
    - "Q:"
    - "<|im_end|>"
  do_sample: false
  temperature: 0.0
num_fewshot: 0
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  version: 1.0
dataset_kwargs:
  trust_remote_code: true