Commit d9d45c9e authored by Baber
Browse files

add llama 3.1 mmlu

parent 9b03a3c5
# dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
# test_split: test
# fewshot_split: dev
# fewshot_config:
# sampler: first_n
# output_type: generate_until
# doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
# doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
# generation_kwargs:
# until:
# - "</s>"
# - "\n"
# metric_list:
# - metric: exact_match
# aggregation: mean
# higher_is_better: true
# metadata:
# version: 2.0
# dataset_kwargs:
# trust_remote_code: true
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
output_type: generate_until output_type: generate_until
doc_to_text: 'Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{{{choices[3]}}}}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D.' doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}" doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
gen_prefix: "The best answer is"
generation_kwargs: generation_kwargs:
until: until:
- "</s>" - "</s>"
- "\n" - "\n"
filter:
- function: "regex"
group_select: -1
regex_pattern: 'best answer is ([A-Z])'
- function: "take_first"
metric_list: metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
......
# MMLU task template using an instruction-style prompt that asks the model to
# finish with "The best answer is <letter>". The answer letter is then pulled
# out of the generation by the regex filter below and scored by exact match.
dataset_path: hails/mmlu_no_train  # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: generate_until
# Double-quoted on purpose: YAML only processes the `\n` and `\"` escapes in
# double-quoted scalars. In a single-quoted scalar they would reach the prompt
# as literal backslash sequences instead of newlines and quotation marks.
doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D."
# Maps the integer `answer` index (0-3) to its letter.
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
# NOTE(review): presumably seeds the start of the model's response so that the
# generation only has to supply the letter - confirm against the harness.
gen_prefix: "The best answer is"
generation_kwargs:
  # Only a short completion is requested; the expected output is one letter.
  max_gen_toks: 10
filter_list:
  - filter:
      # The "best answer is " prefix is optional in the pattern, so a bare
      # capital letter also matches; group_select -1 picks the last match.
      - function: regex
        group_select: -1
        regex_pattern: '(?:best answer is )?([A-Z])'
      - function: take_first
    name: strict-match
metric_list:
  - metric: exact_match
    aggregation: mean
    # Declared once; the source listed this key twice in the same entry.
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  version: 2.0
dataset_kwargs:
  trust_remote_code: true
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment