Unverified commit 5a48ca27, authored by eyuansu62 and committed by GitHub
Browse files

fix some bugs of mmlu (#2299)



* fix some bugs of mmlu

* Fix end of file newline issue

---------
Co-authored-by: eyuansu62 <772468951@qq.com>
parent 1bc6c933
......@@ -26,7 +26,9 @@ task:
- metric: acc
weight_by_size: True
aggregate_metric_list:
- metric: acc
- aggregation: mean
metric: exact_match
weight_by_size: True
filter_list: get-answer
metadata:
version: 2
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation
test_split: test
fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
doc_to_text: "{% if choices is defined%}Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step.{% else %}Q: {{ question.strip() }}\nA:{% endif %}"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer] if answer is defined else target}}"
filter_list:
- name: "get-answer"
filter:
......@@ -18,7 +17,7 @@ generation_kwargs:
- "</s>"
do_sample: false
temperature: 0.0
num_fewshot: 0
num_fewshot: 4
metric_list:
- metric: exact_match
aggregation: mean
......
......@@ -12,7 +12,7 @@ filter_list:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
- function: "multi_choice_regex"
group_select: 0
regex_pattern: "(\\([A-Z]\\))"
ignore_case: true
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment