Commit 6ac0fa62 authored by Baber's avatar Baber
Browse files

changed mmlu to cloze and added arc_challenge_mmlu

parent bb098f13
tag:
- llama
task: arc_challenge_mmlu
dataset_path: allenai/ai2_arc
dataset_name: ARC-Challenge
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
fewshot_split: train
doc_to_text: "Question: {{question.strip()}}\nA. {{choices.text[0]}}\nB. {{choices.text[1]}}\nC. {{choices.text[2]}}{% if choices.text|length > 3 %}\nD. {{choices.text[3]}}{% endif %}\nAnswer:"
doc_to_target: "{{ 'ABCD'[answerKey|int - 1] if answerKey|string in '1234' else answerKey }}"
doc_to_choice: "{{ choices.label|map('replace', '1', 'A')|map('replace', '2', 'B')|map('replace', '3', 'C')|map('replace', '4', 'D')|list if choices.label[0] in '1234' else choices.label }}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
......@@ -4,13 +4,16 @@ fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_text: "Question: {{question.strip()}}\nAnswer:"
doc_to_choice: {{choices}}
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
......
......@@ -7,5 +7,7 @@ task:
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2
......@@ -5,5 +5,7 @@ task:
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2
......@@ -5,5 +5,7 @@ task:
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2
......@@ -5,5 +5,7 @@ task:
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2
......@@ -5,5 +5,7 @@ task:
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment