Commit 6a72f627 authored by Baber
Browse files

use `allenai/ai2_arc`

parent 6d0c60d7
tag: tag:
- llama3 - llama
task: llama_arc_challenge task: llama_arc_challenge
dataset_path: meta-llama/Llama-3.1-8B-evals dataset_path: allenai/ai2_arc
dataset_name: Llama-3.1-8B-evals__arc_challenge__details dataset_name: ARC-Challenge
output_type: multiple_choice output_type: multiple_choice
test_split: latest training_split: train
process_docs: !function utils.process_arc_c_docs validation_split: validation
doc_to_text: "{{doc_to_text}}" test_split: test
doc_to_target: "{{doc_to_target}}" fewshot_split: train
doc_to_choice: "{{doc_to_choice}}" doc_to_text: "Question: {{question.strip()}}\nA. {{choices.text[0]}}\nB. {{choices.text[1]}}\nC. {{choices.text[2]}}{% if choices.text|length > 3 %}\nD. {{choices.text[3]}}{% endif %}\nAnswer:"
fewshot_delimiter: "\n\n"
doc_to_target: "{{ 'ABCD'[answerKey|int - 1] if answerKey|string in '1234' else answerKey }}"
doc_to_choice: "{{ choices.label|map('replace', '1', 'A')|map('replace', '2', 'B')|map('replace', '3', 'C')|map('replace', '4', 'D')|list if choices.label[0] in '1234' else choices.label }}"
num_fewshot: 25
metric_list: metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
......
# Task configuration `arc_challenge_chat`: multiple-choice evaluation on the
# `ARC-Challenge` subset of the `allenai/ai2_arc` dataset.
tag:
  - llama
task: arc_challenge_chat
dataset_path: allenai/ai2_arc
dataset_name: ARC-Challenge
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
# Alternative prompt without the answer options (disabled).
# doc_to_text: "Question: {{question}}\nAnswer:"
# Prompt: the stripped question followed by lettered options. Options A-C are
# always rendered; D is rendered only when the document has more than three
# choices. A fifth choice, if present, is not rendered by this template.
doc_to_text: "Question: {{question.strip()}}\nA. {{choices.text[0]}}\nB. {{choices.text[1]}}\nC. {{choices.text[2]}}{% if choices.text|length > 3 %}\nD. {{choices.text[3]}}{% endif %}\nAnswer:"
# Separator placed between few-shot examples (one blank line).
fewshot_delimiter: "\n\n"
# NOTE(review): target and choices use the dataset's own `answerKey` and
# `choices.label` values verbatim, while the prompt above always letters the
# options A-D. If any document uses different labels (e.g. numeric ones), the
# scored choices will not match the letters shown in the prompt - confirm
# against the dataset.
doc_to_target: "{{answerKey}}"
doc_to_choice: "{{choices.label}}"
num_fewshot: 25
# Each list entry is one metric; `aggregation` and `higher_is_better` belong
# to the entry they are nested under.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment