Commit 31c52f55 authored by lintangsutawika
Browse files

fixed arc

parent 155160a1
include: arc_easy.yaml
group: group:
- ai2_arc - ai2_arc
- multiple_choice - multiple_choice
task: arc_challenge task: arc_challenge
dataset_path: ai2_arc dataset_path: ai2_arc
dataset_name: ARC-Challenge dataset_name: ARC-Challenge
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
template_aliases: "{% set answer_choices = choices['text'] %}{% set gold = choices.label.index(answerKey) %}" # set the list of possible answer choices, and set what this doc's gold answer is (set what ds column used, and what)
doc_to_text: "Question: {{question}}\nAnswer:"
doc_to_target: "{{answer_choices[gold]}}"
gold_alias: "{{gold}}" # this will be cast to an int.
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
# - metric: acc_mutual_info
# aggregation: mean
# higher_is_better: true
...@@ -8,10 +8,11 @@ output_type: multiple_choice ...@@ -8,10 +8,11 @@ output_type: multiple_choice
training_split: train training_split: train
validation_split: validation validation_split: validation
test_split: test test_split: test
template_aliases: "{% set answer_choices = choices['text'] %}{% set gold = choices.label.index(answerKey) %}" # set the list of possible answer choices, and set what this doc's gold answer is (set what ds column used, and what)
doc_to_text: "Question: {{question}}\nAnswer:" doc_to_text: "Question: {{question}}\nAnswer:"
doc_to_target: "{{answer_choices[gold]}}" doc_to_target: "{{choices['text'][choices.label.index(answerKey)]}}"
gold_alias: "{{gold}}" # this will be cast to an int. doc_to_choice: "{{choices.text}}"
should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
metric_list: metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
...@@ -19,6 +20,3 @@ metric_list: ...@@ -19,6 +20,3 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_mutual_info
aggregation: mean
higher_is_better: true
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment