Commit 2507c434 authored by Baber
Browse files

add comma bench

parent 4f8195f1
# Group definition `comma`: bundles eight multiple-choice tasks and, for most
# of them, overrides which metric is reported.
# NOTE(review): key names match the lm-evaluation-harness group-config schema;
# confirm against the harness version in use.
group: comma
task:
  # ARC (challenge + easy) report mutual-information accuracy.
  - task: arc_challenge
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true
  - task: arc_easy
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true
  # boolq is referenced by name only; no overrides are applied here.
  - boolq
  - task: hellaswag
    metric_list:
      - metric: acc_norm
        aggregation: mean
        higher_is_better: true
  - task: openbookqa
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true
  # commonsense_qa overrides the prompt, target and choices in addition to
  # the metric. The target is the position of `answerKey` within A-E.
  - task: commonsense_qa
    doc_to_text: "Question: {{ question.strip() }}\nAnswer:"
    doc_to_target: '{{["A", "B", "C", "D", "E"].index(answerKey)}}'
    doc_to_choice: "{{ choices['text'] }}"
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true
  # piqa and social_iqa override the prompt template and report acc_norm.
  - task: piqa
    doc_to_text: "Goal: {{goal}}\nAnswer:"
    metric_list:
      - metric: acc_norm
        aggregation: mean
        higher_is_better: true
  - task: social_iqa
    doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
    metric_list:
      - metric: acc_norm
        aggregation: mean
        higher_is_better: true
# Group-level aggregation: one entry per metric name used by the subtasks
# above (plus plain `acc`, presumably for boolq's default metric - confirm).
# `weight_by_size: false` presumably yields an unweighted mean across
# subtasks rather than one weighted by dataset size - TODO confirm.
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    weight_by_size: false
  - metric: acc_norm
    aggregation: mean
    weight_by_size: false
  - metric: acc_mutual_info
    aggregation: mean
    weight_by_size: false
......@@ -13,5 +13,7 @@ should_decontaminate: true
# Tail of a separate task config shown as diff context; the keys that precede
# this point are not visible here.
doc_to_decontamination_query: passage
# Single reported metric: mean accuracy, where higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 2.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment