# Task group "comma": a set of multiple-choice / QA tasks evaluated together,
# with per-task metric overrides and group-level aggregate metrics.
# NOTE(review): the key names (group / task / metric_list /
# aggregate_metric_list) look like an lm-evaluation-harness group config --
# confirm against the consuming tool's schema.
group: comma

task:
  # Entries written as mappings override fields of the named task;
  # a bare string (boolq) lists the task with no overrides given here.
  - task: arc_challenge
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true

  - task: arc_easy
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true

  - boolq

  - task: hellaswag
    metric_list:
      - metric: acc_norm
        aggregation: mean
        higher_is_better: true

  - task: openbookqa
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true

  - task: commonsense_qa
    # Double quotes are required so that \n is read as a real newline.
    doc_to_text: "Question: {{ question.strip() }}\nAnswer:"
    # Converts the answerKey letter to its 0-based position in A..E.
    doc_to_target: '{{["A", "B", "C", "D", "E"].index(answerKey)}}'
    doc_to_choice: "{{ choices['text'] }}"
    metric_list:
      - metric: acc_mutual_info
        aggregation: mean
        higher_is_better: true

  - task: piqa
    doc_to_text: "Goal: {{goal}}\nAnswer:"
    metric_list:
      - metric: acc_norm
        aggregation: mean
        higher_is_better: true

  - task: social_iqa
    doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
    metric_list:
      - metric: acc_norm
        aggregation: mean
        higher_is_better: true

# Group-level metrics: one entry per metric name used by the tasks above,
# each aggregated with a mean and with weight_by_size turned off.
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    weight_by_size: false
  - metric: acc_norm
    aggregation: mean
    weight_by_size: false
  - metric: acc_mutual_info
    aggregation: mean
    weight_by_size: false