---
# Example group config exercising each form an entry under `task` can take:
# bare task/tag names, dict entries that override settings of a registered
# task, a nested group, one task repeated under different aliases, and a task
# defined inline.
group: test-1
group_alias: test 1
task:
  - piqa  # string task
  - ai2_arc  # string tag
  - task: super-glue-lm-eval-v1  # Should this be spread out?
    num_fewshot: 3
  - task: swag  # dict registered task
    num_fewshot: 2
  - task: mmlu
    num_fewshot: 5
  - group: nli-tasks  # dict group
    task:
      - anli
      - boolq
      - sglue_rte
    # These two keys sit on the nested group mapping, not on a single task.
    num_fewshot: 4
    metric_list:
      - metric: brier_score
  # The next three entries reuse the same task name; only `task_alias` and
  # `num_fewshot` differ between them.
  - task: sciq  # dict registered task duplicate
    task_alias: sciq 2-shot
    num_fewshot: 2
  - task: sciq  # dict registered task duplicate
    task_alias: sciq 4-shot
    num_fewshot: 4
  - task: sciq  # dict registered task duplicate
    task_alias: sciq 6-shot
    num_fewshot: 6
  - task: siqa_custom  # dict task
    dataset_path: social_i_qa
    dataset_name: null
    output_type: multiple_choice
    training_split: train
    validation_split: validation
    # Double quotes are required here so that \n is read as a newline.
    doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
    target_delimiter: " "
    # Quoted so the leading `{{` is not parsed as a YAML flow mapping.
    doc_to_choice:
      - "{{answerA}}"
      - "{{answerB}}"
      - "{{answerC}}"
    # Template subtracts 1 from the integer-cast `label` field.
    doc_to_target: "{{ (label|int) - 1 }}"
    metric_list:
      - metric: acc
        aggregation: mean
        higher_is_better: true