# Test group definition.
#
# The `task` list below mixes the different entry forms a group member can
# take: a bare string task name, a bare string tag, a dict that overrides a
# setting on a registered task, and a nested group given as a dict.
# Commented-out entries are disabled cases kept for reference.
group: test-1
group_alias: test 1
task:
  - piqa  # string task
  - ai2_arc  # string tag
  # - task: super-glue-lm-eval-v1  # Should this be spread out?
  #   num_fewshot: 3
  - task: swag  # dict registered task
    num_fewshot: 2
  # - task: mmlu
  #   num_fewshot: 5
  - group: nli-tasks  # dict group
    task:
      - anli
      - boolq
      - sglue_rte
    num_fewshot: 4
    metric_list:
      - metric: brier_score
        # NOTE(review): placed on the metric entry because it directly
        # follows `metric` in the source; confirm it is not meant to be a
        # key of the `nli-tasks` group itself.
        aggregate_metric: true
  # - task: sciq  # dict registered task duplicate
  #   task_alias: sciq 2-shot
  #   num_fewshot: 2
  # - task: sciq  # dict registered task duplicate
  #   task_alias: sciq 4-shot
  #   num_fewshot: 4
  # - task: sciq  # dict registered task duplicate
  #   task_alias: sciq 6-shot
  #   num_fewshot: 6
  # - task: siqa_custom  # dict task
  #   dataset_path: social_i_qa
  #   dataset_name: null
  #   output_type: multiple_choice
  #   training_split: train
  #   validation_split: validation
  #   doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
  #   target_delimiter: " "
  #   doc_to_choice:
  #     - "{{answerA}}"
  #     - "{{answerB}}"
  #     - "{{answerC}}"
  #   doc_to_target: "{{ (label|int) - 1 }}"
  #   metric_list:
  #     - metric: acc
  #       aggregation: mean
  #       higher_is_better: true