# test-01.yaml

# Test fixture: a group config whose `task` list mixes several entry forms
# (bare strings and dicts that carry per-entry overrides).
group: test-1
group_alias: test 1
task:
  - piqa # string entry naming a task
  - ai2_arc # string entry naming a tag
  # NOTE(review): open question from the author — should this entry be
  # expanded ("spread out") into its member tasks? Unresolved here.
  - task: super-glue-lm-eval-v1
    num_fewshot: 3
  - task: swag # dict entry: registered task plus a num_fewshot override
    num_fewshot: 2
  - task: mmlu
    num_fewshot: 5
  # Nested group defined inline as a dict ("dict group"): it has its own
  # `task` list, with num_fewshot and metric_list set at the group level.
  # NOTE(review): presumably these group-level settings are meant to apply to
  # every member task — confirm against the config loader.
  - group: nli-tasks
    task:
      - anli
      - boolq
      - sglue_rte
    num_fewshot: 4
    metric_list:
      - metric: brier_score
  # The same registered task (sciq) is listed three times as deliberate
  # duplicates; each copy has a distinct task_alias and num_fewshot value.
  - task: sciq # duplicate 1 of 3
    task_alias: sciq 2-shot
    num_fewshot: 2
  - task: sciq # duplicate 2 of 3
    task_alias: sciq 4-shot
    num_fewshot: 4
  - task: sciq # duplicate 3 of 3
    task_alias: sciq 6-shot
    num_fewshot: 6
  # Inline ("dict") task: siqa_custom is fully specified in this entry rather
  # than being referenced as a registered task.
  - task: siqa_custom
    dataset_path: social_i_qa
    dataset_name: null
    output_type: multiple_choice
    training_split: train
    validation_split: validation
    # Two-line prompt template; the block scalar keeps the inner newline and
    # the `-` chomping indicator drops the trailing one.
    doc_to_text: |-
      Question: {{context}} {{question}}
      Answer:
    target_delimiter: ' '
    # One candidate answer per template, in A/B/C order.
    doc_to_choice:
      - '{{answerA}}'
      - '{{answerB}}'
      - '{{answerC}}'
    # Template casts `label` to int and subtracts 1.
    # NOTE(review): presumably this turns a 1-based label into a 0-based index
    # into doc_to_choice — confirm against the dataset.
    doc_to_target: '{{ (label|int) - 1 }}'
    metric_list:
      - metric: acc
        aggregation: mean
        higher_is_better: true