# Group "test-1": its `task` list mixes the entry forms exercised here --
# bare strings, a task mapping, and an inline nested group mapping.
group: test-1
group_alias: test 1
task:
  # Bare string entry naming a task.
  - piqa
  # Bare string entry naming a tag.
  - ai2_arc
  # - task: super-glue-lm-eval-v1 # Should this be spread out?
  #   num_fewshot: 3
  # Mapping entry: a registered task together with a num_fewshot value.
  - task: swag
    num_fewshot: 2
  # - task: mmlu
  #   num_fewshot: 5
  # Mapping entry: an inline group carrying its own task list and settings.
  - group: nli-tasks
    num_fewshot: 4
    aggregate_metric: true
    metric_list:
      - metric: brier_score
    task:
      - anli
      - boolq
      - sglue_rte

  # The sciq and siqa_custom entries that follow are commented out (inactive).
  # - task: sciq # dict registered task duplicate
  #   task_alias: sciq 2-shot
  #   num_fewshot: 2
  # - task: sciq # dict registered task duplicate
  #   task_alias: sciq 4-shot
  #   num_fewshot: 4
  # - task: sciq # dict registered task duplicate
  #   task_alias: sciq 6-shot
  #   num_fewshot: 6
  # - task: siqa_custom # dict task
  #   dataset_path: social_i_qa
  #   dataset_name: null
  #   output_type: multiple_choice
  #   training_split: train
  #   validation_split: validation
  #   doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
  #   target_delimiter: " "
  #   doc_to_choice:
  #     - "{{answerA}}"
  #     - "{{answerB}}"
  #     - "{{answerC}}"
  #   doc_to_target: "{{ (label|int) - 1 }}"
  #   metric_list:
  #     - metric: acc
  #       aggregation: mean
  #       higher_is_better: true