# SocialIQA: three-way multiple-choice question answering over short
# social-situation contexts, scored by ranking the three candidate answers.
task: social_iqa
dataset_path: social_i_qa
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
# Prompt given to the model; each rendered entry of doc_to_choice is scored
# as a continuation of this text.
doc_to_text: "Q: {{context}} {{question}}\nA:"
# Candidate answers, in the order A, B, C (indices 0, 1, 2).
doc_to_choice:
  - "{{answerA}}"
  - "{{answerB}}"
  - "{{answerC}}"
# The dataset stores `label` as a 1-based string ("1", "2", "3"), whereas the
# target must be a 0-based index into doc_to_choice, so cast and shift by one.
doc_to_target: "{{ (label|int) - 1 }}"
should_decontaminate: true
metric_list:
  # Accuracy of the highest-scoring choice.
  - metric: acc
    aggregation: mean
    higher_is_better: true
  # Accuracy after the harness's length normalization of choice scores.
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  # Brier score over the choice probabilities; lower is better.
  - metric: brier_score
    aggregation: brier_score
    higher_is_better: false
