# Task configuration for the `math_qa` dataset, scored as multiple choice.
# NOTE(review): the key set looks like an lm-evaluation-harness task config;
# no `task`, `doc_to_text` or `doc_to_choice` key appears in this section, so
# presumably they are supplied elsewhere (e.g. an including config) — confirm.
dataset_path: math_qa
output_type: multiple_choice
# Names of the dataset splits to use for each phase.
training_split: train
validation_split: validation
test_split: test
# Template expression that yields the zero-based position of the `correct`
# field within ['a', 'b', 'c', 'd', 'e'] (so 'a' -> 0 ... 'e' -> 4).
# NOTE(review): assumes `correct` is always one of these lowercase letters;
# `.index` would fail on any other value — verify against the dataset.
doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}"
# Metrics reported for this task. Every entry states both its aggregation and
# its direction explicitly so that none of them depends on implicit defaults.
metric_list:
  # Per-document accuracy, averaged over the evaluated split.
  - metric: acc
    aggregation: mean
    higher_is_better: true
  # Second accuracy variant, aggregated the same way as `acc`.
  # NOTE(review): presumably length-normalized accuracy — confirm in the
  # harness metric registry.
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  # Brier score; lower values are better. The aggregation is named explicitly
  # to match the entries above. `brier_score` is the aggregation that
  # lm-evaluation-harness registers as this metric's default, so results are
  # unchanged. NOTE(review): confirm against the installed harness version.
  - metric: brier_score
    aggregation: brier_score
    higher_is_better: false
