group: bigbench
dataset_path: bigbench # will switch to `hails/bigbench` when all tasks are pushed
output_type: generate_until
dataset_kwargs:
  # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods
  # subtask_name: null
test_split: default
doc_to_text: inputs
doc_to_target: "{{targets[0]}}"
generation_kwargs:
  max_length: 128
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_punctuation: true
  - metric: !function aux_metric.token_edit_distance # pip install textdistance
    aggregation: mean
    higher_is_better: false
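For reference, below is a minimal sketch of what the `aux_metric.token_edit_distance` helper referenced above could look like, assuming it tokenizes on whitespace and uses the `textdistance` package named in the comment. The keyword-argument signature (`references`, `predictions`) is an assumption modeled on HF `evaluate`-style metrics; the exact hook the harness uses to call custom `!function` metrics may differ between versions, so adjust the signature accordingly.

```python
# aux_metric.py -- hypothetical sketch, not the harness's actual implementation.
import textdistance  # pip install textdistance


def token_edit_distance(references, predictions, **kwargs):
    """Mean Levenshtein distance between whitespace-tokenized strings.

    Lower is better, matching `higher_is_better: false` in the YAML above.
    """
    distances = [
        textdistance.levenshtein.distance(ref.split(), pred.split())
        for ref, pred in zip(references, predictions)
    ]
    return sum(distances) / len(distances)
```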