# longbench.yaml

# Evaluation task config: free-form question answering over long contexts,
# using the `hotpotqa_e` configuration of the THUDM/LongBench dataset.
task: longbench
dataset_path: THUDM/LongBench
dataset_name: hotpotqa_e
# The model generates text; it is not scored via per-choice log-likelihoods.
output_type: generate_until
test_split: test
# Prompt template. `{{context}}` and `{{input}}` are filled from each dataset
# row. The instruction is intentionally repeated after the passages.
# `|-` keeps the newlines and strips the trailing one so the prompt ends
# exactly at "Answer:".
doc_to_text: |-
  Answer the question based on the given passages. Only give me the answer and do not output any other words.

  The following are given passages.
  {{context}}

  Answer the question based on the given passages. Only give me the answer and do not output any other words.

  Question: {{input}}
  Answer:
doc_to_target: '{{answers}}'
generation_kwargs:
  # Short answers only: cap generation at 32 tokens.
  max_gen_toks: 32
  # Greedy decoding. A non-zero temperature contradicts `do_sample: false`
  # and may enable sampling on backends that only look at `temperature`.
  temperature: 0
  do_sample: false
metric_list:
  # Scoring function loaded from the task-local `metrics` module.
  - metric: !function metrics.qa_f1_score
    aggregation: mean
    higher_is_better: true
  # NOTE(review): `acc_norm` was removed here. It is a loglikelihood /
  # multiple-choice metric and is not defined for `generate_until` output.
metadata:
  version: 1.0