# Task configuration for the LongBench "trec" subset: the model is shown
# example questions (context) plus a new question (input) and must output
# the question's type. Scored with a custom classification metric.
tag:
  - longbench
task: longbench_trec

# Dataset location and the split/subset used for evaluation.
dataset_path: THUDM/LongBench
test_split: test
dataset_name: trec

# Prompt template; {{context}} and {{input}} are filled from each document.
# NOTE(review): this scalar is single-quoted, so YAML does no backslash
# processing and each `\n` reaches the consumer as the two characters
# backslash + "n", not a newline. Confirm whether the consumer unescapes
# it; if not, double quotes are needed (which would change the prompt and
# should come with a version bump).
doc_to_text: 'Please determine the type of the question below. Here are some examples of questions.\n\n{{context}}\n{{input}}'
# Only the first reference answer is used as the target.
doc_to_target: '{{answers[0]}}'
# Custom tag resolved by the consuming tool; the referenced function is
# defined outside this file.
process_results: !function metrics.classification_score

generation_kwargs:
  max_gen_toks: 64
  temperature: 1
  do_sample: true
  # NOTE(review): same single-quote caveat as doc_to_text. As written the
  # stop string is a literal backslash + "n", not a newline character.
  # Confirm this is the intended stop sequence.
  until: ['\n']

metric_list:
  - metric: "classification_score"
    aggregation: mean
    higher_is_better: true

metadata:
  # Kept as a bare number (not quoted) so its parsed type is unchanged.
  version: 2.0