---
# Shared evaluation-task settings for the HAE_RAE_BENCH dataset.
# NOTE(review): the key names match an lm-evaluation-harness style task
# config; confirm against the consuming tool's schema.

# Dataset identifier (presumably a Hugging Face Hub repo id; verify).
dataset_path: HAERAE-HUB/HAE_RAE_BENCH

# Both the evaluation split and the few-shot split are set to "test".
# NOTE(review): few-shot examples drawn from the evaluated split may overlap
# with the scored items unless the consumer excludes them; confirm.
test_split: test
fewshot_split: test

output_type: multiple_choice

# Templates are quoted so the leading "{{" is not parsed as a flow mapping.
doc_to_text: "{{query}}"
# Five answer labels; each is quoted because it begins with "(".
doc_to_choice: ["(A)", "(B)", "(C)", "(D)", "(E)"]
doc_to_target: "{{answer}}"

# Two metrics, each mean-aggregated, where a higher score is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  # Left unquoted on purpose: this parses as the float 1.0, as in the
  # original. Quoting it would change the value's type for the consumer.
  version: 1.0