# Group `longbench2_structured`: collects the graph and table subtasks
# listed under `task` and reports an aggregated `acc` for them.
group: longbench2_structured
group_alias: "Long Structured Data Understanding"

# Member tasks of this group.
task:
  - longbench2_graph
  - longbench2_table

# Metrics aggregated across the member tasks.
aggregate_metric_list:
  - metric: acc
    # NOTE(review): presumably weights each subtask by its sample count
    # instead of averaging subtasks equally; confirm against the harness docs.
    weight_by_size: true

metadata:
  version: 0.0