Unverified commit ad506a13, authored by Baber Abbasi and committed by GitHub
Browse files

remove duplicate tags/groups (#3343)

parent d5ddccd9
# Group definition: `longbench2` collects the per-category task tags plus the
# stand-alone `longbench2_code` task.
group: longbench2
# Each category is referenced once, through its `_tasks`-suffixed tag. The bare
# category names (`longbench2_history`, ...) are carried by the same task
# files, so listing both forms would pull every task into the group twice.
task:
- longbench2_history_tasks
- longbench2_incontext_tasks
- longbench2_multi_tasks
- longbench2_single_tasks
- longbench2_structured_tasks
- longbench2_code
aggregate_metric_list:
- metric: acc
......
# Task `longbench2_academic_multi` (dataset_name `academic_multi`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_multi
- longbench2_tasks
- longbench2_multi_tasks
task: longbench2_academic_multi
dataset_name: academic_multi
# Task `longbench2_academic_single` (dataset_name `academic_single`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_academic_single
dataset_name: academic_single
# Task `longbench2_agent_history` (dataset_name `agent_history_qa`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_history
- longbench2_tasks
- longbench2_history_tasks
task: longbench2_agent_history
dataset_name: agent_history_qa
# Task `longbench2_detective` (dataset_name `detective`); remaining settings
# come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_detective
dataset_name: detective
# Task `longbench2_dialogue_history` (dataset_name `dialogue_history_qa`);
# remaining settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_history
- longbench2_tasks
- longbench2_history_tasks
task: longbench2_dialogue_history
dataset_name: dialogue_history_qa
# Task `longbench2_event_order` (dataset_name `event_ordering`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_event_order
dataset_name: event_ordering
# Task `longbench2_fin_multi` (dataset_name `financial_multi`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_multi
- longbench2_tasks
- longbench2_multi_tasks
task: longbench2_fin_multi
dataset_name: financial_multi
# Task `longbench2_fin_single` (dataset_name `financial_single`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_fin_single
dataset_name: financial_single
# Task `longbench2_govt_multi` (dataset_name `government_multi`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_multi
- longbench2_tasks
- longbench2_multi_tasks
task: longbench2_govt_multi
dataset_name: government_multi
# Task `longbench2_govt_single` (dataset_name `government_single`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_govt_single
dataset_name: government_single
# Task `longbench2_graph` (dataset_name `graph_reasoning`); remaining settings
# come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_structured
- longbench2_tasks
- longbench2_structured_tasks
task: longbench2_graph
dataset_name: graph_reasoning
# Task `longbench2_legal_multi` (dataset_name `legal_multi`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_multi
- longbench2_tasks
- longbench2_multi_tasks
task: longbench2_legal_multi
dataset_name: legal_multi
# Task `longbench2_legal_single` (dataset_name `legal_single`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_legal_single
dataset_name: legal_single
# Task `longbench2_lit_single` (dataset_name `literary`); remaining settings
# come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_single
- longbench2_tasks
- longbench2_single_tasks
task: longbench2_lit_single
dataset_name: literary
# Task `longbench2_code` (dataset_name `code_repo_qa`); remaining settings come
# from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group,
# which lists `longbench2_code` directly as one of its tasks.
tag:
- longbench2_tasks
task: longbench2_code
dataset_name: code_repo_qa
# Task `longbench2_many_shot` (dataset_name `manyshot_learning`); remaining
# settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_incontext
- longbench2_tasks
- longbench2_incontext_tasks
task: longbench2_many_shot
dataset_name: manyshot_learning
# Task `longbench2_news_multi` (dataset_name `multinews`); remaining settings
# come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_multi
- longbench2_tasks
- longbench2_multi_tasks
task: longbench2_news_multi
dataset_name: multinews
# Task `longbench2_table` (dataset_name `table_qa`); remaining settings come
# from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_structured
- longbench2_tasks
- longbench2_structured_tasks
task: longbench2_table
dataset_name: table_qa
# Task `longbench2_translate` (dataset_name `new_language_translation`);
# remaining settings come from the included `_longbench_common_yaml`.
include: _longbench_common_yaml
# `longbench2` is not repeated as a tag: it is already the name of the group
# that collects this task through the category tags below.
tag:
- longbench2_incontext
- longbench2_tasks
- longbench2_incontext_tasks
task: longbench2_translate
dataset_name: new_language_translation
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment