Unverified Commit b2c090cc authored by Minho Ryu's avatar Minho Ryu Committed by GitHub
Browse files

aggregate by group (total and categories) (#2643)

parent ed9c6fc8
dataset_name: Taxation dataset_name: Taxation
include: _direct_kmmlu_yaml include: _direct_kmmlu_yaml
task: kmmlu_direct_taxation task: kmmlu_direct_taxation
tag: kmmlu_direct_humss_tasks
dataset_name: Telecommunications-and-Wireless-Technology dataset_name: Telecommunications-and-Wireless-Technology
include: _direct_kmmlu_yaml include: _direct_kmmlu_yaml
task: kmmlu_direct_telecommunications_and_wireless_technology task: kmmlu_direct_telecommunications_and_wireless_technology
tag: kmmlu_direct_applied_science_tasks
tag:
- kmmlu
- kmmlu_hard_direct
dataset_path: HAERAE-HUB/KMMLU-HARD dataset_path: HAERAE-HUB/KMMLU-HARD
output_type: generate_until output_type: generate_until
test_split: test test_split: test
......
# Top-level group for the KMMLU-HARD direct tasks: collects the four
# category subgroups and reports one aggregated exact_match score.
group: kmmlu_direct_hard
task:
  - kmmlu_direct_hard_stem
  - kmmlu_direct_hard_other
  - kmmlu_direct_hard_applied_science
  - kmmlu_direct_hard_humss
aggregate_metric_list:
  - metric: exact_match
    # NOTE(review): presumably weights each member by its sample count
    # when averaging — confirm against the harness group-config docs.
    weight_by_size: True
metadata:
  version: 2.0
# Category group: aggregates exact_match over everything referenced by
# kmmlu_direct_hard_applied_science_tasks.
group: kmmlu_direct_hard_applied_science
task:
  - kmmlu_direct_hard_applied_science_tasks
aggregate_metric_list:
  - metric: exact_match
    # NOTE(review): presumably weights each member by its sample count
    # when averaging — confirm against the harness group-config docs.
    weight_by_size: True
metadata:
  version: 2.0
# Category group: aggregates exact_match over everything referenced by
# kmmlu_direct_hard_humss_tasks.
group: kmmlu_direct_hard_humss
task:
  - kmmlu_direct_hard_humss_tasks
aggregate_metric_list:
  - metric: exact_match
    # NOTE(review): presumably weights each member by its sample count
    # when averaging — confirm against the harness group-config docs.
    weight_by_size: True
metadata:
  version: 2.0
# Category group: aggregates exact_match over everything referenced by
# kmmlu_direct_hard_other_tasks.
group: kmmlu_direct_hard_other
task:
  - kmmlu_direct_hard_other_tasks
aggregate_metric_list:
  - metric: exact_match
    # NOTE(review): presumably weights each member by its sample count
    # when averaging — confirm against the harness group-config docs.
    weight_by_size: True
metadata:
  version: 2.0
# Category group: aggregates exact_match over everything referenced by
# kmmlu_direct_hard_stem_tasks.
group: kmmlu_direct_hard_stem
task:
  - kmmlu_direct_hard_stem_tasks
aggregate_metric_list:
  - metric: exact_match
    # NOTE(review): presumably weights each member by its sample count
    # when averaging — confirm against the harness group-config docs.
    weight_by_size: True
metadata:
  version: 2.0
dataset_name: accounting dataset_name: accounting
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_accounting task: kmmlu_direct_hard_accounting
tag: kmmlu_direct_hard_humss_tasks
dataset_name: agricultural_sciences dataset_name: agricultural_sciences
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_agricultural_sciences task: kmmlu_direct_hard_agricultural_sciences
tag: kmmlu_direct_hard_other_tasks
dataset_name: aviation_engineering_and_maintenance dataset_name: aviation_engineering_and_maintenance
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_aviation_engineering_and_maintenance task: kmmlu_direct_hard_aviation_engineering_and_maintenance
tag: kmmlu_direct_hard_applied_science_tasks
dataset_name: biology dataset_name: biology
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_biology task: kmmlu_direct_hard_biology
tag: kmmlu_direct_hard_stem_tasks
dataset_name: chemical_engineering dataset_name: chemical_engineering
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_chemical_engineering task: kmmlu_direct_hard_chemical_engineering
tag: kmmlu_direct_hard_stem_tasks
dataset_name: chemistry dataset_name: chemistry
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_chemistry task: kmmlu_direct_hard_chemistry
tag: kmmlu_direct_hard_stem_tasks
dataset_name: civil_engineering dataset_name: civil_engineering
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_civil_engineering task: kmmlu_direct_hard_civil_engineering
tag: kmmlu_direct_hard_stem_tasks
dataset_name: computer_science dataset_name: computer_science
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_computer_science task: kmmlu_direct_hard_computer_science
tag: kmmlu_direct_hard_stem_tasks
dataset_name: construction dataset_name: construction
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_construction task: kmmlu_direct_hard_construction
tag: kmmlu_direct_hard_other_tasks
dataset_name: criminal_law dataset_name: criminal_law
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_criminal_law task: kmmlu_direct_hard_criminal_law
tag: kmmlu_direct_hard_humss_tasks
dataset_name: ecology dataset_name: ecology
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_ecology task: kmmlu_direct_hard_ecology
tag: kmmlu_direct_hard_stem_tasks
dataset_name: economics dataset_name: economics
include: _direct_hard_kmmlu_yaml include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_economics task: kmmlu_direct_hard_economics
tag: kmmlu_direct_hard_humss_tasks
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment