Unverified commit b2c090cc, authored by Minho Ryu, committed by GitHub
Browse files

aggregate by group (total and categories) (#2643)

parent ed9c6fc8
# KMMLU-hard per-subject task configs, one YAML document per subject.
#
# Each document sets the same four keys:
#   dataset_name - subject subset this task reads
#   include      - every document references `_hard_kmmlu_yaml`
#                  (presumably the shared base config; confirm in the repo)
#   task         - task id, always `kmmlu_hard_<dataset_name>`
#   tag          - category bucket for the subject; four values appear here:
#                  kmmlu_hard_stem_tasks, kmmlu_hard_applied_science_tasks,
#                  kmmlu_hard_humss_tasks, kmmlu_hard_other_tasks
#
# The `---` markers keep each stanza a separate document. Without them the
# repeated keys collapse into one mapping with duplicate keys, which is
# invalid YAML and is silently resolved as last-one-wins by most parsers.
---
dataset_name: health
include: _hard_kmmlu_yaml
task: kmmlu_hard_health
tag: kmmlu_hard_other_tasks
---
dataset_name: industrial_engineer
include: _hard_kmmlu_yaml
task: kmmlu_hard_industrial_engineer
tag: kmmlu_hard_applied_science_tasks
---
dataset_name: information_technology
include: _hard_kmmlu_yaml
task: kmmlu_hard_information_technology
tag: kmmlu_hard_stem_tasks
---
dataset_name: interior_architecture_and_design
include: _hard_kmmlu_yaml
task: kmmlu_hard_interior_architecture_and_design
tag: kmmlu_hard_other_tasks
---
dataset_name: korean_history
include: _hard_kmmlu_yaml
task: kmmlu_hard_korean_history
tag: kmmlu_hard_humss_tasks
---
dataset_name: law
include: _hard_kmmlu_yaml
task: kmmlu_hard_law
tag: kmmlu_hard_humss_tasks
---
dataset_name: machine_design_and_manufacturing
include: _hard_kmmlu_yaml
task: kmmlu_hard_machine_design_and_manufacturing
tag: kmmlu_hard_applied_science_tasks
---
dataset_name: management
include: _hard_kmmlu_yaml
task: kmmlu_hard_management
tag: kmmlu_hard_humss_tasks
---
dataset_name: maritime_engineering
include: _hard_kmmlu_yaml
task: kmmlu_hard_maritime_engineering
tag: kmmlu_hard_applied_science_tasks
---
dataset_name: marketing
include: _hard_kmmlu_yaml
task: kmmlu_hard_marketing
tag: kmmlu_hard_other_tasks
---
dataset_name: materials_engineering
include: _hard_kmmlu_yaml
task: kmmlu_hard_materials_engineering
tag: kmmlu_hard_stem_tasks
---
dataset_name: math
include: _hard_kmmlu_yaml
task: kmmlu_hard_math
tag: kmmlu_hard_stem_tasks
---
dataset_name: mechanical_engineering
include: _hard_kmmlu_yaml
task: kmmlu_hard_mechanical_engineering
tag: kmmlu_hard_stem_tasks
---
dataset_name: nondestructive_testing
include: _hard_kmmlu_yaml
task: kmmlu_hard_nondestructive_testing
tag: kmmlu_hard_applied_science_tasks
---
dataset_name: patent
include: _hard_kmmlu_yaml
task: kmmlu_hard_patent
tag: kmmlu_hard_other_tasks
---
dataset_name: political_science_and_sociology
include: _hard_kmmlu_yaml
task: kmmlu_hard_political_science_and_sociology
tag: kmmlu_hard_humss_tasks
---
dataset_name: psychology
include: _hard_kmmlu_yaml
task: kmmlu_hard_psychology
tag: kmmlu_hard_humss_tasks
---
dataset_name: public_safety
include: _hard_kmmlu_yaml
task: kmmlu_hard_public_safety
tag: kmmlu_hard_other_tasks
---
dataset_name: railway_and_automotive_engineering
include: _hard_kmmlu_yaml
task: kmmlu_hard_railway_and_automotive_engineering
tag: kmmlu_hard_applied_science_tasks
---
dataset_name: real_estate
include: _hard_kmmlu_yaml
task: kmmlu_hard_real_estate
tag: kmmlu_hard_other_tasks
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment