Unverified Commit b2c090cc authored by Minho Ryu's avatar Minho Ryu Committed by GitHub
Browse files

aggregate by group (total and categories) (#2643)

parent ed9c6fc8
dataset_name: health dataset_name: health
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_health task: kmmlu_hard_health
tag: kmmlu_hard_other_tasks
dataset_name: industrial_engineer dataset_name: industrial_engineer
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_industrial_engineer task: kmmlu_hard_industrial_engineer
tag: kmmlu_hard_applied_science_tasks
dataset_name: information_technology dataset_name: information_technology
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_information_technology task: kmmlu_hard_information_technology
tag: kmmlu_hard_stem_tasks
dataset_name: interior_architecture_and_design dataset_name: interior_architecture_and_design
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_interior_architecture_and_design task: kmmlu_hard_interior_architecture_and_design
tag: kmmlu_hard_other_tasks
dataset_name: korean_history dataset_name: korean_history
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_korean_history task: kmmlu_hard_korean_history
tag: kmmlu_hard_humss_tasks
dataset_name: law dataset_name: law
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_law task: kmmlu_hard_law
tag: kmmlu_hard_humss_tasks
dataset_name: machine_design_and_manufacturing dataset_name: machine_design_and_manufacturing
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_machine_design_and_manufacturing task: kmmlu_hard_machine_design_and_manufacturing
tag: kmmlu_hard_applied_science_tasks
dataset_name: management dataset_name: management
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_management task: kmmlu_hard_management
tag: kmmlu_hard_humss_tasks
dataset_name: maritime_engineering dataset_name: maritime_engineering
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_maritime_engineering task: kmmlu_hard_maritime_engineering
tag: kmmlu_hard_applied_science_tasks
dataset_name: marketing dataset_name: marketing
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_marketing task: kmmlu_hard_marketing
tag: kmmlu_hard_other_tasks
dataset_name: materials_engineering dataset_name: materials_engineering
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_materials_engineering task: kmmlu_hard_materials_engineering
tag: kmmlu_hard_stem_tasks
dataset_name: math dataset_name: math
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_math task: kmmlu_hard_math
tag: kmmlu_hard_stem_tasks
dataset_name: mechanical_engineering dataset_name: mechanical_engineering
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_mechanical_engineering task: kmmlu_hard_mechanical_engineering
tag: kmmlu_hard_stem_tasks
dataset_name: nondestructive_testing dataset_name: nondestructive_testing
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_nondestructive_testing task: kmmlu_hard_nondestructive_testing
tag: kmmlu_hard_applied_science_tasks
dataset_name: patent dataset_name: patent
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_patent task: kmmlu_hard_patent
tag: kmmlu_hard_other_tasks
dataset_name: political_science_and_sociology dataset_name: political_science_and_sociology
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_political_science_and_sociology task: kmmlu_hard_political_science_and_sociology
tag: kmmlu_hard_humss_tasks
dataset_name: psychology dataset_name: psychology
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_psychology task: kmmlu_hard_psychology
tag: kmmlu_hard_humss_tasks
dataset_name: public_safety dataset_name: public_safety
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_public_safety task: kmmlu_hard_public_safety
tag: kmmlu_hard_other_tasks
dataset_name: railway_and_automotive_engineering dataset_name: railway_and_automotive_engineering
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_railway_and_automotive_engineering task: kmmlu_hard_railway_and_automotive_engineering
tag: kmmlu_hard_applied_science_tasks
dataset_name: real_estate dataset_name: real_estate
include: _hard_kmmlu_yaml include: _hard_kmmlu_yaml
task: kmmlu_hard_real_estate task: kmmlu_hard_real_estate
tag: kmmlu_hard_other_tasks
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment