Commit c23c9305 authored by lintangsutawika
Browse files

update mmlu

parent cb085b02
......@@ -578,6 +578,8 @@ def evaluate(
results, task_hierarchy, show_group_table = process_group(results, task_dict)
print(task_hierarchy)
import sys; sys.exit()
results_agg = defaultdict(dict)
groups_agg = defaultdict(dict)
all_tasks_list = list(task_hierarchy.keys())
......
group: mmlu_humanities
group_alias: humanities
task:
- formal_logic
- high_school_european_history
- high_school_us_history
- high_school_world_history
- international_law
- jurisprudence
- logical_fallacies
- moral_disputes
- moral_scenarios
- philosophy
- prehistory
- professional_law
- world_religions
- mmlu_formal_logic
- mmlu_high_school_european_history
# - mmlu_high_school_us_history
# - mmlu_high_school_world_history
# - mmlu_international_law
# - mmlu_jurisprudence
# - mmlu_logical_fallacies
# - mmlu_moral_disputes
# - mmlu_moral_scenarios
# - mmlu_philosophy
# - mmlu_prehistory
# - mmlu_professional_law
# - mmlu_world_religions
aggregate_metric: True
weight_by_size: True
......@@ -3,16 +3,16 @@ group_alias: other
task:
- mmlu_business_ethics
- mmlu_clinical_knowledge
- mmlu_college_medicine
- mmlu_global_facts
- mmlu_human_aging
- mmlu_management
- mmlu_marketing
- mmlu_medical_genetics
- mmlu_miscellaneous
- mmlu_nutrition
- mmlu_professional_accounting
- mmlu_professional_medicine
- mmlu_virology
# - mmlu_college_medicine
# - mmlu_global_facts
# - mmlu_human_aging
# - mmlu_management
# - mmlu_marketing
# - mmlu_medical_genetics
# - mmlu_miscellaneous
# - mmlu_nutrition
# - mmlu_professional_accounting
# - mmlu_professional_medicine
# - mmlu_virology
aggregate_metric: True
weight_by_size: True
group: mmlu_social_sciences
group_alias: social_sciences
task:
- econometrics
- high_school_geography
- high_school_government_and_politics
- high_school_macroeconomics
- high_school_microeconomics
- high_school_psychology
- human_sexuality
- professional_psychology
- public_relations
- security_studies
- sociology
- us_foreign_policy
- mmlu_econometrics
- mmlu_high_school_geography
# - mmlu_high_school_government_and_politics
# - mmlu_high_school_macroeconomics
# - mmlu_high_school_microeconomics
# - mmlu_high_school_psychology
# - mmlu_human_sexuality
# - mmlu_professional_psychology
# - mmlu_public_relations
# - mmlu_security_studies
# - mmlu_sociology
# - mmlu_us_foreign_policy
aggregate_metric: True
weight_by_size: True
......@@ -3,22 +3,22 @@ group_alias: stem
task:
- mmlu_abstract_algebra
- mmlu_anatomy
- mmlu_astronomy
- mmlu_college_biology
- mmlu_college_chemistry
- mmlu_college_computer_science
- mmlu_college_mathematics
- mmlu_college_physics
- mmlu_computer_security
- mmlu_conceptual_physics
- mmlu_electrical_engineering
- mmlu_elementary_mathematics
- mmlu_high_school_biology
- mmlu_high_school_chemistry
- mmlu_high_school_computer_science
- mmlu_high_school_mathematics
- mmlu_high_school_physics
- mmlu_high_school_statistics
- mmlu_machine_learning
# - mmlu_astronomy
# - mmlu_college_biology
# - mmlu_college_chemistry
# - mmlu_college_computer_science
# - mmlu_college_mathematics
# - mmlu_college_physics
# - mmlu_computer_security
# - mmlu_conceptual_physics
# - mmlu_electrical_engineering
# - mmlu_elementary_mathematics
# - mmlu_high_school_biology
# - mmlu_high_school_chemistry
# - mmlu_high_school_computer_science
# - mmlu_high_school_mathematics
# - mmlu_high_school_physics
# - mmlu_high_school_statistics
# - mmlu_machine_learning
aggregate_metric: True
weight_by_size: True
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment