"examples/vscode:/vscode.git/clone" did not exist on "fad19fabd036879fc3e6a78d544b94c4440c1666"
Commit c23c9305 authored by lintangsutawika's avatar lintangsutawika
Browse files

update mmlu

parent cb085b02
...@@ -578,6 +578,8 @@ def evaluate( ...@@ -578,6 +578,8 @@ def evaluate(
results, task_hierarchy, show_group_table = process_group(results, task_dict) results, task_hierarchy, show_group_table = process_group(results, task_dict)
print(task_hierarchy)
import sys; sys.exit()
results_agg = defaultdict(dict) results_agg = defaultdict(dict)
groups_agg = defaultdict(dict) groups_agg = defaultdict(dict)
all_tasks_list = list(task_hierarchy.keys()) all_tasks_list = list(task_hierarchy.keys())
......
group: mmlu_humanities group: mmlu_humanities
group_alias: humanities group_alias: humanities
task: task:
- formal_logic - mmlu_formal_logic
- high_school_european_history - mmlu_high_school_european_history
- high_school_us_history # - mmlu_high_school_us_history
- high_school_world_history # - mmlu_high_school_world_history
- international_law # - mmlu_international_law
- jurisprudence # - mmlu_jurisprudence
- logical_fallacies # - mmlu_logical_fallacies
- moral_disputes # - mmlu_moral_disputes
- moral_scenarios # - mmlu_moral_scenarios
- philosophy # - mmlu_philosophy
- prehistory # - mmlu_prehistory
- professional_law # - mmlu_professional_law
- world_religions # - mmlu_world_religions
aggregate_metric: True aggregate_metric: True
weight_by_size: True weight_by_size: True
...@@ -3,16 +3,16 @@ group_alias: other ...@@ -3,16 +3,16 @@ group_alias: other
task: task:
- mmlu_business_ethics - mmlu_business_ethics
- mmlu_clinical_knowledge - mmlu_clinical_knowledge
- mmlu_college_medicine # - mmlu_college_medicine
- mmlu_global_facts # - mmlu_global_facts
- mmlu_human_aging # - mmlu_human_aging
- mmlu_management # - mmlu_management
- mmlu_marketing # - mmlu_marketing
- mmlu_medical_genetics # - mmlu_medical_genetics
- mmlu_miscellaneous # - mmlu_miscellaneous
- mmlu_nutrition # - mmlu_nutrition
- mmlu_professional_accounting # - mmlu_professional_accounting
- mmlu_professional_medicine # - mmlu_professional_medicine
- mmlu_virology # - mmlu_virology
aggregate_metric: True aggregate_metric: True
weight_by_size: True weight_by_size: True
group: mmlu_social_sciences group: mmlu_social_sciences
group_alias: social_sciences group_alias: social_sciences
task: task:
- econometrics - mmlu_econometrics
- high_school_geography - mmlu_high_school_geography
- high_school_government_and_politics # - mmlu_high_school_government_and_politics
- high_school_macroeconomics # - mmlu_high_school_macroeconomics
- high_school_microeconomics # - mmlu_high_school_microeconomics
- high_school_psychology # - mmlu_high_school_psychology
- human_sexuality # - mmlu_human_sexuality
- professional_psychology # - mmlu_professional_psychology
- public_relations # - mmlu_public_relations
- security_studies # - mmlu_security_studies
- sociology # - mmlu_sociology
- us_foreign_policy # - mmlu_us_foreign_policy
aggregate_metric: True aggregate_metric: True
weight_by_size: True weight_by_size: True
...@@ -3,22 +3,22 @@ group_alias: stem ...@@ -3,22 +3,22 @@ group_alias: stem
task: task:
- mmlu_abstract_algebra - mmlu_abstract_algebra
- mmlu_anatomy - mmlu_anatomy
- mmlu_astronomy # - mmlu_astronomy
- mmlu_college_biology # - mmlu_college_biology
- mmlu_college_chemistry # - mmlu_college_chemistry
- mmlu_college_computer_science # - mmlu_college_computer_science
- mmlu_college_mathematics # - mmlu_college_mathematics
- mmlu_college_physics # - mmlu_college_physics
- mmlu_computer_security # - mmlu_computer_security
- mmlu_conceptual_physics # - mmlu_conceptual_physics
- mmlu_electrical_engineering # - mmlu_electrical_engineering
- mmlu_elementary_mathematics # - mmlu_elementary_mathematics
- mmlu_high_school_biology # - mmlu_high_school_biology
- mmlu_high_school_chemistry # - mmlu_high_school_chemistry
- mmlu_high_school_computer_science # - mmlu_high_school_computer_science
- mmlu_high_school_mathematics # - mmlu_high_school_mathematics
- mmlu_high_school_physics # - mmlu_high_school_physics
- mmlu_high_school_statistics # - mmlu_high_school_statistics
- mmlu_machine_learning # - mmlu_machine_learning
aggregate_metric: True aggregate_metric: True
weight_by_size: True weight_by_size: True
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment