Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c23c9305
Commit
c23c9305
authored
May 07, 2024
by
lintangsutawika
Browse files
update mmlu
parent
cb085b02
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
55 additions
and
53 deletions
+55
-53
lm_eval/evaluator.py
lm_eval/evaluator.py
+2
-0
lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml
lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml
+13
-13
lm_eval/tasks/mmlu/default/_mmlu_other.yaml
lm_eval/tasks/mmlu/default/_mmlu_other.yaml
+11
-11
lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml
lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml
+12
-12
lm_eval/tasks/mmlu/default/_mmlu_stem.yaml
lm_eval/tasks/mmlu/default/_mmlu_stem.yaml
+17
-17
No files found.
lm_eval/evaluator.py
View file @
c23c9305
...
...
@@ -578,6 +578,8 @@ def evaluate(
results
,
task_hierarchy
,
show_group_table
=
process_group
(
results
,
task_dict
)
print
(
task_hierarchy
)
import
sys
;
sys
.
exit
()
results_agg
=
defaultdict
(
dict
)
groups_agg
=
defaultdict
(
dict
)
all_tasks_list
=
list
(
task_hierarchy
.
keys
())
...
...
lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml
View file @
c23c9305
group
:
mmlu_humanities
group_alias
:
humanities
task
:
-
formal_logic
-
high_school_european_history
-
high_school_us_history
-
high_school_world_history
-
international_law
-
jurisprudence
-
logical_fallacies
-
moral_disputes
-
moral_scenarios
-
philosophy
-
prehistory
-
professional_law
-
world_religions
-
mmlu_
formal_logic
-
mmlu_
high_school_european_history
# - mmlu_
high_school_us_history
# - mmlu_
high_school_world_history
# - mmlu_
international_law
# - mmlu_
jurisprudence
# - mmlu_
logical_fallacies
# - mmlu_
moral_disputes
# - mmlu_
moral_scenarios
# - mmlu_
philosophy
# - mmlu_
prehistory
# - mmlu_
professional_law
# - mmlu_
world_religions
aggregate_metric
:
True
weight_by_size
:
True
lm_eval/tasks/mmlu/default/_mmlu_other.yaml
View file @
c23c9305
...
...
@@ -3,16 +3,16 @@ group_alias: other
task
:
-
mmlu_business_ethics
-
mmlu_clinical_knowledge
-
mmlu_college_medicine
-
mmlu_global_facts
-
mmlu_human_aging
-
mmlu_management
-
mmlu_marketing
-
mmlu_medical_genetics
-
mmlu_miscellaneous
-
mmlu_nutrition
-
mmlu_professional_accounting
-
mmlu_professional_medicine
-
mmlu_virology
#
- mmlu_college_medicine
#
- mmlu_global_facts
#
- mmlu_human_aging
#
- mmlu_management
#
- mmlu_marketing
#
- mmlu_medical_genetics
#
- mmlu_miscellaneous
#
- mmlu_nutrition
#
- mmlu_professional_accounting
#
- mmlu_professional_medicine
#
- mmlu_virology
aggregate_metric
:
True
weight_by_size
:
True
lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml
View file @
c23c9305
group
:
mmlu_social_sciences
group_alias
:
social_sciences
task
:
-
econometrics
-
high_school_geography
-
high_school_government_and_politics
-
high_school_macroeconomics
-
high_school_microeconomics
-
high_school_psychology
-
human_sexuality
-
professional_psychology
-
public_relations
-
security_studies
-
sociology
-
us_foreign_policy
-
mmlu_
econometrics
-
mmlu_
high_school_geography
# - mmlu_
high_school_government_and_politics
# - mmlu_
high_school_macroeconomics
# - mmlu_
high_school_microeconomics
# - mmlu_
high_school_psychology
# - mmlu_
human_sexuality
# - mmlu_
professional_psychology
# - mmlu_
public_relations
# - mmlu_
security_studies
# - mmlu_
sociology
# - mmlu_
us_foreign_policy
aggregate_metric
:
True
weight_by_size
:
True
lm_eval/tasks/mmlu/default/_mmlu_stem.yaml
View file @
c23c9305
...
...
@@ -3,22 +3,22 @@ group_alias: stem
task
:
-
mmlu_abstract_algebra
-
mmlu_anatomy
-
mmlu_astronomy
-
mmlu_college_biology
-
mmlu_college_chemistry
-
mmlu_college_computer_science
-
mmlu_college_mathematics
-
mmlu_college_physics
-
mmlu_computer_security
-
mmlu_conceptual_physics
-
mmlu_electrical_engineering
-
mmlu_elementary_mathematics
-
mmlu_high_school_biology
-
mmlu_high_school_chemistry
-
mmlu_high_school_computer_science
-
mmlu_high_school_mathematics
-
mmlu_high_school_physics
-
mmlu_high_school_statistics
-
mmlu_machine_learning
#
- mmlu_astronomy
#
- mmlu_college_biology
#
- mmlu_college_chemistry
#
- mmlu_college_computer_science
#
- mmlu_college_mathematics
#
- mmlu_college_physics
#
- mmlu_computer_security
#
- mmlu_conceptual_physics
#
- mmlu_electrical_engineering
#
- mmlu_elementary_mathematics
#
- mmlu_high_school_biology
#
- mmlu_high_school_chemistry
#
- mmlu_high_school_computer_science
#
- mmlu_high_school_mathematics
#
- mmlu_high_school_physics
#
- mmlu_high_school_statistics
#
- mmlu_machine_learning
aggregate_metric
:
True
weight_by_size
:
True
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment