Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
bd028848
Commit
bd028848
authored
Jul 18, 2025
by
Baber
Browse files
Merge branch 'main' into metrics
# Conflicts: # tests/test_tasks.py
parents
6e48110e
56def33d
Changes
108
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
140 additions
and
0 deletions
+140
-0
lm_eval/tasks/egymmlu/egymmlu_geography.yaml
lm_eval/tasks/egymmlu/egymmlu_geography.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_global_facts.yaml
lm_eval/tasks/egymmlu/egymmlu_global_facts.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_high_school_european_history.yaml
...l/tasks/egymmlu/egymmlu_high_school_european_history.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_high_school_geography.yaml
lm_eval/tasks/egymmlu/egymmlu_high_school_geography.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_high_school_government_and_politics.yaml
.../egymmlu/egymmlu_high_school_government_and_politics.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_high_school_psychology.yaml
lm_eval/tasks/egymmlu/egymmlu_high_school_psychology.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_high_school_statistics.yaml
lm_eval/tasks/egymmlu/egymmlu_high_school_statistics.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_high_school_world_history.yaml
lm_eval/tasks/egymmlu/egymmlu_high_school_world_history.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_history.yaml
lm_eval/tasks/egymmlu/egymmlu_history.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_human_aging.yaml
lm_eval/tasks/egymmlu/egymmlu_human_aging.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_international_law.yaml
lm_eval/tasks/egymmlu/egymmlu_international_law.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_islamic_studies.yaml
lm_eval/tasks/egymmlu/egymmlu_islamic_studies.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_jurisprudence.yaml
lm_eval/tasks/egymmlu/egymmlu_jurisprudence.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_law.yaml
lm_eval/tasks/egymmlu/egymmlu_law.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_logical_fallacies.yaml
lm_eval/tasks/egymmlu/egymmlu_logical_fallacies.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_management.yaml
lm_eval/tasks/egymmlu/egymmlu_management.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_management_ar.yaml
lm_eval/tasks/egymmlu/egymmlu_management_ar.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_marketing.yaml
lm_eval/tasks/egymmlu/egymmlu_marketing.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_math.yaml
lm_eval/tasks/egymmlu/egymmlu_math.yaml
+7
-0
lm_eval/tasks/egymmlu/egymmlu_moral_disputes.yaml
lm_eval/tasks/egymmlu/egymmlu_moral_disputes.yaml
+7
-0
No files found.
lm_eval/tasks/egymmlu/egymmlu_geography.yaml
0 → 100644
View file @
bd028848
"dataset_name": "geography"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_geography"
"task_alias": "geography"
lm_eval/tasks/egymmlu/egymmlu_global_facts.yaml
0 → 100644
View file @
bd028848
"dataset_name": "global_facts"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_other_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_global_facts"
"task_alias": "global facts"
lm_eval/tasks/egymmlu/egymmlu_high_school_european_history.yaml
0 → 100644
View file @
bd028848
"dataset_name": "high_school_european_history"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_high_school_european_history"
"task_alias": "high school european history"
lm_eval/tasks/egymmlu/egymmlu_high_school_geography.yaml
0 → 100644
View file @
bd028848
"dataset_name": "high_school_geography"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_high_school_geography"
"task_alias": "high school geography"
lm_eval/tasks/egymmlu/egymmlu_high_school_government_and_politics.yaml
0 → 100644
View file @
bd028848
"dataset_name": "high_school_government_and_politics"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_high_school_government_and_politics"
"task_alias": "high school government and politics"
lm_eval/tasks/egymmlu/egymmlu_high_school_psychology.yaml
0 → 100644
View file @
bd028848
"dataset_name": "high_school_psychology"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_high_school_psychology"
"task_alias": "high school psychology"
lm_eval/tasks/egymmlu/egymmlu_high_school_statistics.yaml
0 → 100644
View file @
bd028848
"dataset_name": "high_school_statistics"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_stem_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_high_school_statistics"
"task_alias": "high school statistics"
lm_eval/tasks/egymmlu/egymmlu_high_school_world_history.yaml
0 → 100644
View file @
bd028848
"dataset_name": "high_school_world_history"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_high_school_world_history"
"task_alias": "high school world history"
lm_eval/tasks/egymmlu/egymmlu_history.yaml
0 → 100644
View file @
bd028848
"dataset_name": "history"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_history"
"task_alias": "history"
lm_eval/tasks/egymmlu/egymmlu_human_aging.yaml
0 → 100644
View file @
bd028848
"dataset_name": "human_aging"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_other_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_human_aging"
"task_alias": "human aging"
lm_eval/tasks/egymmlu/egymmlu_international_law.yaml
0 → 100644
View file @
bd028848
"dataset_name": "international_law"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_international_law"
"task_alias": "international law"
lm_eval/tasks/egymmlu/egymmlu_islamic_studies.yaml
0 → 100644
View file @
bd028848
"dataset_name": "islamic_studies"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_islamic_studies"
"task_alias": "islamic studies"
lm_eval/tasks/egymmlu/egymmlu_jurisprudence.yaml
0 → 100644
View file @
bd028848
"dataset_name": "jurisprudence"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_jurisprudence"
"task_alias": "jurisprudence"
lm_eval/tasks/egymmlu/egymmlu_law.yaml
0 → 100644
View file @
bd028848
"dataset_name": "law"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_law"
"task_alias": "law"
lm_eval/tasks/egymmlu/egymmlu_logical_fallacies.yaml
0 → 100644
View file @
bd028848
"dataset_name": "logical_fallacies"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_logical_fallacies"
"task_alias": "logical fallacies"
lm_eval/tasks/egymmlu/egymmlu_management.yaml
0 → 100644
View file @
bd028848
"dataset_name": "management"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_other_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_management"
"task_alias": "management"
lm_eval/tasks/egymmlu/egymmlu_management_ar.yaml
0 → 100644
View file @
bd028848
"dataset_name": "management_ar"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_other_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_management_ar"
"task_alias": "management ar"
lm_eval/tasks/egymmlu/egymmlu_marketing.yaml
0 → 100644
View file @
bd028848
"dataset_name": "marketing"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_other_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_marketing"
"task_alias": "marketing"
lm_eval/tasks/egymmlu/egymmlu_math.yaml
0 → 100644
View file @
bd028848
"dataset_name": "math"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_stem_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_math"
"task_alias": "math"
lm_eval/tasks/egymmlu/egymmlu_moral_disputes.yaml
0 → 100644
View file @
bd028848
"dataset_name": "moral_disputes"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_moral_disputes"
"task_alias": "moral disputes"
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment