Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
741a6a69
Commit
741a6a69
authored
Aug 20, 2024
by
lintangsutawika
Browse files
Merge branch 'main' of
https://github.com/EleutherAI/lm-evaluation-harness
into mela
parents
494a4515
b536f067
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
223 additions
and
0 deletions
+223
-0
lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml
lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml
+15
-0
lm_eval/tasks/arabicmmlu/_generate_configs.py
lm_eval/tasks/arabicmmlu/_generate_configs.py
+118
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml
.../tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml
.../tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml
...val/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml
...al/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml
...val/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml
...l/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml
+5
-0
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml
.../tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml
+5
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml
0 → 100644
View file @
741a6a69
dataset_path: yazeed7/ArabicMMLU
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_choice: !function utils.doc_to_choice
doc_to_target: "Answer Key"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
lm_eval/tasks/arabicmmlu/_generate_configs.py
0 → 100644
View file @
741a6a69
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import
argparse
import
logging
import
os
import
yaml
from
tqdm
import
tqdm
eval_logger = logging.getLogger("lm-eval")


# Maps each ArabicMMLU subject (used verbatim as the dataset config name and
# task alias) to the category its generated task is tagged with.
# Insertion order matters: the script iterates this dict to write the
# per-subject YAMLs and to derive the category order of the benchmark config.
SUBJECTS = {
    "Driving Test": "other",
    "High Geography": "social_science",
    "High History": "humanities",
    "Islamic Studies": "humanities",
    "Univ Accounting": "social_science",
    "Primary General Knowledge": "other",
    "Univ Political Science": "social_science",
    "Primary Math": "stem",
    "Middle General Knowledge": "other",
    "High Biology": "stem",
    "Primary Natural Science": "stem",
    "High Economics": "social_science",
    "Middle Natural Science": "stem",
    "Middle Geography": "social_science",
    "Primary Social Science": "social_science",
    "Middle Computer Science": "stem",
    "Middle Islamic Studies": "humanities",
    "Primary Computer Science": "stem",
    "High Physics": "stem",
    "Middle Social Science": "social_science",
    "Middle Civics": "social_science",
    "High Computer Science": "stem",
    "General Knowledge": "other",
    "High Civics": "social_science",
    "Prof Law": "humanities",
    "High Islamic Studies": "humanities",
    "Primary Arabic Language": "language",
    "High Arabic Language": "language",
    "Arabic Language (Grammar)": "language",
    "Primary History": "humanities",
    "Middle History": "humanities",
    "Univ Economics": "social_science",
    "Arabic Language (General)": "language",
    "Univ Computer Science": "stem",
    "Primary Islamic Studies": "humanities",
    "Primary Geography": "social_science",
    "High Philosophy": "humanities",
    "Middle Arabic Language": "language",
    "Middle Economics": "social_science",
    "Univ Management": "other",
}
def parse_args():
    """Parse the command-line options of the config generator.

    Returns:
        argparse.Namespace with two string attributes:
        ``base_yaml_path`` (default ``"_default_arabicmmlu_template_yaml"``)
        and ``save_prefix_path`` (default ``"arabicmmlu"``).
    """
    option_defaults = (
        ("--base_yaml_path", "_default_arabicmmlu_template_yaml"),
        ("--save_prefix_path", "arabicmmlu"),
    )
    cli = argparse.ArgumentParser()
    for flag, fallback in option_defaults:
        cli.add_argument(flag, default=fallback)
    return cli.parse_args()
# Script entry point: writes one task YAML per ArabicMMLU subject (each one
# `include`s the shared base template) plus a benchmark-level YAML that lists
# every category tag.
if __name__ == "__main__":
    args = parse_args()

    # Only the file name of the base YAML is needed, so that it can be
    # `"include": `-d from the generated per-subject YAMLs.
    base_yaml_name = os.path.basename(args.base_yaml_path)

    # Fail fast on a wrong --base_yaml_path. The template is deliberately not
    # parsed: its parsed contents were never used by this script, and the
    # default template carries `!function` tags, which `yaml.full_load` cannot
    # construct unless a constructor for that tag has been registered.
    if not os.path.isfile(args.base_yaml_path):
        raise FileNotFoundError(f"Base YAML not found: {args.base_yaml_path}")

    # Unique categories in first-seen order (dicts preserve insertion order).
    ALL_CATEGORIES = list(dict.fromkeys(SUBJECTS.values()))

    for subject, category in tqdm(SUBJECTS.items()):
        subject_slug = subject.lower().replace(" ", "_")
        # File names drop parentheses; task names keep them.
        # NOTE(review): this means e.g. the task "arabicmmlu_arabic_language_(general)"
        # lives in "arabicmmlu_arabic_language_general.yaml" - confirm intended.
        file_slug = subject_slug.replace("(", "").replace(")", "")

        yaml_dict = {
            "include": base_yaml_name,
            # NOTE(review): the committed per-subject YAMLs carry tags with a
            # "_tasks" suffix (e.g. "arabicmmlu_other_tasks"), which this
            # script does not emit - confirm which form is intended before
            # regenerating.
            "tag": f"arabicmmlu_{category}",
            "task": f"arabicmmlu_{subject_slug}",
            "task_alias": subject,
            "dataset_name": subject,
        }

        file_save_path = f"{args.save_prefix_path}_{file_slug}.yaml"
        eval_logger.info(
            "Saving yaml for subset %s to %s", subject, file_save_path
        )
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
                allow_unicode=True,
                default_style='"',
            )

    arabicmmlu_subcategories = [
        f"arabicmmlu_{category}" for category in ALL_CATEGORIES
    ]

    file_save_path = args.save_prefix_path + ".yaml"
    eval_logger.info("Saving benchmark config to %s", file_save_path)
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            {
                "group": "arabicmmlu",
                "task": arabicmmlu_subcategories,
            },
            yaml_file,
            indent=4,
            default_flow_style=False,
        )
lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Arabic Language (General)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(general)"
"task_alias": "Arabic Language (General)"
lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Arabic Language (Grammar)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(grammar)"
"task_alias": "Arabic Language (Grammar)"
lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Driving Test"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_driving_test"
"task_alias": "Driving Test"
lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "General Knowledge"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_general_knowledge"
"task_alias": "General Knowledge"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Arabic Language"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_arabic_language"
"task_alias": "High Arabic Language"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Biology"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_biology"
"task_alias": "High Biology"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Civics"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_civics"
"task_alias": "High Civics"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Computer Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_computer_science"
"task_alias": "High Computer Science"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Economics"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_economics"
"task_alias": "High Economics"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Geography"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_geography"
"task_alias": "High Geography"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High History"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_history"
"task_alias": "High History"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Islamic Studies"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_islamic_studies"
"task_alias": "High Islamic Studies"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Philosophy"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_philosophy"
"task_alias": "High Philosophy"
lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "High Physics"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_physics"
"task_alias": "High Physics"
lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Islamic Studies"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_islamic_studies"
"task_alias": "Islamic Studies"
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Middle Arabic Language"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_middle_arabic_language"
"task_alias": "Middle Arabic Language"
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Middle Civics"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_middle_civics"
"task_alias": "Middle Civics"
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml
0 → 100644
View file @
741a6a69
"dataset_name": "Middle Computer Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_middle_computer_science"
"task_alias": "Middle Computer Science"
Prev
1
…
11
12
13
14
15
16
17
18
19
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment