Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
5be2bb10
Commit
5be2bb10
authored
Jun 27, 2024
by
lintangsutawika
Browse files
update files
parent
bfbda3b3
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
0 additions
and
247 deletions
+0
-247
lm_eval/tasks/mmlu_pro/_generate_configs.py
lm_eval/tasks/mmlu_pro/_generate_configs.py
+0
-114
lm_eval/tasks/mmlu_pro/default/_default_template_yaml
lm_eval/tasks/mmlu_pro/default/_default_template_yaml
+0
-15
lm_eval/tasks/mmlu_pro/default/_mmlu_pro.yaml
lm_eval/tasks/mmlu_pro/default/_mmlu_pro.yaml
+0
-6
lm_eval/tasks/mmlu_pro/default/mmlu_pro_biology.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_biology.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_business.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_business.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_chemistry.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_chemistry.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_computer_science.yaml
...val/tasks/mmlu_pro/default/mmlu_pro_computer_science.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_economics.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_economics.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_engineering.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_engineering.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_health.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_health.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_history.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_history.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_law.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_law.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_math.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_math.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_other.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_other.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_philosophy.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_philosophy.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_physics.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_physics.yaml
+0
-8
lm_eval/tasks/mmlu_pro/default/mmlu_pro_psychology.yaml
lm_eval/tasks/mmlu_pro/default/mmlu_pro_psychology.yaml
+0
-8
No files found.
lm_eval/tasks/mmlu_pro/_generate_configs.py
deleted
100644 → 0
View file @
bfbda3b3
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import
argparse
import
logging
import
os
import
yaml
from
tqdm
import
tqdm
# Shared logger; the name "lm-eval" is looked up via the standard logging registry.
eval_logger = logging.getLogger("lm-eval")

# Maps every MMLU-Pro subject (used as dataset subset / task suffix) to the
# broader category it is grouped under.  Insertion order is significant: the
# script walks this mapping in order and collects categories as they are first
# seen, so reordering entries would reorder the generated group list.
SUBJECTS = {
    "business": "other",
    "law": "humanities",
    "psychology": "social_sciences",
    "biology": "stem",
    "chemistry": "stem",
    "history": "humanities",
    "other": "other",
    "health": "other",
    "economics": "social_sciences",
    "math": "stem",
    "physics": "stem",
    "computer_science": "stem",
    "philosophy": "humanities",
    "engineering": "stem",
}
def parse_args():
    """Parse the command-line options of the config generator.

    Options:
        --base_yaml_path (required): path of the base YAML; the script
            references its file name from every generated config.
        --save_prefix_path: prefix of the generated file paths
            (default ``"mmlu_pro"``).
        --cot_prompt_path: optional path to a JSON file that the script
            indexes by subject to obtain each description (default ``None``).
        --task_prefix: extra name component inserted into generated task and
            group names when non-empty (default ``""``).
        --group_prefix: when non-empty, used as the path stem of the
            benchmark-level YAML (default ``""``).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--base_yaml_path", required=True)

    # The remaining options differ only in their flag name and default value.
    optional_flags = (
        ("--save_prefix_path", "mmlu_pro"),
        ("--cot_prompt_path", None),
        ("--task_prefix", ""),
        ("--group_prefix", ""),
    )
    for flag, fallback in optional_flags:
        cli.add_argument(flag, default=fallback)

    return cli.parse_args()
if __name__ == "__main__":
    # Script entry point: write one task YAML per subject in SUBJECTS, then a
    # single benchmark-level YAML that lists every category group.
    args = parse_args()

    prefixed = args.task_prefix != ""
    with_cot = args.cot_prompt_path is not None
    # Common stem of every generated task/group name.
    name_root = f"mmlu_pro_{args.task_prefix}" if prefixed else "mmlu_pro"

    # Only the file name of the base YAML is needed, so that each generated
    # config can `"include": ` it.
    base_yaml_name = os.path.basename(args.base_yaml_path)
    with open(args.base_yaml_path, encoding="utf-8") as base_handle:
        # The parsed content is not read again in this block; the load still
        # raises early when the base YAML is missing or malformed.
        base_yaml = yaml.full_load(base_handle)

    if with_cot:
        import json

        with open(args.cot_prompt_path, encoding="utf-8") as cot_handle:
            cot_file = json.load(cot_handle)

    # Categories in first-seen order; a list (not a set) keeps that order.
    ALL_CATEGORIES = []
    for subject, category in tqdm(SUBJECTS.items()):
        if category not in ALL_CATEGORIES:
            ALL_CATEGORIES.append(category)

        readable_subject = subject.replace("_", " ")
        if with_cot:
            description = cot_file[subject]
        else:
            description = f"The following are multiple choice questions (with answers) about {readable_subject}.\n\n"

        yaml_dict = {
            "include": base_yaml_name,
            "group": f"{name_root}_{category}",
            "group_alias": category.replace("_", " "),
            "task": f"{name_root}_{subject}",
            "task_alias": readable_subject,
            "dataset_name": subject,
            "description": description,
        }

        file_save_path = f"{args.save_prefix_path}_{subject}.yaml"
        eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
                allow_unicode=True,
                default_style='"',
            )

    mmlu_pro_subcategories = [f"{name_root}_{category}" for category in ALL_CATEGORIES]

    # A non-empty --group_prefix overrides the path stem of the group file.
    group_stem = args.group_prefix if args.group_prefix != "" else args.save_prefix_path
    file_save_path = group_stem + ".yaml"

    eval_logger.info(f"Saving benchmark config to {file_save_path}")
    group_config = {
        "group": name_root,
        "task": mmlu_pro_subcategories,
    }
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            group_config,
            yaml_file,
            indent=4,
            default_flow_style=False,
        )
lm_eval/tasks/mmlu_pro/default/_default_template_yaml
deleted
100644 → 0
View file @
bfbda3b3
dataset_path: sjyuxyz/MMLU-Pro-with-subset
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nE. {{choices[4]}}\nF. {{choices[5]}}\nG. {{choices[6]}}\nH. {{choices[7]}}\nI. {{choices[8]}}\nJ. {{choices[9]}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D", "E","F","G","H","I","J"]
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
lm_eval/tasks/mmlu_pro/default/_mmlu_pro.yaml
deleted
100644 → 0
View file @
bfbda3b3
group
:
mmlu_pro
task
:
-
mmlu_pro_stem
-
mmlu_pro_other
-
mmlu_pro_social_sciences
-
mmlu_pro_humanities
lm_eval/tasks/mmlu_pro/default/mmlu_pro_biology.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
biology"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
biology.
\n\
\n
"
"
group"
:
"
mmlu_pro_stem"
"
group_alias"
:
"
stem"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_biology"
"
task_alias"
:
"
biology"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_business.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
business"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
business.
\n\
\n
"
"
group"
:
"
mmlu_pro_other"
"
group_alias"
:
"
other"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_business"
"
task_alias"
:
"
business"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_chemistry.yaml
deleted
100644 → 0
View file @
bfbda3b3
"dataset_name": "chemistry"
"description": "The following are multiple choice questions (with answers) about chemistry.\n\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_chemistry"
"task_alias": "chemistry"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_computer_science.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
computer_science"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
computer_science.
\n\
\n
"
"
group"
:
"
mmlu_pro_stem"
"
group_alias"
:
"
stem"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_computer_science"
"
task_alias"
:
"
computer_science"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_economics.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
economics"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
economics.
\n\
\n
"
"
group"
:
"
mmlu_pro_social_sciences"
"
group_alias"
:
"
social_sciences"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_economics"
"
task_alias"
:
"
economics"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_engineering.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
engineering"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
engineering.
\n\
\n
"
"
group"
:
"
mmlu_pro_stem"
"
group_alias"
:
"
stem"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_engineering"
"
task_alias"
:
"
engineering"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_health.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
health"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
health.
\n\
\n
"
"
group"
:
"
mmlu_pro_other"
"
group_alias"
:
"
other"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_health"
"
task_alias"
:
"
health"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_history.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
history"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
history.
\n\
\n
"
"
group"
:
"
mmlu_pro_humanities"
"
group_alias"
:
"
humanities"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_history"
"
task_alias"
:
"
history"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_law.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
law"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
law.
\n\
\n
"
"
group"
:
"
mmlu_pro_humanities"
"
group_alias"
:
"
humanities"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_law"
"
task_alias"
:
"
law"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_math.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
math"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
math.
\n\
\n
"
"
group"
:
"
mmlu_pro_stem"
"
group_alias"
:
"
stem"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_math"
"
task_alias"
:
"
math"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_other.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
other"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
other.
\n\
\n
"
"
group"
:
"
mmlu_pro_other"
"
group_alias"
:
"
other"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_other"
"
task_alias"
:
"
other"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_philosophy.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
philosophy"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
philosophy.
\n\
\n
"
"
group"
:
"
mmlu_pro_humanities"
"
group_alias"
:
"
humanities"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_philosophy"
"
task_alias"
:
"
philosophy"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_physics.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
physics"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
physics.
\n\
\n
"
"
group"
:
"
mmlu_pro_stem"
"
group_alias"
:
"
stem"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_physics"
"
task_alias"
:
"
physics"
lm_eval/tasks/mmlu_pro/default/mmlu_pro_psychology.yaml
deleted
100644 → 0
View file @
bfbda3b3
"
dataset_name"
:
"
psychology"
"
description"
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
psychology.
\n\
\n
"
"
group"
:
"
mmlu_pro_social_sciences"
"
group_alias"
:
"
social_sciences"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
mmlu_pro_psychology"
"
task_alias"
:
"
psychology"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment