gaoqiong / lm-evaluation-harness / Commits

Commit f66fc06f, authored Feb 01, 2024 by haileyschoelkopf

    fix merge conflicts

Parents: b13753cd, d714fc95
Changes: 84

Showing 20 changed files with 22 additions and 160 deletions (+22 / -160)
Files in this page of the diff:

lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml                  +0   -29
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml                   +0   -23
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml                 +0   -33
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml                   +0   -29
lm_eval/tasks/benchmarks/flan/yaml_templates/cot_template_yaml            +0   -21
lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml                       +0   -6
lm_eval/tasks/bigbench/generate_tasks.py                                  +1   -1
lm_eval/tasks/blimp/generate_configs.py                                   +1   -1
lm_eval/tasks/ceval/_generate_configs.py                                  +3   -3
lm_eval/tasks/cmmlu/_generate_configs.py                                  +3   -3
lm_eval/tasks/code_x_glue/code-text/bleu.py                               +1   -1
lm_eval/tasks/csatqa/_generate_configs.py                                 +2   -2
lm_eval/tasks/gsm8k/gsm8k-cot.yaml                                        +1   -0
lm_eval/tasks/gsm8k/gsm8k.yaml                                            +1   -0
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml                      +1   -0
lm_eval/tasks/mmlu/_generate_configs.py                                   +4   -4
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py   +1   -1
lm_eval/tasks/model_written_evals/persona/_generate_configs.py            +1   -1
lm_eval/tasks/qasper/bool.yaml                                            +1   -1
lm_eval/tasks/qasper/freeform.yaml                                        +1   -1
lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-7":
    doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-8":
    doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
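For orientation (not part of this commit): these prompt templates are Jinja2 strings rendered against each dataset document's fields (premise, hypothesis, label). A minimal sketch of how "template-2" above would render, assuming the jinja2 package and a hand-made ANLI-style document:

# Illustrative sketch only, not part of this diff: render one Flan ANLI template.
from jinja2 import Template

doc = {
    "premise": "The cat sat on the mat.",
    "hypothesis": "An animal is on the mat.",
    "label": 0,  # ANLI labels: 0 = entailment, 1 = neutral, 2 = contradiction
}

doc_to_text = Template(
    "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\n"
    "OPTIONS:\n- Yes\n- It's impossible to say\n- No"
)
doc_to_target = Template("{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}")

print(doc_to_text.render(**doc))    # the prompt shown to the model
print(doc_to_target.render(**doc))  # -> "Yes"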
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-1":
    doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-2":
    doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-3":
    doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-4":
    doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-5":
    doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-6":
    doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-1":
    doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-2":
    doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-3":
    doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-4":
    doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-5":
    doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-6":
    doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-7":
    doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-8":
    # doc_to_text: "{{title}}\n\n{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-9":
    doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nQ with options: Can we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{hypothesis}}"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nThe answer is"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true:\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-7":
    doc_to_text: "Question with options: can we draw the following hypothesis from the context?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-8":
    doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
lm_eval/tasks/benchmarks/flan/yaml_templates/cot_template_yaml    deleted 100644 → 0    (view file @ b13753cd)

group: flan-cot
output_type: generate_until
validation_split: validation
doc_to_target: "{{answer}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "\n\n"
  do_sample: false
  temperature: 0.0
filter_list:
  - name: "get-answer"
    filter:
      - function: "regex"
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
      - function: "take_first"
metadata:
  version: 1.0
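As a side note (not part of the diff), the "get-answer" filter above runs the regex over each model completion and then keeps only the first match. A minimal sketch of that extraction, assuming a toy completion string:

# Illustrative sketch only: what the regex + take_first filter pair extracts.
import re

completion = "First, 12 + 30 = 42. The answer is 42."
pattern = r"The answer is (\-?[0-9\.\,]+)"  # same pattern as regex_pattern above

matches = re.findall(pattern, completion)
answer = matches[0] if matches else None  # take_first keeps the first match
print(answer)  # -> "42"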
lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml

@@ -5,19 +5,13 @@ task:
   - medqa_4options
   - task: mmlu_anatomy
     task_alias: "anatomy (mmlu)"
-    group_alias: null
   - task: mmlu_clinical_knowledge
     task_alias: "clinical_knowledge (mmlu)"
-    group_alias: null
   - task: mmlu_college_medicine
     task_alias: "college_medicine (mmlu)"
-    group_alias: null
   - task: mmlu_medical_genetics
     task_alias: "medical_genetics (mmlu)"
-    group_alias: null
   - task: mmlu_professional_medicine
     task_alias: "professional_medicine (mmlu)"
-    group_alias: null
   - task: mmlu_college_biology
     task_alias: "college_biology (mmlu)"
-    group_alias: null
lm_eval/tasks/bigbench/generate_tasks.py

@@ -181,7 +181,7 @@ def main() -> None:
         for task in all_subtasks:
             file_name = f"{task}.yaml"
             try:
-                with open(f"{path}/{file_name}", "w") as f:
+                with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
                     f.write("# Generated by utils.py\n")
                     yaml.dump(
                         {
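The Python changes in this commit are all the same one-line fix: the config-generator and scoring scripts now pass an explicit encoding="utf-8" to open(), so file I/O no longer depends on the platform's default locale encoding. A minimal before/after sketch (hypothetical file name, not from the repo):

# Illustrative sketch only; "example_task.yaml" is a hypothetical file name.
path = "example_task.yaml"

# Before: falls back to the platform default encoding (e.g. cp1252 on Windows).
# with open(path, "w") as f:
#     f.write("# Generated by utils.py\n")

# After: explicit UTF-8, the pattern applied throughout this commit.
with open(path, "w", encoding="utf-8") as f:
    f.write("# Generated by utils.py\n")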
lm_eval/tasks/blimp/generate_configs.py

@@ -75,7 +75,7 @@ def main() -> None:
     for task in all_subtasks:
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/ceval/_generate_configs.py

@@ -79,13 +79,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):

@@ -107,7 +107,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/cmmlu/_generate_configs.py

@@ -94,13 +94,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):

@@ -122,7 +122,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/code_x_glue/code-text/bleu.py

@@ -184,7 +184,7 @@ def splitPuncts(line):
 def computeMaps(predictions, goldfile):
     predictionMap: Dict[str, list] = {}
     goldMap: Dict[str, list] = {}
-    gf = open(goldfile, "r")
+    gf = open(goldfile, "r", encoding="utf-8")

     for row in predictions:
         cols = row.strip().split("\t")
lm_eval/tasks/csatqa/_generate_configs.py

@@ -25,7 +25,7 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     for name in tqdm(SUBSETS):

@@ -39,7 +39,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
         eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/gsm8k/gsm8k-cot.yaml

@@ -41,3 +41,4 @@ filter_list:
       - function: "take_first"
 metadata:
   version: 2.0
+  num_fewshot: 8
lm_eval/tasks/gsm8k/gsm8k.yaml

@@ -24,6 +24,7 @@ generation_kwargs:
     - "\n\n"
     - "Question:"
   do_sample: false
   temperature: 0.0
 repeats: 1
+num_fewshot: 5
 filter_list:
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml

@@ -22,3 +22,4 @@ metric_list:
 num_fewshot: 0
 metadata:
   version: 1.0
+  num_fewshot: 4
lm_eval/tasks/mmlu/_generate_configs.py

@@ -85,13 +85,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     ALL_CATEGORIES = []

@@ -120,7 +120,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,

@@ -142,7 +142,7 @@ if __name__ == "__main__":
     file_save_path = args.save_prefix_path + ".yaml"
     eval_logger.info(f"Saving benchmark config to {file_save_path}")
-    with open(file_save_path, "w") as yaml_file:
+    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(
             {
                 "group": f"mmlu_{args.task_prefix}"
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py

@@ -9,7 +9,7 @@ def main() -> None:
     for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by _generate_configs.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/model_written_evals/persona/_generate_configs.py

@@ -9,7 +9,7 @@ def main() -> None:
     for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by _generate_configs.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/qasper/bool.yaml

 group: qasper
 task: qasper_bool
-dataset_path: qasper
+dataset_path: allenai/qasper
 output_type: multiple_choice
 training_split: train
 validation_split: validation
lm_eval/tasks/qasper/freeform.yaml

 group: qasper
 task: qasper_freeform
-dataset_path: qasper
+dataset_path: allenai/qasper
 output_type: generate_until
 training_split: train
 validation_split: validation