Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c64bf9a9
Commit
c64bf9a9
authored
Oct 17, 2023
by
lintangsutawika
Browse files
change all mentions of `greedy_until` to `generate_until`
parent
04ca5671
Changes
236
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
21 additions
and
33 deletions
+21
-33
lm_eval/tasks/minerva_math/README.md
lm_eval/tasks/minerva_math/README.md
+1
-1
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
+1
-1
lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu_flan_cot_fewshot_template_yaml
...mlu/flan_cot_fewshot/_mmlu_flan_cot_fewshot_template_yaml
+1
-1
lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_generative_template_yaml
...mlu/flan_cot_zeroshot/_mmlu_flan_generative_template_yaml
+1
-1
lm_eval/tasks/mmlu/flan_n_shot/_mmlu_flan_generative_template_yaml
...asks/mmlu/flan_n_shot/_mmlu_flan_generative_template_yaml
+1
-1
lm_eval/tasks/nq_open/nq_open.yaml
lm_eval/tasks/nq_open/nq_open.yaml
+1
-1
lm_eval/tasks/polemo2/polemo2_in.yaml
lm_eval/tasks/polemo2/polemo2_in.yaml
+1
-1
lm_eval/tasks/qasper/freeform.yaml
lm_eval/tasks/qasper/freeform.yaml
+1
-1
lm_eval/tasks/squadv2/default.yaml
lm_eval/tasks/squadv2/default.yaml
+2
-14
lm_eval/tasks/super_glue/boolq/seq2seq.yaml
lm_eval/tasks/super_glue/boolq/seq2seq.yaml
+1
-1
lm_eval/tasks/super_glue/boolq/t5-prompt.yaml
lm_eval/tasks/super_glue/boolq/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/cb/t5-prompt.yaml
lm_eval/tasks/super_glue/cb/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/copa/t5-prompt.yaml
lm_eval/tasks/super_glue/copa/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/multirc/t5-prompt.yaml
lm_eval/tasks/super_glue/multirc/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/record/t5-prompt.yaml
lm_eval/tasks/super_glue/record/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/rte/t5-prompt.yaml
lm_eval/tasks/super_glue/rte/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
+1
-1
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
+1
-1
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
+1
-1
lm_eval/tasks/translation/iwslt2017_en-ar.yaml
lm_eval/tasks/translation/iwslt2017_en-ar.yaml
+1
-1
No files found.
lm_eval/tasks/minerva_math/README.md
View file @
c64bf9a9
...
@@ -37,7 +37,7 @@ Eprint = {arXiv:2206.14858},
...
@@ -37,7 +37,7 @@ Eprint = {arXiv:2206.14858},
#### Groups
#### Groups
-
`math_word_problems`
-
`math_word_problems`
-
`g
reedy
_until`
-
`g
enerate
_until`
#### Tasks
#### Tasks
...
...
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
View file @
c64bf9a9
...
@@ -4,7 +4,7 @@ task: minerva_math_algebra
...
@@ -4,7 +4,7 @@ task: minerva_math_algebra
dataset_path
:
EleutherAI/hendrycks_math
dataset_path
:
EleutherAI/hendrycks_math
process_docs
:
!function
utils.process_docs
process_docs
:
!function
utils.process_docs
dataset_name
:
algebra
dataset_name
:
algebra
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
training_split
:
train
training_split
:
train
test_split
:
test
test_split
:
test
doc_to_text
:
!function
utils.doc_to_text
doc_to_text
:
!function
utils.doc_to_text
...
...
lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu_flan_cot_fewshot_template_yaml
View file @
c64bf9a9
...
@@ -2,7 +2,7 @@ group: mmlu_flan_cot_fewshot
...
@@ -2,7 +2,7 @@ group: mmlu_flan_cot_fewshot
dataset_path: cais/mmlu
dataset_path: cais/mmlu
validation_split: validation
validation_split: validation
fewshot_split: dev
fewshot_split: dev
output_type: g
reedy
_until
output_type: g
enerate
_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
filter_list:
filter_list:
...
...
lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_generative_template_yaml
View file @
c64bf9a9
...
@@ -2,7 +2,7 @@ group: mmlu_flan_cot_zeroshot
...
@@ -2,7 +2,7 @@ group: mmlu_flan_cot_zeroshot
dataset_path: cais/mmlu
dataset_path: cais/mmlu
validation_split: validation
validation_split: validation
fewshot_split: dev
fewshot_split: dev
output_type: g
reedy
_until
output_type: g
enerate
_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
filter_list:
filter_list:
...
...
lm_eval/tasks/mmlu/flan_n_shot/_mmlu_flan_generative_template_yaml
View file @
c64bf9a9
...
@@ -2,7 +2,7 @@ group: mmlu_flan_n_shot_generative
...
@@ -2,7 +2,7 @@ group: mmlu_flan_n_shot_generative
dataset_path: cais/mmlu
dataset_path: cais/mmlu
test_split: test
test_split: test
fewshot_split: dev
fewshot_split: dev
output_type: g
reedy
_until
output_type: g
enerate
_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: "
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: "
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
generation_kwargs:
generation_kwargs:
...
...
lm_eval/tasks/nq_open/nq_open.yaml
View file @
c64bf9a9
task
:
nq_open
task
:
nq_open
dataset_path
:
nq_open
dataset_path
:
nq_open
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
description
:
"
Answer
these
questions:
\n
"
description
:
"
Answer
these
questions:
\n
"
...
...
lm_eval/tasks/polemo2/polemo2_in.yaml
View file @
c64bf9a9
...
@@ -3,7 +3,7 @@ group:
...
@@ -3,7 +3,7 @@ group:
task
:
polemo2_in
task
:
polemo2_in
dataset_path
:
allegro/klej-polemo2-in
dataset_path
:
allegro/klej-polemo2-in
dataset_name
:
klej-polemo2-in
dataset_name
:
klej-polemo2-in
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
test_split
:
test
test_split
:
test
...
...
lm_eval/tasks/qasper/freeform.yaml
View file @
c64bf9a9
group
:
qasper
group
:
qasper
task
:
qasper_freeform
task
:
qasper_freeform
dataset_path
:
qasper
dataset_path
:
qasper
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
process_docs
:
!function
utils.process_docs_freeform
process_docs
:
!function
utils.process_docs_freeform
...
...
lm_eval/tasks/squadv2/default.yaml
View file @
c64bf9a9
include
:
_template_yaml
task
:
squadv2
task
:
squadv2
dataset_path
:
squad_v2
output_type
:
generate_until
output_type
:
greedy_until
training_split
:
train
validation_split
:
validation
doc_to_text
:
"
Title:
{{title}}
\n\n
Background:
{{context}}
\n\n
Question:
{{question}}
\n\n
Answer:"
doc_to_target
:
"
{%
if
answers.text|
length
>
0
%}{{answers.text}}{%
else
%}{{['']}}{%
endif
%}"
target_delimiter
:
"
"
should_decontaminate
:
true
doc_to_decontamination_query
:
context
generation_kwargs
:
generation_kwargs
:
until
:
until
:
-
"
\n
"
-
"
\n
"
# filter_list:
# - name: remove_whitespace
# filter:
# - function: remove_whitespace
# - function: take_first
metric_list
:
metric_list
:
-
metric
:
!function
utils.exact
-
metric
:
!function
utils.exact
aggregation
:
mean
aggregation
:
mean
...
...
lm_eval/tasks/super_glue/boolq/seq2seq.yaml
View file @
c64bf9a9
...
@@ -3,7 +3,7 @@ group:
...
@@ -3,7 +3,7 @@ group:
task
:
"
boolq-seq2seq"
task
:
"
boolq-seq2seq"
dataset_path
:
super_glue
dataset_path
:
super_glue
dataset_name
:
boolq
dataset_name
:
boolq
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
doc_to_text
:
"
{{passage}}
\n
Question:
{{question}}?
\n
Answer:"
doc_to_text
:
"
{{passage}}
\n
Question:
{{question}}?
\n
Answer:"
...
...
lm_eval/tasks/super_glue/boolq/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
boolq
dataset_name
:
boolq
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
"
boolq
passage:
{{passage}}
question:
{{question}}"
doc_to_text
:
"
boolq
passage:
{{passage}}
question:
{{question}}"
doc_to_target
:
label
doc_to_target
:
label
doc_to_choice
:
[
'
False'
,
'
True'
]
doc_to_choice
:
[
'
False'
,
'
True'
]
...
...
lm_eval/tasks/super_glue/cb/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
cb
dataset_name
:
cb
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
"
cb
hypothesis:
{{hypothesis}}
premise:
{{premise}}"
doc_to_text
:
"
cb
hypothesis:
{{hypothesis}}
premise:
{{premise}}"
doc_to_target
:
label
doc_to_target
:
label
doc_to_choice
:
[
'
entailment'
,
'
contradiction'
,
'
neutral'
]
doc_to_choice
:
[
'
entailment'
,
'
contradiction'
,
'
neutral'
]
...
...
lm_eval/tasks/super_glue/copa/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
copa
dataset_name
:
copa
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
"
copa
choice1:
{{choice1}}
choice2:
{{choice2}}
premise:
{{premise}}
question:
{{question}}"
doc_to_text
:
"
copa
choice1:
{{choice1}}
choice2:
{{choice2}}
premise:
{{premise}}
question:
{{question}}"
doc_to_target
:
label
doc_to_target
:
label
doc_to_choice
:
[
'
choice1'
,
'
choice2'
]
doc_to_choice
:
[
'
choice1'
,
'
choice2'
]
...
...
lm_eval/tasks/super_glue/multirc/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
multirc
dataset_name
:
multirc
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
"
multirc
question:
{{question}}
answer:
{{answer}}
paragraph:
{{paragraph}}"
doc_to_text
:
"
multirc
question:
{{question}}
answer:
{{answer}}
paragraph:
{{paragraph}}"
doc_to_target
:
label
doc_to_target
:
label
doc_to_choice
:
"
{%
set
group_id
=
idx.question|string
%}{{[group_id+'_False',
group_id+'_True']}}"
doc_to_choice
:
"
{%
set
group_id
=
idx.question|string
%}{{[group_id+'_False',
group_id+'_True']}}"
...
...
lm_eval/tasks/super_glue/record/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt
...
@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt
dataset_path
:
super_glue
dataset_path
:
super_glue
dataset_name
:
record
dataset_name
:
record
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
process_docs
:
!function
t5_utils.process_docs
process_docs
:
!function
t5_utils.process_docs
doc_to_text
:
!function
t5_utils.doc_to_text
doc_to_text
:
!function
t5_utils.doc_to_text
doc_to_target
:
"
{{idx.passage|string}}+{{idx.query}}_{{answers}}"
doc_to_target
:
"
{{idx.passage|string}}+{{idx.query}}_{{answers}}"
...
...
lm_eval/tasks/super_glue/rte/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
rte
dataset_name
:
rte
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
"
rte
hypothesis:
{{hypothesis}}
premise:
{{premise}}"
doc_to_text
:
"
rte
hypothesis:
{{hypothesis}}
premise:
{{premise}}"
doc_to_target
:
label
doc_to_target
:
label
doc_to_choice
:
[
'
entailment'
,
'
not_entailment'
]
doc_to_choice
:
[
'
entailment'
,
'
not_entailment'
]
...
...
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
wic
dataset_name
:
wic
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
"
wic
sentence1:
{{sentence1}}
sentence2:
{{sentence2}}
word:
{{word}}"
doc_to_text
:
"
wic
sentence1:
{{sentence1}}
sentence2:
{{sentence2}}
word:
{{word}}"
doc_to_target
:
label
doc_to_target
:
label
doc_to_choice
:
[
'
False'
,
'
True'
]
doc_to_choice
:
[
'
False'
,
'
True'
]
...
...
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
View file @
c64bf9a9
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name
:
wsc.fixed
dataset_name
:
wsc.fixed
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
output_type
:
g
reedy
_until
output_type
:
g
enerate
_until
doc_to_text
:
!function
"
t5_utils.doc_to_text"
doc_to_text
:
!function
"
t5_utils.doc_to_text"
doc_to_target
:
label
doc_to_target
:
label
generation_kwargs
:
generation_kwargs
:
...
...
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
View file @
c64bf9a9
...
@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}}
...
@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}}
English
phrase:'
English
phrase:'
group
:
group
:
-
g
reedy
_until
-
g
enerate
_until
-
translation
-
translation
-
iwslt2017
-
iwslt2017
include
:
wmt_common_yaml
include
:
wmt_common_yaml
...
...
lm_eval/tasks/translation/iwslt2017_en-ar.yaml
View file @
c64bf9a9
...
@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
...
@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
Arabic
phrase:'
Arabic
phrase:'
group
:
group
:
-
g
reedy
_until
-
g
enerate
_until
-
translation
-
translation
-
iwslt2017
-
iwslt2017
include
:
wmt_common_yaml
include
:
wmt_common_yaml
...
...
Prev
1
…
7
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment