Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
ab96fc7e
Commit
ab96fc7e
authored
Feb 20, 2024
by
lintangsutawika
Browse files
merged with latest update
parents
bf2517cc
8680e938
Changes
128
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
421 additions
and
7 deletions
+421
-7
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_seven_objects.yaml
...cot_zeroshot/tracking_shuffled_objects_seven_objects.yaml
+15
-1
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_three_objects.yaml
...cot_zeroshot/tracking_shuffled_objects_three_objects.yaml
+15
-1
lm_eval/tasks/bbh/cot_zeroshot/utils.py
lm_eval/tasks/bbh/cot_zeroshot/utils.py
+191
-0
lm_eval/tasks/bbh/cot_zeroshot/web_of_lies.yaml
lm_eval/tasks/bbh/cot_zeroshot/web_of_lies.yaml
+16
-1
lm_eval/tasks/bbh/cot_zeroshot/word_sorting.yaml
lm_eval/tasks/bbh/cot_zeroshot/word_sorting.yaml
+11
-1
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
+9
-3
lm_eval/tasks/bbh/zeroshot/boolean_expressions.yaml
lm_eval/tasks/bbh/zeroshot/boolean_expressions.yaml
+11
-0
lm_eval/tasks/bbh/zeroshot/causal_judgement.yaml
lm_eval/tasks/bbh/zeroshot/causal_judgement.yaml
+11
-0
lm_eval/tasks/bbh/zeroshot/date_understanding.yaml
lm_eval/tasks/bbh/zeroshot/date_understanding.yaml
+13
-0
lm_eval/tasks/bbh/zeroshot/disambiguation_qa.yaml
lm_eval/tasks/bbh/zeroshot/disambiguation_qa.yaml
+13
-0
lm_eval/tasks/bbh/zeroshot/dyck_languages.yaml
lm_eval/tasks/bbh/zeroshot/dyck_languages.yaml
+10
-0
lm_eval/tasks/bbh/zeroshot/formal_fallacies.yaml
lm_eval/tasks/bbh/zeroshot/formal_fallacies.yaml
+11
-0
lm_eval/tasks/bbh/zeroshot/geometric_shapes.yaml
lm_eval/tasks/bbh/zeroshot/geometric_shapes.yaml
+13
-0
lm_eval/tasks/bbh/zeroshot/hyperbaton.yaml
lm_eval/tasks/bbh/zeroshot/hyperbaton.yaml
+13
-0
lm_eval/tasks/bbh/zeroshot/logical_deduction_five_objects.yaml
...al/tasks/bbh/zeroshot/logical_deduction_five_objects.yaml
+12
-0
lm_eval/tasks/bbh/zeroshot/logical_deduction_seven_objects.yaml
...l/tasks/bbh/zeroshot/logical_deduction_seven_objects.yaml
+12
-0
lm_eval/tasks/bbh/zeroshot/logical_deduction_three_objects.yaml
...l/tasks/bbh/zeroshot/logical_deduction_three_objects.yaml
+12
-0
lm_eval/tasks/bbh/zeroshot/movie_recommendation.yaml
lm_eval/tasks/bbh/zeroshot/movie_recommendation.yaml
+12
-0
lm_eval/tasks/bbh/zeroshot/multistep_arithmetic_two.yaml
lm_eval/tasks/bbh/zeroshot/multistep_arithmetic_two.yaml
+11
-0
lm_eval/tasks/bbh/zeroshot/navigate.yaml
lm_eval/tasks/bbh/zeroshot/navigate.yaml
+10
-0
No files found.
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_seven_objects.yaml
View file @
ab96fc7e
"
dataset_name"
:
"
tracking_shuffled_objects_seven_objects"
"
description"
:
"
A
task
requiring
determining
the
final
positions
of
a
set
of
objects
given
their
initial
positions
and
a
description
of
a
sequence
of
swaps.
\n\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step.
\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step."
"
include"
:
"
_cot_zeroshot_template_yaml"
"
task"
:
"
bbh_cot_zeroshot_tracking_shuffled_objects_seven_objects"
filter_list
:
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
-1
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
-
name
:
"
strict-match"
filter
:
-
function
:
"
regex"
regex_pattern
:
"
((?<=The
answer
is
)(.*)(?=.)|(?<=the
answer
is
)(.*)(?=.)|(?<=The
answer:
)(.*)(?=.)|(?<=The
final
answer:
)(.*)(?=.))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_three_objects.yaml
View file @
ab96fc7e
"
dataset_name"
:
"
tracking_shuffled_objects_three_objects"
"
description"
:
"
A
task
requiring
determining
the
final
positions
of
a
set
of
objects
given
their
initial
positions
and
a
description
of
a
sequence
of
swaps.
\n\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step.
\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step."
"
include"
:
"
_cot_zeroshot_template_yaml"
"
task"
:
"
bbh_cot_zeroshot_tracking_shuffled_objects_three_objects"
filter_list
:
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
-1
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
-
name
:
"
strict-match"
filter
:
-
function
:
"
regex"
regex_pattern
:
"
((?<=The
answer
is
)(.*)(?=.)|(?<=the
answer
is
)(.*)(?=.)|(?<=The
answer:
)(.*)(?=.)|(?<=The
final
answer:
)(.*)(?=.))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/cot_zeroshot/utils.py
0 → 100644
View file @
ab96fc7e
import
collections
import
re
import
sys
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
,
Filter
class ExtendedRegexFilter(RegexFilter):
    """Regex filter with optional normalisation of the text being searched.

    Adds three opt-in preprocessing steps (see ``filter_ignores``) and a
    ``find_match`` helper on top of ``RegexFilter``.
    """

    # Table for ``str.translate``: every code point whose Unicode category
    # starts with "P" (punctuation) maps to ``None``, i.e. is deleted.
    # ``sys.maxunicode`` is itself a valid code point, hence the ``+ 1``.
    punct_tbl = dict.fromkeys(
        i
        for i in range(sys.maxunicode + 1)
        if unicodedata.category(chr(i)).startswith('P')
    )

    def __init__(
        self,
        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
        group_select=0,
        fallback: str = "[invalid]",
        ignore_case=False,
        ignore_punctuation=False,
        regexes_to_ignore=None,
    ) -> None:
        """
        regex_pattern: Pattern forwarded to ``RegexFilter.__init__``
        group_select: Index into the ``findall`` result used by ``find_match``
        fallback: Value forwarded to ``RegexFilter.__init__``
        ignore_case: Lower-case the text in ``filter_ignores``
        ignore_punctuation: Strip Unicode punctuation in ``filter_ignores``
        regexes_to_ignore: Iterable of patterns removed in ``filter_ignores``,
            or None to remove nothing
        """
        # NOTE(review): this class reads ``self.group_select`` and subclasses
        # read ``self.regex`` / ``self.fallback``; presumably the base class
        # stores them -- confirm against RegexFilter.
        super().__init__(regex_pattern, group_select, fallback)
        self.ignore_case = ignore_case
        self.ignore_punctuation = ignore_punctuation
        self.regexes_to_ignore = regexes_to_ignore

    def filter_ignores(self, st):
        """Return ``st`` after applying the configured normalisation steps.

        Order: remove every ``regexes_to_ignore`` pattern, then lower-case
        (if ``ignore_case``), then delete punctuation (if
        ``ignore_punctuation``).
        """
        if self.regexes_to_ignore is not None:
            for s in self.regexes_to_ignore:
                st = re.sub(s, "", st)

        if self.ignore_case:
            st = st.lower()

        if self.ignore_punctuation:
            # https://stackoverflow.com/a/266162
            st = st.translate(self.punct_tbl)
        return st

    def find_match(self, regex, resp, convert_dict=None):
        """Pick one ``findall`` result of ``regex`` in ``resp``.

        regex: Compiled pattern (anything exposing ``findall``)
        resp: Text to search
        convert_dict: Optional mapping; when the stripped match is one of its
            keys, the mapped value is returned instead of the matched text

        Returns the stripped text of the ``group_select``-th match (for
        patterns with several groups: its first non-empty group), possibly
        converted through ``convert_dict``. Returns a falsy value (``[]`` or
        ``""``) when nothing usable was found, so callers can test the result
        with ``if not match``.
        """
        found = regex.findall(resp)
        if not found:
            return found

        try:
            match = found[self.group_select]
        except IndexError:
            # Fewer matches in this response than ``group_select`` asks for:
            # report "no match" instead of aborting the whole evaluation.
            return []

        if isinstance(match, tuple):
            # Several groups: keep the first one that captured something.
            non_empty = [m for m in match if m]
            if not non_empty:
                # Every group was empty (e.g. "(.*)" matched nothing).
                return []
            match = non_empty[0]

        match = match.strip()
        if match and convert_dict and match in convert_dict:
            match = convert_dict[match]
        return match
class MapRegexFilter(ExtendedRegexFilter):
    """Map whichever configured pattern matches a response to a fixed value."""

    def __init__(
        self,
        regex_pattern_to_value: dict = None,
        group_select=0,
        fallback: str = "[invalid]",
        ignore_case=False,
        ignore_punctuation=False,
        regexes_to_ignore=None,
    ) -> None:
        """
        regex_pattern_to_value: Match the regex pattern and change the result into the value
        group_select: Selects the (group_select)th match from the findall result. We use the whole regex_patterns, concatenated by |
        ignore_case: Lowers the case of response before matching with the given regex
        ignore_punctuation: Remove the punctuation before matching with the given regex
        regexes_to_ignore: Remove these regexes before matching with the given regex
        """
        # Avoid a shared mutable default; ``None`` means "no patterns".
        if regex_pattern_to_value is None:
            regex_pattern_to_value = {}
        super().__init__(
            '|'.join(regex_pattern_to_value),
            group_select,
            fallback,
            ignore_case,
            ignore_punctuation,
            regexes_to_ignore,
        )
        # One compiled pattern per entry, used to decide which value applies
        # to the text selected by the combined pattern.
        self.regex_to_value = {
            re.compile(pattern): value
            for pattern, value in regex_pattern_to_value.items()
        }

    @staticmethod
    def _captures_text(regex, text):
        """Return True if any ``findall`` result of ``regex`` in ``text`` holds non-blank text."""
        for found in regex.findall(text):
            parts = found if isinstance(found, tuple) else (found,)
            if any(part.strip() for part in parts):
                return True
        return False

    def apply(self, resps, docs):
        """Replace each response by the value mapped to the pattern it matches.

        resps: Iterable of response lists (one inner list per document)
        docs: Unused here

        For every response, the combined pattern selects one match (honouring
        ``group_select``) in the normalised response. The first entry of
        ``regex_to_value`` whose pattern matches that selected text supplies
        the result. ``self.fallback`` is used when nothing matches or when the
        mapped value is falsy. Returns a list of lists shaped like ``resps``.
        """
        filtered_resps = []

        for r in resps:
            filtered = []

            for resp in r:
                match = None
                whole_match = self.find_match(self.regex, self.filter_ignores(resp))
                if whole_match:
                    selected = self.filter_ignores(whole_match)
                    for regex, mapped_value in self.regex_to_value.items():
                        # ``group_select`` was already applied when picking
                        # ``whole_match``; re-applying it to the matches inside
                        # that short text would index out of range for any
                        # value other than 0 / -1.
                        if self._captures_text(regex, selected):
                            match = mapped_value
                            break
                if not match:
                    match = self.fallback

                filtered.append(match)
            filtered_resps.append(filtered)

        return filtered_resps
class NumberParseRegexFilter(ExtendedRegexFilter):
    """Extract a number, falling back to numbers spelled out in English words."""

    def apply(self, resps, docs):
        """Extract one number per response.

        resps: Iterable of response lists (one inner list per document)
        docs: Unused here

        Each response is first searched with the configured pattern. If that
        finds nothing, the lower-cased response is searched for an English
        number phrase, which is converted to digits with ``word2number``.
        ``self.fallback`` is used when neither step yields a value. Returns a
        list of lists shaped like ``resps``.
        """
        # here, we assume we have a list, in which each element is
        # a list of model responses for some particular input/target pair.
        # so we process each of these (same input/target response sets)
        # independently (and keep them a list.)
        filtered_resps = []

        # Imported lazily so these third-party packages are only needed when
        # this filter is actually used. The third-party ``regex`` module is
        # used because the pattern relies on ``(?|...)``, which ``re`` rejects.
        import regex
        from word2number import w2n

        # https://www.reddit.com/r/regex/comments/11a38uk/parsing_numbers_written_out_as_english_words
        # Raw string: the pattern contains ``\S``, which is an invalid escape
        # sequence in a normal string literal.
        english_number_regex = regex.compile(
            r"((?:(?:zero|one|two|three|four|five|(?:twen|thir|for|fif|six|seven|nine)(?|teen|ty)|eight(?:|een|y)|ten|eleven|twelve|fourteen|hundred|thousand|(?:m|b|tr)illion)(?:zero|one|two|three|four|five|(?:twen|thir|for|fif|six|seven|nine)(?:|teen|ty)|eight(?|een|y)|ten|eleven|twelve|fourteen|hundred|thousand|(?:m|b|tr)illion|[^\S\r\n]|,|and|&)+)?(?:zero|one|two|three|four|five|(?:twen|thir|for|fif|six|seven|nine)(?|teen|ty)|eight(?|een|y)|ten|eleven|twelve|fourteen|hundred|thousand|(?:m|b|tr)illion))"
        )

        for r in resps:
            filtered = []
            for resp in r:
                match = self.find_match(self.regex, resp)
                if not match:
                    match = self.find_match(english_number_regex, resp.lower())
                    if match:
                        try:
                            match = str(w2n.word_to_num(match))
                        except ValueError:
                            # The pattern accepts phrases word2number refuses
                            # (e.g. a repeated "thousand"); treat them as
                            # unparsable instead of aborting the run.
                            match = ""
                if not match:
                    match = self.fallback
                filtered.append(match)
            filtered_resps.append(filtered)

        return filtered_resps
class WordSortFilter(Filter):
    """Reduce a response to the document's words, in the order it last mentions them."""

    def apply(self, resps, docs):
        """Extract the word ordering given by each response.

        resps: Iterable of response lists (one inner list per document)
        docs: Iterable of documents, aligned with ``resps``; the text after
            ``"List:"`` in ``doc['input']`` is split on whitespace to get the
            words to look for

        For every response, all whole-word occurrences of the document's words
        are collected. Each word is kept once, at the position of its last
        occurrence, and the words are joined with single spaces. Returns a
        list of lists shaped like ``resps``.
        """
        filtered_resps = []

        for r, doc in zip(resps, docs):
            words = doc['input'].split("List:")[1].strip().split()
            # Escape each word so regex metacharacters in the input are
            # matched literally instead of altering (or breaking) the pattern.
            regex = re.compile('|'.join(f"\\b{re.escape(w)}\\b" for w in words))

            filtered = []
            for resp in r:
                occurrences = regex.findall(resp)
                # Walking backwards while de-duplicating keeps the LAST
                # occurrence of each word; reversing again restores reading
                # order.
                last_seen_first = list(collections.OrderedDict.fromkeys(reversed(occurrences)))
                last_seen_first.reverse()
                filtered.append(' '.join(last_seen_first))
            filtered_resps.append(filtered)

        return filtered_resps
class MultiChoiceRegexFilter(ExtendedRegexFilter):
    """Extract a multiple-choice answer such as ``(A)`` from a response."""

    def __init__(self, *args, **kwargs):
        r"""
        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
                        - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response.
                        - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices.
        group_select: Selects the (group_select)th match from the findall result.
        ignore_case: Ignores the case during step 1 matching
        ignore_punctuation: Remove the punctuation during step 1 matching
        regexes_to_ignore: Remove these regexes during step 1 matching
        """
        super().__init__(*args, **kwargs)

    def apply(self, resps, docs):
        """Extract one answer label per response.

        resps: Iterable of response lists (one inner list per document)
        docs: Iterable of documents, aligned with ``resps``; the options are
            read from ``doc['input']``

        Each response is tried against, in order: the configured pattern, the
        normalised option texts (mapped to their ``(X)`` label), and a bare
        letter following a colon (mapped to ``(X)``). ``self.fallback`` is
        used when all three fail. Returns a list of lists shaped like
        ``resps``.
        """
        # here, we assume we have a list, in which each element is
        # a list of model responses for some particular input/target pair.
        # so we process each of these (same input/target response sets)
        # independently (and keep them a list.)
        filtered_resps = []

        # Captures the text following each "(X)" marker, up to the next
        # newline, "^" or "(". Loop-invariant, so compiled once.
        multiple_choices_regex = re.compile(r"\([A-Z]\)([^\n^(]*)")

        for r, doc in zip(resps, docs):
            fallback_regexes = []
            choice_to_alpha = {}
            next_alpha = 'A'

            without_paren_fallback_regexes = []
            without_paren_to_target = {}

            # Options are labelled A, B, C, ... in order of appearance.
            for m in multiple_choices_regex.findall(doc['input']):
                # Strip again after filtering: removing punctuation can leave
                # edge whitespace, and ``find_match`` looks up the STRIPPED
                # match in ``choice_to_alpha``.
                m = self.filter_ignores(m.strip()).strip()
                target = f"({next_alpha})"
                if m:
                    # An empty alternative would match at every position and
                    # shadow all options listed after it, so skip it.
                    fallback_regexes.append(re.escape(m))
                    choice_to_alpha[m] = target

                without_paren_fallback_regexes.append(next_alpha)
                without_paren_to_target[next_alpha] = target

                next_alpha = chr(ord(next_alpha) + 1)

            fallback_regex = (
                re.compile('|'.join(fallback_regexes)) if fallback_regexes else None
            )
            letters = '|'.join(without_paren_fallback_regexes)
            # Raw f-string: "\s" is an invalid escape in a normal string.
            without_paren_fallback_regex = re.compile(rf":[\s]*({letters})")

            filtered = []
            for resp in r:
                match = self.find_match(self.regex, resp)
                if not match and fallback_regex is not None:
                    match = self.find_match(
                        fallback_regex, self.filter_ignores(resp), choice_to_alpha
                    )
                if not match:
                    match = self.find_match(
                        without_paren_fallback_regex, resp, without_paren_to_target
                    )
                if not match:
                    match = self.fallback
                filtered.append(match)
            filtered_resps.append(filtered)

        return filtered_resps
lm_eval/tasks/bbh/cot_zeroshot/web_of_lies.yaml
View file @
ab96fc7e
"
dataset_name"
:
"
web_of_lies"
"
description"
:
"
Evaluate
a
random
boolean
function
expressed
as
a
word
problem.
\n\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step.
\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step."
"
include"
:
"
_cot_zeroshot_template_yaml"
"
task"
:
"
bbh_cot_zeroshot_web_of_lies"
filter_list
:
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MapRegexFilter
group_select
:
-1
ignore_case
:
true
regex_pattern_to_value
:
\b(no|does not tell the truth|is not telling the truth)\b
:
"
no"
\b(yes|tells the truth|is telling the truth)\b
:
"
yes"
-
function
:
"
take_first"
-
name
:
"
strict-match"
filter
:
-
function
:
"
regex"
regex_pattern
:
"
((?<=The
answer
is
)(.*)(?=.)|(?<=the
answer
is
)(.*)(?=.)|(?<=The
answer:
)(.*)(?=.)|(?<=The
final
answer:
)(.*)(?=.))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/cot_zeroshot/word_sorting.yaml
View file @
ab96fc7e
"
dataset_name"
:
"
word_sorting"
"
description"
:
"
Sort
a
list
of
words.
\n\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step.
\n
"
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:
Let's
think
step
by
step."
"
include"
:
"
_cot_zeroshot_template_yaml"
"
task"
:
"
bbh_cot_zeroshot_word_sorting"
filter_list
:
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.WordSortFilter
-
function
:
"
take_first"
-
name
:
"
strict-match"
filter
:
-
function
:
"
regex"
regex_pattern
:
"
((?<=The
answer
is
)(.*)(?=.)|(?<=the
answer
is
)(.*)(?=.)|(?<=The
answer:
)(.*)(?=.)|(?<=The
final
answer:
)(.*)(?=.))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
View file @
ab96fc7e
...
...
@@ -7,16 +7,22 @@ metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
#
ignore_case: true
ignore_case: true
# ignore_punctuation: true
regexes_to_ignore:
- "\\.$"
- ","
- "\n"
- "\\\\"
- '"'
generation_kwargs:
until:
- "</s>"
- "Q:"
- "
\n\n
"
- "
<|im_end|>
"
- "<0x0A>"
do_sample: false
temperature: 0.0
num_fewshot: 0
metadata:
version:
1
.0
version:
2
.0
lm_eval/tasks/bbh/zeroshot/boolean_expressions.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,14 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_boolean_expressions"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
"
regex"
group_select
:
0
regex_pattern
:
"
\\
b(True|False)
\\
b"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/causal_judgement.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,14 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_causal_judgement"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
"
regex"
group_select
:
0
regex_pattern
:
"
\\
b(Yes|No|yes|no)
\\
b"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/date_understanding.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,16 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_date_understanding"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/disambiguation_qa.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,16 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_disambiguation_qa"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/dyck_languages.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,13 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_dyck_languages"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
"
regex"
group_select
:
0
regex_pattern
:
"
(?<=
)([
\"
\\
[
\\
(<{}>
\\
)
\\
]]+)|([
\"
\\
[
\\
(<{}>
\\
)
\\
]]+)"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/formal_fallacies.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,14 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_formal_fallacies"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
"
regex"
group_select
:
0
regex_pattern
:
"
\\
b(valid|invalid)
\\
b"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/geometric_shapes.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,16 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_geometric_shapes"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/hyperbaton.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,16 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_hyperbaton"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/logical_deduction_five_objects.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,15 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_logical_deduction_five_objects"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/logical_deduction_seven_objects.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,15 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_logical_deduction_seven_objects"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/logical_deduction_three_objects.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,15 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_logical_deduction_three_objects"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/movie_recommendation.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,15 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_movie_recommendation"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.MultiChoiceRegexFilter
group_select
:
0
ignore_case
:
true
ignore_punctuation
:
true
regex_pattern
:
"
(
\\
([A-Z]
\\
))"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/multistep_arithmetic_two.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,14 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_multistep_arithmetic_two"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
!function
utils.NumberParseRegexFilter
group_select
:
0
regex_pattern
:
"
([-0-9]+)"
-
function
:
"
take_first"
lm_eval/tasks/bbh/zeroshot/navigate.yaml
View file @
ab96fc7e
...
...
@@ -3,3 +3,13 @@
"
doc_to_text"
:
"
Q:
{{input}}
\n
A:"
"
include"
:
"
_zeroshot_template_yaml"
"
task"
:
"
bbh_zeroshot_navigate"
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
take_first"
-
name
:
"
flexible-extract"
filter
:
-
function
:
"
regex"
group_select
:
0
regex_pattern
:
"
\\
b(Yes|No|yes|no)
\\
b"
-
function
:
"
take_first"
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment