Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
fef54568
Commit
fef54568
authored
May 12, 2024
by
JessicaOjo
Browse files
filter and metric fix -mgsm
parent
b691c952
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
21 additions
and
61 deletions
+21
-61
lm_eval/filters/extraction.py
lm_eval/filters/extraction.py
+0
-42
lm_eval/tasks/afrimgsm/direct.sh
lm_eval/tasks/afrimgsm/direct.sh
+2
-2
lm_eval/tasks/afrimgsm/direct/afrimgsm_common_yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_common_yaml
+15
-11
lm_eval/tasks/afrimgsm/gpt_direct.sh
lm_eval/tasks/afrimgsm/gpt_direct.sh
+4
-6
No files found.
lm_eval/filters/extraction.py
View file @
fef54568
...
...
@@ -49,48 +49,6 @@ class RegexFilter(Filter):
return
filtered_resps
# NOTE(review): the preceding diff-hunk context shows another
# `class RegexFilter(Filter)` earlier in this module; if so, this definition
# rebinds the module-level name `RegexFilter` — confirm that is intended.
@register_filter("regex-numbers")
class RegexFilter(Filter):
    """Regex-based answer extractor registered under the name "regex-numbers".

    For every model response the compiled pattern is run with
    ``re.findall``; one of the matches is selected, stripped of surrounding
    whitespace and of all ``,`` and ``.`` characters, and returned in place of
    the raw response. Responses without a usable match are replaced by
    ``fallback``.
    """

    def __init__(
        self,
        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
        group_select: int = 0,
        fallback: "str | int" = 0,
    ) -> None:
        """Compile ``regex_pattern`` and remember the selection settings.

        Args:
            regex_pattern: Pattern passed to ``re.compile``.
            group_select: Index into the list returned by ``findall`` that
                picks which match to keep (negative indices count from the
                end).
            fallback: Value emitted when no usable match is found. The
                default is the integer ``0``, so the annotation admits both
                ``str`` and ``int``.
        """
        self.regex_pattern = regex_pattern
        self.regex = re.compile(regex_pattern)
        self.group_select = group_select
        self.fallback = fallback

    def _extract(self, resp):
        """Return the cleaned match for one response, or ``self.fallback``."""
        matches = self.regex.findall(resp)
        if not matches:
            return self.fallback

        try:
            selected = matches[self.group_select]
        except IndexError:
            # Fewer matches than the requested index: treat as "no match"
            # instead of aborting the whole evaluation run.
            return self.fallback

        if isinstance(selected, tuple):
            # With several capture groups findall yields tuples; keep the
            # first group that actually captured something.
            selected = next((group for group in selected if group), None)
            if selected is None:
                # Every group was empty, so there is nothing to extract.
                return self.fallback

        # NOTE(review): removing "." also collapses decimals ("3.5" -> "35");
        # presumably meant to drop thousands separators / trailing periods —
        # confirm against the task's expected answer format.
        return selected.strip().replace(",", "").replace(".", "")

    def apply(self, resps, docs):
        """Filter every response set independently.

        Args:
            resps: Iterable in which each element is itself an iterable of
                model responses for one particular input/target pair.
            docs: Accepted for interface compatibility; not used here.

        Returns:
            A list with one inner list per element of ``resps``, holding the
            extracted value (or ``fallback``) for each response.
        """
        return [[self._extract(resp) for resp in inst] for inst in resps]
@
register_filter
(
"remove_whitespace"
)
class
WhitespaceFilter
(
Filter
):
""" """
...
...
lm_eval/tasks/afrimgsm/direct.sh
100644 → 100755
View file @
fef54568
...
...
@@ -15,14 +15,14 @@ models=(
"RWKV/v5-EagleX-v2-7B-HF"
"RWKV/rwkv-6-world-7b"
)
task
=
afrimgsm_direct_amh,afrimgsm_direct_
ibo
,afrimgsm_direct_fra,afrimgsm_direct_
sna
,afrimgsm_direct_
lin
,afrimgsm_direct_
wol
,afrimgsm_direct_
ewe
,afrimgsm_direct_lug,afrimgsm_direct_
xho
,afrimgsm_direct_
kin
,afrimgsm_direct_
twi
,afrimgsm_direct_
zul
,afrimgsm_direct_
orm
,afrimgsm_direct_
yor
,afrimgsm_direct_
hau
,afrimgsm_direct_
sot
,afrimgsm_direct_
swa
task
=
afrimgsm_direct_amh,afrimgsm_direct_
eng,afrimgsm_direct_ewe
,afrimgsm_direct_fra,afrimgsm_direct_
hau
,afrimgsm_direct_
ibo
,afrimgsm_direct_
kin
,afrimgsm_direct_
lin
,afrimgsm_direct_lug,afrimgsm_direct_
orm
,afrimgsm_direct_
sna
,afrimgsm_direct_
sot
,afrimgsm_direct_
swa
,afrimgsm_direct_
twi
,afrimgsm_direct_
wol
,afrimgsm_direct_
xho
,afrimgsm_direct_
yor
,afrimgsm_direct_
zul
for
model
in
"
${
models
[@]
}
"
do
echo
"Evaluating model:
$model
"
for
fewshot
in
0 2 4 6 8
do
export
OUTPUT_DIR
=
results/
$
{
model
##*/
}
/
$
fewshot
export
OUTPUT_DIR
=
results/
$fewshot
mkdir
-p
"
$OUTPUT_DIR
"
...
...
lm_eval/tasks/afrimgsm/direct/afrimgsm_common_yaml
View file @
fef54568
...
...
@@ -9,23 +9,27 @@ target_delimiter: ""
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
- "\n\n"
- "\n"
do_sample: false
temperature: 0.0
filter_list:
- name: remove_whitespace
filter:
- function: remove_whitespace
- function: take_first
- filter:
- function: regex
-numbers
- function: regex
group_select: -1
regex_pattern: (-?[0-9.,]{2,})|(-?[0-9]+)
regex_pattern: (-?[
$
0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metric_list:
- metric:
squad
aggregation:
squad_f1
average: weighted
hf_evaluate: Fals
e
h
ig
her_is_better
:
T
rue
- metric:
exact_match
aggregation:
mean
higher_is_better: true
ignore_case: tru
e
ig
nore_punctuation
:
t
rue
metadata:
version: 1.0
lm_eval/tasks/afrimgsm/gpt_direct.sh
View file @
fef54568
...
...
@@ -4,22 +4,20 @@ models=(
"gpt-3.5-turbo"
"gpt-4-0125-preview"
)
task
=
afrimgsm_direct_
amh,afrimgsm_direct_ibo
,afrimgsm_direct_fra,afrimgsm_direct_
sna
,afrimgsm_direct_
lin
,afrimgsm_direct_
wol
,afrimgsm_direct_
ewe
,afrimgsm_direct_lug,afrimgsm_direct_
xho
,afrimgsm_direct_
kin
,afrimgsm_direct_
twi
,afrimgsm_direct_
zul
,afrimgsm_direct_
orm
,afrimgsm_direct_
yor
,afrimgsm_direct_
hau
,afrimgsm_direct_
sot
,afrimgsm_direct_
swa
task
=
afrimgsm_direct_
eng,afrimgsm_direct_fra,afrimgsm_direct_swa
#afrimgsm_direct_ewe
,afrimgsm_direct_fra,afrimgsm_direct_
hau
,afrimgsm_direct_
ibo
,afrimgsm_direct_
kin
,afrimgsm_direct_
lin
,afrimgsm_direct_lug,afrimgsm_direct_
orm
,afrimgsm_direct_
sna
,afrimgsm_direct_
sot
,afrimgsm_direct_
swa
,afrimgsm_direct_
twi
,afrimgsm_direct_
wol
,afrimgsm_direct_
xho
,afrimgsm_direct_
yor
,afrimgsm_direct_
zul
for
model
in
"
${
models
[@]
}
"
do
echo
"Evaluating model:
$model
"
for
fewshot
in
0 2 4 6 8
do
export
OUTPUT_DIR
=
results/
$
{
model
##*/
}
/
$
fewshot
export
OUTPUT_DIR
=
results/
$fewshot
mkdir
-p
"
$OUTPUT_DIR
"
lm_eval
--model
openai-chat-completions
\
--model_args
model
=
"
${
model
}
"
\
--tasks
$task
\
--device
cuda:0
\
--batch_size
16
\
--model_args
model
=
"
${
model
}
"
\
--tasks
$task
\
--output_path
"
$OUTPUT_DIR
"
\
--num_fewshot
$fewshot
\
--verbosity
DEBUG
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment