Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c4f634c6
Commit
c4f634c6
authored
May 10, 2024
by
JessicaOjo
Browse files
add few-shot. metric fixes
parent
ae6e5cbd
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
15 additions
and
17 deletions
+15
-17
lm_eval/api/metrics.py
lm_eval/api/metrics.py
+2
-3
lm_eval/api/task.py
lm_eval/api/task.py
+0
-4
lm_eval/filters/extraction.py
lm_eval/filters/extraction.py
+1
-1
lm_eval/tasks/afrimgsm/direct.sh
lm_eval/tasks/afrimgsm/direct.sh
+12
-9
No files found.
lm_eval/api/metrics.py
View file @
c4f634c6
...
@@ -60,11 +60,10 @@ def f1_score(items):
...
@@ -60,11 +60,10 @@ def f1_score(items):
@
register_aggregation
(
"squad_f1"
)
@
register_aggregation
(
"squad_f1"
)
def
squad_f1_score
(
items
):
def
squad_f1_score
(
items
):
gold_squad
,
pred_squad
=
[],
[]
gold_squad
,
pred_squad
=
[],
[]
for
index
,
(
ref
,
pred
)
in
enumerate
(
items
):
for
index
,
(
ref
,
pred
)
in
enumerate
(
items
):
pred_dict
=
{
'prediction_text'
:
pred
,
'id'
:
str
(
index
)}
pred_dict
=
{
'prediction_text'
:
str
(
pred
)
,
'id'
:
str
(
index
)}
ref_dict
=
{
'answers'
:
{
'answer_start'
:
[
0
],
'text'
:
[
ref
]
},
'id'
:
str
(
index
)}
ref_dict
=
{
'answers'
:
{
'answer_start'
:
[
0
],
'text'
:
str
(
ref
)
},
'id'
:
str
(
index
)}
gold_squad
.
append
(
ref_dict
)
gold_squad
.
append
(
ref_dict
)
pred_squad
.
append
(
pred_dict
)
pred_squad
.
append
(
pred_dict
)
...
...
lm_eval/api/task.py
View file @
c4f634c6
...
@@ -1366,9 +1366,6 @@ class ConfigurableTask(Task):
...
@@ -1366,9 +1366,6 @@ class ConfigurableTask(Task):
else
:
else
:
result_score
=
0.0
result_score
=
0.0
else
:
else
:
print
(
gold
)
print
(
result
)
print
(
metric
)
try
:
try
:
result_score
=
self
.
_metric_fn_list
[
metric
](
result_score
=
self
.
_metric_fn_list
[
metric
](
references
=
[
gold
],
references
=
[
gold
],
...
@@ -1376,7 +1373,6 @@ class ConfigurableTask(Task):
...
@@ -1376,7 +1373,6 @@ class ConfigurableTask(Task):
**
self
.
_metric_fn_kwargs
[
metric
],
**
self
.
_metric_fn_kwargs
[
metric
],
)
)
except
TypeError
as
error
:
# needed for now in order to use a different interface between our own metrics and HF Evaluate metrics
except
TypeError
as
error
:
# needed for now in order to use a different interface between our own metrics and HF Evaluate metrics
print
(
error
)
result_score
=
self
.
_metric_fn_list
[
metric
]([
gold
,
result
])
result_score
=
self
.
_metric_fn_list
[
metric
]([
gold
,
result
])
if
isinstance
(
result_score
,
dict
):
if
isinstance
(
result_score
,
dict
):
# TODO: this handles the case where HF evaluate returns a dict.
# TODO: this handles the case where HF evaluate returns a dict.
...
...
lm_eval/filters/extraction.py
View file @
c4f634c6
...
@@ -57,7 +57,7 @@ class RegexFilter(Filter):
...
@@ -57,7 +57,7 @@ class RegexFilter(Filter):
self
,
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
fallback
:
str
=
0
,
)
->
None
:
)
->
None
:
"""
"""
pass a string `regex` to run `re.compile(r"regex")` on.
pass a string `regex` to run `re.compile(r"regex")` on.
...
...
lm_eval/tasks/afrimgsm/direct.sh
View file @
c4f634c6
...
@@ -20,7 +20,9 @@ task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct
...
@@ -20,7 +20,9 @@ task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct
for
model
in
"
${
models
[@]
}
"
for
model
in
"
${
models
[@]
}
"
do
do
echo
"Evaluating model:
$model
"
echo
"Evaluating model:
$model
"
export
OUTPUT_DIR
=
results/
${
model
##*/
}
for
fewshot
in
0 2 4 6 8
do
export
OUTPUT_DIR
=
results/
$fewshot
/
${
model
##*/
}
mkdir
-p
"
$OUTPUT_DIR
"
mkdir
-p
"
$OUTPUT_DIR
"
...
@@ -29,6 +31,7 @@ do
...
@@ -29,6 +31,7 @@ do
--tasks
$task
\
--tasks
$task
\
--device
cuda:0
\
--device
cuda:0
\
--batch_size
16
\
--batch_size
16
\
--num_fewshot
0
\
--num_fewshot
$fewshot
\
--verbosity
DEBUG
--verbosity
DEBUG
done
done
done
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment