Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
4bb92ebc
Unverified
Commit
4bb92ebc
authored
May 14, 2024
by
Jess
Committed by
GitHub
May 14, 2024
Browse files
Merge pull request #18 from JessicaOjo/africamgsm
fix exact match bug and restructure mmlu folder
parents
348e304a
5ba791e2
Changes
44
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
30 additions
and
23 deletions
+30
-23
lm_eval/api/task.py
lm_eval/api/task.py
+12
-5
lm_eval/tasks/afrimgsm/gpt_direct.sh
lm_eval/tasks/afrimgsm/gpt_direct.sh
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
+0
-0
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
+1
-1
No files found.
lm_eval/api/task.py
View file @
4bb92ebc
...
...
@@ -1367,11 +1367,18 @@ class ConfigurableTask(Task):
result_score
=
0.0
else
:
try
:
result_score
=
self
.
_metric_fn_list
[
metric
](
references
=
[
gold
],
predictions
=
[
result
],
**
self
.
_metric_fn_kwargs
[
metric
],
)
if
metric
==
"exact_match"
:
result_score
=
self
.
_metric_fn_list
[
metric
](
references
=
[
str
(
gold
)],
predictions
=
[
str
(
result
)],
**
self
.
_metric_fn_kwargs
[
metric
],
)
else
:
result_score
=
self
.
_metric_fn_list
[
metric
](
references
=
[
gold
],
predictions
=
[
result
],
**
self
.
_metric_fn_kwargs
[
metric
],
)
except
TypeError
as
error
:
# needed for now in order to use a different interface between our own metrics and HF Evaluate metrics
result_score
=
self
.
_metric_fn_list
[
metric
]([
gold
,
result
])
if
isinstance
(
result_score
,
dict
):
...
...
lm_eval/tasks/afrimgsm/gpt_direct.sh
View file @
4bb92ebc
...
...
@@ -4,7 +4,7 @@ models=(
"gpt-3.5-turbo"
"gpt-4-0125-preview"
)
task
=
afrimgsm_direct_
eng,afrimgsm_direct_fra
,afrimgsm_direct_
swa
#
afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
task
=
afrimgsm_direct_
amh
,afrimgsm_direct_
eng,
afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
for
model
in
"
${
models
[@]
}
"
do
...
...
lm_eval/tasks/afrimmlu/afrimmlu_common_yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_common_yaml
View file @
4bb92ebc
File moved
lm_eval/tasks/afrimmlu/afrimmlu_amh.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
amh.yaml
View file @
4bb92ebc
dataset_name
:
amh
include
:
afrimmlu_common_yaml
task
:
afrimmlu_amh
task
:
afrimmlu_
direct_
amh
lm_eval/tasks/afrimmlu/afrimmlu_eng.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
eng.yaml
View file @
4bb92ebc
dataset_name
:
eng
include
:
afrimmlu_common_yaml
task
:
afrimmlu_eng
task
:
afrimmlu_
direct_
eng
lm_eval/tasks/afrimmlu/afrimmlu_ewe.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
ewe.yaml
View file @
4bb92ebc
dataset_name
:
ewe
include
:
afrimmlu_common_yaml
task
:
afrimmlu_ewe
task
:
afrimmlu_
direct_
ewe
lm_eval/tasks/afrimmlu/afrimmlu_fra.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
fra.yaml
View file @
4bb92ebc
dataset_name
:
fra
include
:
afrimmlu_common_yaml
task
:
afrimmlu_fra
\ No newline at end of file
task
:
afrimmlu_direct_fra
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_hau.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
hau.yaml
View file @
4bb92ebc
dataset_name
:
hau
include
:
afrimmlu_common_yaml
task
:
afrimmlu_hau
\ No newline at end of file
task
:
afrimmlu_direct_hau
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_ibo.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
ibo.yaml
View file @
4bb92ebc
dataset_name
:
ibo
include
:
afrimmlu_common_yaml
task
:
afrimmlu_ibo
\ No newline at end of file
task
:
afrimmlu_direct_ibo
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_kin.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
kin.yaml
View file @
4bb92ebc
dataset_name
:
kin
include
:
afrimmlu_common_yaml
task
:
afrimmlu_kin
\ No newline at end of file
task
:
afrimmlu_direct_kin
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_lin.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
lin.yaml
View file @
4bb92ebc
dataset_name
:
lin
include
:
afrimmlu_common_yaml
task
:
afrimmlu_lin
\ No newline at end of file
task
:
afrimmlu_direct_lin
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_lug.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
lug.yaml
View file @
4bb92ebc
dataset_name
:
lug
include
:
afrimmlu_common_yaml
task
:
afrimmlu_lug
\ No newline at end of file
task
:
afrimmlu_direct_lug
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_orm.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
orm.yaml
View file @
4bb92ebc
dataset_name
:
orm
include
:
afrimmlu_common_yaml
task
:
afrimmlu_orm
\ No newline at end of file
task
:
afrimmlu_direct_orm
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_sna.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
sna.yaml
View file @
4bb92ebc
dataset_name
:
sna
include
:
afrimmlu_common_yaml
task
:
afrimmlu_sna
\ No newline at end of file
task
:
afrimmlu_direct_sna
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_sot.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
sot.yaml
View file @
4bb92ebc
dataset_name
:
sot
include
:
afrimmlu_common_yaml
task
:
afrimmlu_sot
\ No newline at end of file
task
:
afrimmlu_direct_sot
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_swa.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
swa.yaml
View file @
4bb92ebc
dataset_name
:
swa
include
:
afrimmlu_common_yaml
task
:
afrimmlu_swa
\ No newline at end of file
task
:
afrimmlu_direct_swa
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_twi.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
twi.yaml
View file @
4bb92ebc
dataset_name
:
twi
include
:
afrimmlu_common_yaml
task
:
afrimmlu_twi
\ No newline at end of file
task
:
afrimmlu_direct_twi
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_wol.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
wol.yaml
View file @
4bb92ebc
dataset_name
:
wol
include
:
afrimmlu_common_yaml
task
:
afrimmlu_wol
\ No newline at end of file
task
:
afrimmlu_direct_wol
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_xho.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
xho.yaml
View file @
4bb92ebc
dataset_name
:
xho
include
:
afrimmlu_common_yaml
task
:
afrimmlu_xho
\ No newline at end of file
task
:
afrimmlu_direct_xho
\ No newline at end of file
lm_eval/tasks/afrimmlu/afrimmlu_yor.yaml
→
lm_eval/tasks/afrimmlu/
direct/
afrimmlu_
direct_
yor.yaml
View file @
4bb92ebc
dataset_name
:
yor
include
:
afrimmlu_common_yaml
task
:
afrimmlu_yor
\ No newline at end of file
task
:
afrimmlu_direct_yor
\ No newline at end of file
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment