Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
d7779945
Unverified
Commit
d7779945
authored
May 13, 2024
by
Jess
Committed by
GitHub
May 13, 2024
Browse files
Merge pull request #12 from JessicaOjo/africamgsm
update f1 function
parents
012c8cac
2a285714
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
18 additions
and
6 deletions
+18
-6
lm_eval/tasks/afrixnli/en-direct/afrixnli_en_direct_yaml
lm_eval/tasks/afrixnli/en-direct/afrixnli_en_direct_yaml
+1
-4
lm_eval/tasks/afrixnli/native-direct/afrixnli_native_direct_yaml
.../tasks/afrixnli/native-direct/afrixnli_native_direct_yaml
+1
-1
lm_eval/tasks/afrixnli/native-direct/utils.py
lm_eval/tasks/afrixnli/native-direct/utils.py
+9
-0
lm_eval/tasks/afrixnli/utils.py
lm_eval/tasks/afrixnli/utils.py
+7
-1
No files found.
lm_eval/tasks/afrixnli/en-direct/afrixnli_en_direct_yaml
View file @
d7779945
...
@@ -20,7 +20,7 @@ should_decontaminate: true
...
@@ -20,7 +20,7 @@ should_decontaminate: true
doc_to_decontamination_query: premise
doc_to_decontamination_query: premise
metric_list:
metric_list:
- metric: f1
- metric: f1
aggregation:
f1
aggregation:
!function utils.weighted_f1_score
average: weighted
average: weighted
higher_is_better: True
higher_is_better: True
ignore_case: true
ignore_case: true
...
@@ -30,8 +30,5 @@ metric_list:
...
@@ -30,8 +30,5 @@ metric_list:
higher_is_better: true
higher_is_better: true
ignore_case: true
ignore_case: true
ignore_punctuation: true
ignore_punctuation: true
regexes_to_ignore:
- ","
- "\\$"
metadata:
metadata:
version: 1.0
version: 1.0
lm_eval/tasks/afrixnli/native-direct/afrixnli_native_direct_yaml
View file @
d7779945
...
@@ -11,7 +11,7 @@ doc_to_target: label
...
@@ -11,7 +11,7 @@ doc_to_target: label
doc_to_text: ""
doc_to_text: ""
metric_list:
metric_list:
- metric: f1
- metric: f1
aggregation:
f1
aggregation:
!function utils.weighted_f1_score
average: weighted
average: weighted
higher_is_better: True
higher_is_better: True
ignore_case: true
ignore_case: true
...
...
lm_eval/tasks/afrixnli/native-direct/utils.py
0 → 100644
View file @
d7779945
from
sklearn.metrics
import
f1_score
def weighted_f1_score(items):
    """Aggregate (gold, prediction) pairs into a single weighted F1 score.

    ``items`` is transposed with ``zip(*items)``; the first resulting column
    is passed to ``f1_score`` as the gold labels and the second as the
    predictions, with ``average="weighted"``.

    :param items: iterable of sequences whose first two entries are the gold
        label and the predicted label, in that order.
    :return: whatever ``sklearn.metrics.f1_score`` returns for those columns.
    """
    # Transpose rows of (gold, pred, ...) into per-field columns.
    columns = tuple(zip(*items))
    references, predictions = columns[0], columns[1]
    return f1_score(references, predictions, average="weighted")
lm_eval/tasks/afrixnli/utils.py
View file @
d7779945
...
@@ -50,6 +50,12 @@ LANGUAGES = {
...
@@ -50,6 +50,12 @@ LANGUAGES = {
"NEUTRAL_LABEL"
:
"Na none"
,
"NEUTRAL_LABEL"
:
"Na none"
,
"CONTRADICTION_LABEL"
:
"Oya"
"CONTRADICTION_LABEL"
:
"Oya"
},
},
"lin"
:
{
"QUESTION_WORD"
:
"Malamu"
,
"ENTAILMENT_LABEL"
:
"Iyo"
,
"NEUTRAL_LABEL"
:
"Lisusu"
,
"CONTRADICTION_LABEL"
:
"Te"
},
"lug"
:
{
"lug"
:
{
"QUESTION_WORD"
:
"Kituufu"
,
"QUESTION_WORD"
:
"Kituufu"
,
"ENTAILMENT_LABEL"
:
"Yee"
,
"ENTAILMENT_LABEL"
:
"Yee"
,
...
@@ -121,7 +127,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -121,7 +127,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
:param overwrite: Whether to overwrite files if they already exist.
:param overwrite: Whether to overwrite files if they already exist.
"""
"""
err
=
[]
err
=
[]
languages
=
[
'eng'
,
'amh'
,
'ibo'
,
'fra'
,
'sna'
,
'wol'
,
'ewe'
,
'lug'
,
'xho'
,
'kin'
,
'twi'
,
'zul'
,
'orm'
,
languages
=
[
'eng'
,
'amh'
,
'ibo'
,
'fra'
,
'sna'
,
'wol'
,
'ewe'
,
'lin'
,
'lug'
,
'xho'
,
'kin'
,
'twi'
,
'zul'
,
'orm'
,
'yor'
,
'hau'
,
'sot'
,
'swa'
]
'yor'
,
'hau'
,
'sot'
,
'swa'
]
for
lang
in
languages
:
for
lang
in
languages
:
try
:
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment