Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
a27ea4bd
Commit
a27ea4bd
authored
May 07, 2024
by
JessicaOjo
Browse files
add afrixnli to task
parent
e6394715
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
106 additions
and
0 deletions
+106
-0
lm_eval/tasks/afrixnli/preprocess_wikitext.py
lm_eval/tasks/afrixnli/preprocess_wikitext.py
+32
-0
lm_eval/tasks/afrixnli/utils.py
lm_eval/tasks/afrixnli/utils.py
+74
-0
No files found.
lm_eval/tasks/afrixnli/preprocess_wikitext.py
0 → 100644
View file @
a27ea4bd
import
re
def wikitext_detokenizer(doc):
    """Reduce the text stored under ``doc["label"]`` to an NLI class name.

    The text is stripped of periods and commas, lowercased, cut down to the
    part between the last ``<pad>`` marker and the following ``</s>`` marker,
    and then passed through ``extract_answer`` and ``verbalizer``.

    :param doc: Mapping with a ``"label"`` entry (assumed to be a string).
    :return: Whatever ``verbalizer`` returns for the cleaned-up text.
    """
    text = doc["label"]

    # str.replace treats its argument literally, so replace('[.,]', '') only
    # removed the four-character text "[.,]". A character class needs re.sub.
    text = re.sub(r"[.,]", "", text).lower()

    # The separator is a literal backslash followed by "n", twice (not real
    # newlines). str.split returns a list; the original kept the list and then
    # called .split on it, which raises AttributeError on every call.
    # NOTE(review): keeping the first segment is an assumption - confirm that
    # the answer is expected before the separator.
    text = text.split("\\n\\n")[0]

    # Keep what follows the last "<pad>" and precedes the next "</s>".
    text = text.split("<pad>")[-1].split("</s>")[0].strip()

    text = extract_answer(text)
    return verbalizer(text.strip())
def extract_answer(string):
    """Pull an NLI label out of a free-text answer.

    Searches ``string`` (case-insensitively) for one of a fixed set of lead-in
    phrases followed by optional whitespace and one of ``neutral``,
    ``entailment`` or ``contradiction``.

    :param string: Text to search.
    :return: The matched label exactly as it appears in ``string``, or
        ``string`` itself when nothing matches.
    """
    # Group 1 is the lead-in phrase, group 2 is the label.
    label_regex = (
        r'(\*\*answer:\*\*|\*answer is:\*|\*\*|\*\*|\*answer is exact\*|label:|the premise and hypothesis '
        r'are|the premise and the hypothesis is|the premise and the hypothesis is a|described as|therefore they '
        r'are|therefore|are considered|is an exact|it is|is a|is)\s*(neutral|entailment|contradiction)'
    )
    found = re.search(label_regex, string, re.IGNORECASE)
    if not found:
        return string
    return found.group(2)
def verbalizer(string):
    """Map a free-text answer onto a canonical NLI label.

    Each label has a list of surface forms. Labels are tried in the order
    entailment, contradiction, neutral, and the first label with a surface
    form occurring as a substring of ``string`` wins.

    :param string: Text to classify (the comparison is case-sensitive).
    :return: The canonical label, or ``string`` unchanged when no surface
        form is found.
    """
    surface_forms = {
        "entailment": ['encouragement', 'entitlement', 'entails', 'entailed', 'entailment'],
        "contradiction": ['contradictory', 'contradicts', 'contradiction'],
        "neutral": ['neutral'],
    }
    return next(
        (
            label
            for label, variants in surface_forms.items()
            if any(variant in string for variant in variants)
        ),
        string,
    )
lm_eval/tasks/afrixnli/utils.py
0 → 100644
View file @
a27ea4bd
import
yaml
import
argparse
class FunctionTag:
    """Wrapper marking a value to be emitted under the ``!function`` YAML tag.

    The instance only stores the value; ``function_representer`` reads it
    back through the ``value`` attribute.
    """

    def __init__(self, value):
        """Store ``value`` unchanged on the instance."""
        self.value = value
def function_representer(dumper, data):
    """Represent ``data`` as a scalar tagged ``!function``.

    :param dumper: Object providing ``represent_scalar`` (a PyYAML dumper
        when called through ``yaml.dump``).
    :param data: Object whose ``value`` attribute is the scalar text.
    :return: Whatever ``dumper.represent_scalar`` returns.
    """
    tag = '!function'
    # NOTE(review): style='' presumably asks for plain, unquoted output -
    # confirm against the PyYAML documentation.
    scalar_node = dumper.represent_scalar(tag, data.value, style='')
    return scalar_node
# Register function_representer with PyYAML as the representer for
# FunctionTag objects. This runs once, at import time.
yaml.add_representer(FunctionTag, function_representer)
def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
    """
    Write one ``afrixnli_<lang>.yaml`` task config per supported language.

    :param output_dir: Directory the files are written into.
    :param overwrite: When true, existing files are replaced. Otherwise an
        existing file is left untouched and its name is collected.
    :raises FileExistsError: After every language has been attempted, if any
        file already existed and ``overwrite`` is false.
    """
    lang_codes = [
        'amh', 'ibo', 'fra', 'sna', 'lin', 'wol', 'ewe', 'lug', 'xho',
        'kin', 'twi', 'zul', 'orm', 'yor', 'hau', 'sot', 'swa',
    ]
    # "x" makes open() raise FileExistsError instead of truncating.
    open_mode = "w" if overwrite else "x"
    prompt = (
        "Premise: {{premise}}\nHypothesis: {{hypothesis}}\nIs it entailment, "
        "contradiction, or neutral?"
    )

    already_present = []
    for lang in lang_codes:
        file_name = f"afrixnli_{lang}.yaml"
        task_config = {
            "include": "afrixnli_common_yaml",
            "task": f"afrixnli_{lang}",
            "dataset_name": lang,
            # The author left a FunctionTag target pointing at
            # 'preprocess_wikitext.wikitext_detokenizer' disabled here;
            # the plain label template is what gets written.
            "doc_to_target": "{{label}}",
            "doc_to_text": prompt,
        }
        try:
            with open(f"{output_dir}/{file_name}", open_mode, encoding="utf8") as out:
                out.write("# Generated by utils.py\n")
                yaml.dump(task_config, out, allow_unicode=True)
        except FileExistsError:
            # Keep going so the final error lists every clashing file.
            already_present.append(file_name)

    if already_present:
        skipped = ', '.join(already_present)
        raise FileExistsError(
            "Files were not created because they already exist (use --overwrite flag):"
            f" {skipped}"
        )
def main() -> None:
    """Parse CLI args and generate language-specific yaml files.

    Overwriting stays the default. ``--overwrite`` is a ``store_true`` flag
    whose default is already ``True``, so on its own it could never switch
    overwriting off and the "file already exists" error in
    ``gen_lang_yamls`` was unreachable from the command line.
    ``--no-overwrite`` writes ``False`` to the same destination to make that
    mode selectable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        default=True,
        action="store_true",
        help="Overwrite files if they already exist (default)",
    )
    parser.add_argument(
        "--no-overwrite",
        dest="overwrite",
        action="store_false",
        help="Fail instead of overwriting files that already exist",
    )
    parser.add_argument(
        "--output-dir", default=".", help="Directory to write yaml files to"
    )
    args = parser.parse_args()

    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite)
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment