Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
2e2f28a5
Commit
2e2f28a5
authored
Aug 17, 2023
by
haileyschoelkopf
Browse files
add XX->en direction to translation tasks
parent
129762c2
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
86 additions
and
41 deletions
+86
-41
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
+1
-1
lm_eval/tasks/translation/utils.py
lm_eval/tasks/translation/utils.py
+43
-40
lm_eval/tasks/translation/wmt14_en-fr.yaml
lm_eval/tasks/translation/wmt14_en-fr.yaml
+14
-0
lm_eval/tasks/translation/wmt16_en-de.yaml
lm_eval/tasks/translation/wmt16_en-de.yaml
+14
-0
lm_eval/tasks/translation/wmt16_en-ro.yaml
lm_eval/tasks/translation/wmt16_en-ro.yaml
+14
-0
No files found.
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
View file @
2e2f28a5
 # Generated by utils.py
-dataset_name: iwslt2017-ar-en
+dataset_name: iwslt2017-en-ar
 dataset_path: iwslt2017
 doc_to_target: ' {{translation["en"]}}'
 doc_to_text: 'Arabic phrase: {{translation["ar"]}}
...
...
lm_eval/tasks/translation/utils.py
View file @
2e2f28a5
...
...
@@ -30,7 +30,7 @@ gpt3_translation_benchmarks = {
 LANGUAGES = {
     **gpt3_translation_benchmarks,
     # "wmt20": sacrebleu.get_langpairs_for_testset("wmt20"),
-    "iwslt2017": ["en-ar", "ar-en"],  # Arabic
+    "iwslt2017": ["en-ar"],  # Arabic
 }
...
...
@@ -49,45 +49,48 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
"""
err
=
[]
for
lang
in
LANGUAGES
.
keys
():
for
lang_pair
in
LANGUAGES
[
lang
]:
file_name
=
f
"
{
lang
}
_
{
lang_pair
}
.yaml"
try
:
src_lang
,
_
,
tgt_lang
=
lang_pair
.
partition
(
"-"
)
source
,
target
=
code_to_language
(
src_lang
),
code_to_language
(
tgt_lang
)
groups
=
[
"greedy_until"
,
"translation"
,
lang
]
if
lang
in
gpt3_translation_benchmarks
.
keys
():
groups
+=
[
"gpt3_translation_benchmarks"
]
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
,
)
as
f
:
f
.
write
(
"# Generated by utils.py
\n
"
)
yaml
.
dump
(
{
"include"
:
"wmt_common_yaml"
,
"group"
:
groups
,
"dataset_path"
:
lang
,
"dataset_name"
:
lang_pair
if
not
(
lang
==
"iwslt2017"
)
else
"iwslt2017-"
+
lang_pair
,
"task"
:
f
"
{
lang
}
-
{
lang_pair
}
"
,
"doc_to_text"
:
f
"
{
source
}
phrase: "
+
"{{translation["
+
f
'"
{
src_lang
}
"'
+
"]}}
\n
"
+
f
"
{
target
}
phrase:"
,
"doc_to_target"
:
" {{"
+
"translation["
+
f
'"
{
tgt_lang
}
"]'
+
"}}"
,
},
f
,
)
except
FileExistsError
:
err
.
append
(
file_name
)
for
dataset_name
in
LANGUAGES
[
lang
]:
src_lang
,
_
,
tgt_lang
=
dataset_name
.
partition
(
"-"
)
for
src
,
tgt
in
[[
src_lang
,
tgt_lang
],
[
tgt_lang
,
src_lang
]]:
# both translation directions for each lang pair
lang_pair
=
src
+
"-"
+
tgt
file_name
=
f
"
{
lang
}
_
{
lang_pair
}
.yaml"
try
:
source
,
target
=
code_to_language
(
src
),
code_to_language
(
tgt
)
groups
=
[
"greedy_until"
,
"translation"
,
lang
]
if
lang
in
gpt3_translation_benchmarks
.
keys
():
groups
+=
[
"gpt3_translation_benchmarks"
]
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
,
)
as
f
:
f
.
write
(
"# Generated by utils.py
\n
"
)
yaml
.
dump
(
{
"include"
:
"wmt_common_yaml"
,
"group"
:
groups
,
"dataset_path"
:
lang
,
"dataset_name"
:
dataset_name
if
not
(
lang
==
"iwslt2017"
)
else
"iwslt2017-"
+
dataset_name
,
"task"
:
f
"
{
lang
}
-
{
lang_pair
}
"
,
"doc_to_text"
:
f
"
{
source
}
phrase: "
+
"{{translation["
+
f
'"
{
src
}
"'
+
"]}}
\n
"
+
f
"
{
target
}
phrase:"
,
"doc_to_target"
:
" {{"
+
"translation["
+
f
'"
{
tgt
}
"]'
+
"}}"
,
},
f
,
)
except
FileExistsError
:
err
.
append
(
file_name
)
if
len
(
err
)
>
0
:
raise
FileExistsError
(
...
...
lm_eval/tasks/translation/wmt14_en-fr.yaml
0 → 100644
View file @
2e2f28a5
# Generated by utils.py
dataset_name: fr-en
dataset_path: wmt14
doc_to_target: ' {{translation["fr"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}

  French phrase:'
group:
- greedy_until
- translation
- wmt14
- gpt3_translation_benchmarks
include: wmt_common_yaml
task: wmt14-en-fr
lm_eval/tasks/translation/wmt16_en-de.yaml
0 → 100644
View file @
2e2f28a5
# Generated by utils.py
dataset_name: de-en
dataset_path: wmt16
doc_to_target: ' {{translation["de"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}

  German phrase:'
group:
- greedy_until
- translation
- wmt16
- gpt3_translation_benchmarks
include: wmt_common_yaml
task: wmt16-en-de
lm_eval/tasks/translation/wmt16_en-ro.yaml
0 → 100644
View file @
2e2f28a5
# Generated by utils.py
dataset_name: ro-en
dataset_path: wmt16
doc_to_target: ' {{translation["ro"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}

  Romanian phrase:'
group:
- greedy_until
- translation
- wmt16
- gpt3_translation_benchmarks
include: wmt_common_yaml
task: wmt16-en-ro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment