Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
2e2f28a5
Commit
2e2f28a5
authored
Aug 17, 2023
by
haileyschoelkopf
Browse files
add XX->en direction to translation tasks
parent
129762c2
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
86 additions
and
41 deletions
+86
-41
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
+1
-1
lm_eval/tasks/translation/utils.py
lm_eval/tasks/translation/utils.py
+43
-40
lm_eval/tasks/translation/wmt14_en-fr.yaml
lm_eval/tasks/translation/wmt14_en-fr.yaml
+14
-0
lm_eval/tasks/translation/wmt16_en-de.yaml
lm_eval/tasks/translation/wmt16_en-de.yaml
+14
-0
lm_eval/tasks/translation/wmt16_en-ro.yaml
lm_eval/tasks/translation/wmt16_en-ro.yaml
+14
-0
No files found.
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
View file @
2e2f28a5
# Generated by utils.py
# Generated by utils.py
dataset_name
:
iwslt2017-
ar-en
dataset_name
:
iwslt2017-
en-ar
dataset_path
:
iwslt2017
dataset_path
:
iwslt2017
doc_to_target
:
'
{{translation["en"]}}'
doc_to_target
:
'
{{translation["en"]}}'
doc_to_text
:
'
Arabic
phrase:
{{translation["ar"]}}
doc_to_text
:
'
Arabic
phrase:
{{translation["ar"]}}
...
...
lm_eval/tasks/translation/utils.py
View file @
2e2f28a5
...
@@ -30,7 +30,7 @@ gpt3_translation_benchmarks = {
...
@@ -30,7 +30,7 @@ gpt3_translation_benchmarks = {
LANGUAGES
=
{
LANGUAGES
=
{
**
gpt3_translation_benchmarks
,
**
gpt3_translation_benchmarks
,
# "wmt20": sacrebleu.get_langpairs_for_testset("wmt20"),
# "wmt20": sacrebleu.get_langpairs_for_testset("wmt20"),
"iwslt2017"
:
[
"en-ar"
,
"ar-en"
],
# Arabic
"iwslt2017"
:
[
"en-ar"
],
# Arabic
}
}
...
@@ -49,45 +49,48 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
...
@@ -49,45 +49,48 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
"""
"""
err
=
[]
err
=
[]
for
lang
in
LANGUAGES
.
keys
():
for
lang
in
LANGUAGES
.
keys
():
for
lang_pair
in
LANGUAGES
[
lang
]:
for
dataset_name
in
LANGUAGES
[
lang
]:
file_name
=
f
"
{
lang
}
_
{
lang_pair
}
.yaml"
src_lang
,
_
,
tgt_lang
=
dataset_name
.
partition
(
"-"
)
try
:
for
src
,
tgt
in
[[
src_lang
,
tgt_lang
],
[
tgt_lang
,
src_lang
]]:
src_lang
,
_
,
tgt_lang
=
lang_pair
.
partition
(
"-"
)
# both translation directions for each lang pair
source
,
target
=
code_to_language
(
src_lang
),
code_to_language
(
tgt_lang
)
lang_pair
=
src
+
"-"
+
tgt
file_name
=
f
"
{
lang
}
_
{
lang_pair
}
.yaml"
groups
=
[
"greedy_until"
,
"translation"
,
lang
]
try
:
if
lang
in
gpt3_translation_benchmarks
.
keys
():
source
,
target
=
code_to_language
(
src
),
code_to_language
(
tgt
)
groups
+=
[
"gpt3_translation_benchmarks"
]
groups
=
[
"greedy_until"
,
"translation"
,
lang
]
with
open
(
if
lang
in
gpt3_translation_benchmarks
.
keys
():
f
"
{
output_dir
}
/
{
file_name
}
"
,
groups
+=
[
"gpt3_translation_benchmarks"
]
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
,
with
open
(
)
as
f
:
f
"
{
output_dir
}
/
{
file_name
}
"
,
f
.
write
(
"# Generated by utils.py
\n
"
)
"w"
if
overwrite
else
"x"
,
yaml
.
dump
(
encoding
=
"utf8"
,
{
)
as
f
:
"include"
:
"wmt_common_yaml"
,
f
.
write
(
"# Generated by utils.py
\n
"
)
"group"
:
groups
,
yaml
.
dump
(
"dataset_path"
:
lang
,
{
"dataset_name"
:
lang_pair
"include"
:
"wmt_common_yaml"
,
if
not
(
lang
==
"iwslt2017"
)
"group"
:
groups
,
else
"iwslt2017-"
+
lang_pair
,
"dataset_path"
:
lang
,
"task"
:
f
"
{
lang
}
-
{
lang_pair
}
"
,
"dataset_name"
:
dataset_name
"doc_to_text"
:
f
"
{
source
}
phrase: "
if
not
(
lang
==
"iwslt2017"
)
+
"{{translation["
else
"iwslt2017-"
+
dataset_name
,
+
f
'"
{
src_lang
}
"'
"task"
:
f
"
{
lang
}
-
{
lang_pair
}
"
,
+
"]}}
\n
"
"doc_to_text"
:
f
"
{
source
}
phrase: "
+
f
"
{
target
}
phrase:"
,
+
"{{translation["
"doc_to_target"
:
" {{"
+
f
'"
{
src
}
"'
+
"translation["
+
"]}}
\n
"
+
f
'"
{
tgt_lang
}
"]'
+
f
"
{
target
}
phrase:"
,
+
"}}"
,
"doc_to_target"
:
" {{"
},
+
"translation["
f
,
+
f
'"
{
tgt
}
"]'
)
+
"}}"
,
except
FileExistsError
:
},
err
.
append
(
file_name
)
f
,
)
except
FileExistsError
:
err
.
append
(
file_name
)
if
len
(
err
)
>
0
:
if
len
(
err
)
>
0
:
raise
FileExistsError
(
raise
FileExistsError
(
...
...
lm_eval/tasks/translation/wmt14_en-fr.yaml
0 → 100644
View file @
2e2f28a5
# Generated by utils.py
dataset_name
:
fr-en
dataset_path
:
wmt14
doc_to_target
:
'
{{translation["fr"]}}'
doc_to_text
:
'
English
phrase:
{{translation["en"]}}
French
phrase:'
group
:
-
greedy_until
-
translation
-
wmt14
-
gpt3_translation_benchmarks
include
:
wmt_common_yaml
task
:
wmt14-en-fr
lm_eval/tasks/translation/wmt16_en-de.yaml
0 → 100644
View file @
2e2f28a5
# Generated by utils.py
dataset_name
:
de-en
dataset_path
:
wmt16
doc_to_target
:
'
{{translation["de"]}}'
doc_to_text
:
'
English
phrase:
{{translation["en"]}}
German
phrase:'
group
:
-
greedy_until
-
translation
-
wmt16
-
gpt3_translation_benchmarks
include
:
wmt_common_yaml
task
:
wmt16-en-de
lm_eval/tasks/translation/wmt16_en-ro.yaml
0 → 100644
View file @
2e2f28a5
# Generated by utils.py
dataset_name
:
ro-en
dataset_path
:
wmt16
doc_to_target
:
'
{{translation["ro"]}}'
doc_to_text
:
'
English
phrase:
{{translation["en"]}}
Romanian
phrase:'
group
:
-
greedy_until
-
translation
-
wmt16
-
gpt3_translation_benchmarks
include
:
wmt_common_yaml
task
:
wmt16-en-ro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment