Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
754f1f24
Unverified
Commit
754f1f24
authored
Sep 01, 2021
by
sdtblck
Committed by
GitHub
Sep 01, 2021
Browse files
Fix lambada multilingual tasks
the old task constructor was making everything spanish
parent
9d46382b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
11 deletions
+40
-11
lm_eval/tasks/lambada_multilingual.py
lm_eval/tasks/lambada_multilingual.py
+40
-11
No files found.
lm_eval/tasks/lambada_multilingual.py
View file @
754f1f24
...
@@ -5,6 +5,7 @@ from lm_eval.utils import sh
...
@@ -5,6 +5,7 @@ from lm_eval.utils import sh
from
best_download
import
download_file
from
best_download
import
download_file
import
json
import
json
from
functools
import
partial
from
functools
import
partial
import
os
# This task is lambada but machine-translated to the other languages.
# This task is lambada but machine-translated to the other languages.
...
@@ -25,23 +26,51 @@ class MultilingualLAMBADA(lambada.LAMBADA):
...
@@ -25,23 +26,51 @@ class MultilingualLAMBADA(lambada.LAMBADA):
def
download
(
self
):
def
download
(
self
):
sh
(
"mkdir -p data/lambada"
)
sh
(
"mkdir -p data/lambada"
)
download_file
(
f
=
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
f
"http://eaidata.bmk.sh/data/lambada_test_
{
self
.
LANG
}
.jsonl"
,
url
=
f
"http://eaidata.bmk.sh/data/lambada_test_
{
self
.
LANG
}
.jsonl"
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
,
try
:
CHECKSUMS
[
self
.
LANG
]
if
not
os
.
path
.
exists
():
)
download_file
(
url
,
f
,
CHECKSUMS
[
self
.
LANG
]
)
except
:
# fallback - for some reason best_download doesnt work all the time here
sh
(
f
"wget
{
url
}
-O
{
f
}
"
)
sh
(
f
'echo "
{
CHECKSUMS
[
self
.
LANG
]
}
{
f
}
" | sha256sum --check'
)
def
validation_docs
(
self
):
def
validation_docs
(
self
):
with
open
(
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
)
as
fh
:
with
open
(
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
)
as
fh
:
for
line
in
fh
:
for
line
in
fh
:
yield
json
.
loads
(
line
)
yield
json
.
loads
(
line
)
class
MultilingualLAMBADAEN
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'en'
)
class
MultilingualLAMBADAFR
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'fr'
)
class
MultilingualLAMBADADE
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'de'
)
class
MultilingualLAMBADAIT
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'it'
)
class
MultilingualLAMBADAES
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'es'
)
LANG_CLASSES
=
[
MultilingualLAMBADAEN
,
MultilingualLAMBADAFR
,
MultilingualLAMBADADE
,
MultilingualLAMBADAIT
,
MultilingualLAMBADAES
]
def
construct_tasks
():
def
construct_tasks
():
tasks
=
{}
tasks
=
{}
for
lang
in
LANGS
:
for
lang
,
lang_class
in
zip
(
LANGS
,
LANG_CLASSES
):
class
MultilingualLAMBADAInstance
(
MultilingualLAMBADA
):
tasks
[
f
"lambada_mt_
{
lang
}
"
]
=
lang_class
def
__init__
(
self
):
super
().
__init__
(
lang
)
tasks
[
f
"lambada_mt_
{
lang
}
"
]
=
MultilingualLAMBADAInstance
return
tasks
return
tasks
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment