Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
28fa2ddd
Unverified
Commit
28fa2ddd
authored
Sep 01, 2021
by
Leo Gao
Committed by
GitHub
Sep 01, 2021
Browse files
Merge pull request #215 from EleutherAI/sdtblck-patch-1
Fix lambada multilingual tasks
parents
9d46382b
754f1f24
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
11 deletions
+40
-11
lm_eval/tasks/lambada_multilingual.py
lm_eval/tasks/lambada_multilingual.py
+40
-11
No files found.
lm_eval/tasks/lambada_multilingual.py
View file @
28fa2ddd
...
@@ -5,6 +5,7 @@ from lm_eval.utils import sh
...
@@ -5,6 +5,7 @@ from lm_eval.utils import sh
from
best_download
import
download_file
from
best_download
import
download_file
import
json
import
json
from
functools
import
partial
from
functools
import
partial
import
os
# This task is lambada but machine-translated to the other languages.
# This task is lambada but machine-translated to the other languages.
...
@@ -25,23 +26,51 @@ class MultilingualLAMBADA(lambada.LAMBADA):
...
@@ -25,23 +26,51 @@ class MultilingualLAMBADA(lambada.LAMBADA):
def
download
(
self
):
def
download
(
self
):
sh
(
"mkdir -p data/lambada"
)
sh
(
"mkdir -p data/lambada"
)
download_file
(
f
=
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
f
"http://eaidata.bmk.sh/data/lambada_test_
{
self
.
LANG
}
.jsonl"
,
url
=
f
"http://eaidata.bmk.sh/data/lambada_test_
{
self
.
LANG
}
.jsonl"
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
,
try
:
CHECKSUMS
[
self
.
LANG
]
if
not
os
.
path
.
exists
():
)
download_file
(
url
,
f
,
CHECKSUMS
[
self
.
LANG
]
)
except
:
# fallback - for some reason best_download doesnt work all the time here
sh
(
f
"wget
{
url
}
-O
{
f
}
"
)
sh
(
f
'echo "
{
CHECKSUMS
[
self
.
LANG
]
}
{
f
}
" | sha256sum --check'
)
def
validation_docs
(
self
):
def
validation_docs
(
self
):
with
open
(
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
)
as
fh
:
with
open
(
f
"data/lambada/lambada_test_
{
self
.
LANG
}
.jsonl"
)
as
fh
:
for
line
in
fh
:
for
line
in
fh
:
yield
json
.
loads
(
line
)
yield
json
.
loads
(
line
)
class
MultilingualLAMBADAEN
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'en'
)
class
MultilingualLAMBADAFR
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'fr'
)
class
MultilingualLAMBADADE
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'de'
)
class
MultilingualLAMBADAIT
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'it'
)
class
MultilingualLAMBADAES
(
MultilingualLAMBADA
):
def
__init__
(
self
):
super
().
__init__
(
'es'
)
LANG_CLASSES
=
[
MultilingualLAMBADAEN
,
MultilingualLAMBADAFR
,
MultilingualLAMBADADE
,
MultilingualLAMBADAIT
,
MultilingualLAMBADAES
]
def
construct_tasks
():
def
construct_tasks
():
tasks
=
{}
tasks
=
{}
for
lang
in
LANGS
:
for
lang
,
lang_class
in
zip
(
LANGS
,
LANG_CLASSES
):
class
MultilingualLAMBADAInstance
(
MultilingualLAMBADA
):
tasks
[
f
"lambada_mt_
{
lang
}
"
]
=
lang_class
def
__init__
(
self
):
super
().
__init__
(
lang
)
tasks
[
f
"lambada_mt_
{
lang
}
"
]
=
MultilingualLAMBADAInstance
return
tasks
return
tasks
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment