Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
13710677
Unverified
Commit
13710677
authored
Aug 31, 2021
by
Leo Gao
Committed by
GitHub
Aug 31, 2021
Browse files
Merge pull request #204 from EleutherAI/lambada-multilingual
Add multilingual lambada
parents
52f270dc
e35386d9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
0 deletions
+53
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+6
-0
lm_eval/tasks/lambada_multilingual.py
lm_eval/tasks/lambada_multilingual.py
+47
-0
No files found.
lm_eval/tasks/__init__.py
View file @
13710677
...
@@ -41,6 +41,7 @@ from . import cbt
...
@@ -41,6 +41,7 @@ from . import cbt
from
.
import
lambada_cloze
from
.
import
lambada_cloze
from
.
import
pile
from
.
import
pile
from
.
import
wikitext
from
.
import
wikitext
from
.
import
lambada_multilingual
from
.
import
mutual
from
.
import
mutual
########################################
########################################
...
@@ -99,6 +100,10 @@ TASK_REGISTRY = {
...
@@ -99,6 +100,10 @@ TASK_REGISTRY = {
"drop"
:
drop
.
DROP
,
"drop"
:
drop
.
DROP
,
"lambada"
:
lambada
.
LAMBADA
,
"lambada"
:
lambada
.
LAMBADA
,
"lambada_cloze"
:
lambada_cloze
.
LAMBADA_cloze
,
"lambada_cloze"
:
lambada_cloze
.
LAMBADA_cloze
,
# multilingual lambada
**
lambada_multilingual
.
construct_tasks
(),
"wikitext"
:
wikitext
.
WikiText
,
"wikitext"
:
wikitext
.
WikiText
,
# "cbt-cn": cbt.CBTCN, # disabled pending context length fix
# "cbt-cn": cbt.CBTCN, # disabled pending context length fix
# "cbt-ne": cbt.CBTNE, # disabled pending context length fix
# "cbt-ne": cbt.CBTNE, # disabled pending context length fix
...
@@ -207,6 +212,7 @@ TASK_REGISTRY = {
...
@@ -207,6 +212,7 @@ TASK_REGISTRY = {
"pile_ubuntu-irc"
:
pile
.
PileUbuntuIrc
,
"pile_ubuntu-irc"
:
pile
.
PileUbuntuIrc
,
"pile_wikipedia"
:
pile
.
PileWikipedia
,
"pile_wikipedia"
:
pile
.
PileWikipedia
,
"pile_youtubesubtitles"
:
pile
.
PileYoutubeSubtitles
,
"pile_youtubesubtitles"
:
pile
.
PileYoutubeSubtitles
,
}
}
...
...
lm_eval/tasks/lambada_multilingual.py
0 → 100644
View file @
13710677
from
.
import
lambada
from
lm_eval.base
import
Task
,
rf
from
lm_eval.metrics
import
mean
,
perplexity
from
lm_eval.utils
import
sh
from
best_download
import
download_file
import
json
from
functools
import
partial
# Multilingual LAMBADA: the LAMBADA task, machine-translated into the other
# languages listed below.

# Language codes for which a translated test file is available.
LANGS = ["en", "fr", "de", "it", "es"]

# Expected checksum of each language's test file, keyed by language code.
# NOTE(review): 64 hex digits each, presumably SHA-256 -- confirm against
# what best_download.download_file expects.
CHECKSUMS = {
    "en": "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226",
    "fr": "941ec6a73dba7dc91c860bf493eb66a527cd430148827a4753a4535a046bf362",
    "de": "51c6c1795894c46e88e4c104b5667f488efe79081fb34d746b82b8caa663865e",
    "it": "86654237716702ab74f42855ae5a78455c1b0e50054a4593fb9c6fcf7fad0850",
    "es": "ffd760026c647fb43c67ce1bc56fd527937304b348712dce33190ea6caba6f9c",
}
class MultilingualLAMBADA(lambada.LAMBADA):
    """LAMBADA variant that reads a per-language test file.

    The language code is stored on the instance as ``LANG`` and is used to
    build both the download URL and the local path of the JSONL data file.
    Everything not overridden here is inherited from ``lambada.LAMBADA``.
    """

    VERSION = 0

    def __init__(self, lang=None):
        """Store the language code, then run the parent constructor.

        Args:
            lang: Language code; it must be a key of ``CHECKSUMS`` for
                ``download`` to succeed.
        """
        # LANG is set before delegating to the parent constructor.
        # NOTE(review): presumably the base constructor triggers download(),
        # which reads self.LANG -- confirm against lm_eval.base.
        self.LANG = lang
        super().__init__()

    def download(self):
        """Fetch this language's test file into ``data/lambada``.

        Raises:
            KeyError: If ``self.LANG`` has no entry in ``CHECKSUMS``.
        """
        sh("mkdir -p data/lambada")
        download_file(
            f"http://eaidata.bmk.sh/data/lambada_test_{self.LANG}.jsonl",
            f"data/lambada/lambada_test_{self.LANG}.jsonl",
            CHECKSUMS[self.LANG],
        )

    def validation_docs(self):
        """Yield one parsed JSON document per line of the language's file."""
        # Decode explicitly as UTF-8: the translated files contain non-ASCII
        # text, and the platform default encoding is not UTF-8 everywhere.
        with open(
            f"data/lambada/lambada_test_{self.LANG}.jsonl", encoding="utf-8"
        ) as fh:
            for line in fh:
                yield json.loads(line)
def construct_tasks():
    """Build the registry entries for every language in ``LANGS``.

    Returns:
        dict: Maps ``"lambada_mt_<lang>"`` to a zero-argument
        ``MultilingualLAMBADA`` subclass bound to that language.
    """

    def _make_task(lang):
        # Each call gets its own scope, so ``lang`` is fixed per class.
        # Defining the class directly in the loop makes every ``__init__``
        # read the loop variable at call time, i.e. always the last language.
        class MultilingualLAMBADAInstance(MultilingualLAMBADA):
            def __init__(self):
                super().__init__(lang)

        return MultilingualLAMBADAInstance

    return {f"lambada_mt_{lang}": _make_task(lang) for lang in LANGS}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment