Commit 8cd91624 authored by jordiclive
Browse files

GEM Simplification

parent 9cd70235
......@@ -56,6 +56,7 @@ from . import hans
from . import gem_webnlg
from . import gem_xsum
from . import gem_mlsum
from . import gem_asset_turk
# from . import e2e_nlg_cleaned
########################################
......@@ -115,6 +116,7 @@ TASK_REGISTRY = {
"lambada": lambada.LAMBADA,
"lambada_cloze": lambada_cloze.LAMBADA_cloze,
# GEM simplification (wiki_auto_asset_turk) tasks, then multilingual lambada
**gem_asset_turk.construct_tasks(),
**lambada_multilingual.construct_tasks(),
"wikitext": wikitext.WikiText,
# "cbt-cn": cbt.CBTCN, # disabled pending context length fix
......
from lm_eval.base import PromptSourceTask
class AssetTurk(PromptSourceTask):
    """Base task for the ``GEM/wiki_auto_asset_turk`` dataset.

    The base class leaves ``SPLIT`` as ``None``; concrete subclasses set it
    to the name of the dataset split that :meth:`test_docs` should return.
    """

    VERSION = 0
    DATASET_PATH = "GEM/wiki_auto_asset_turk"
    DATASET_NAME = None
    # Name of the split served by ``test_docs``; set by concrete subclasses.
    SPLIT = None

    def has_training_docs(self):
        """Return ``False``: this task exposes no training documents."""
        return False

    def has_validation_docs(self):
        """Return ``True``: the ``"validation"`` split is exposed."""
        return True

    def has_test_docs(self):
        """Return ``True``: test documents are read from ``SPLIT``."""
        return True

    def training_docs(self):
        """Return the cached ``"train"`` split, or ``None`` when disabled.

        Because :meth:`has_training_docs` returns ``False`` here, this
        currently always returns ``None``; the caching branch only runs if a
        subclass enables training documents.
        """
        if self.has_training_docs():
            # presumably ``_training_docs`` is initialised by the base
            # class -- TODO confirm against PromptSourceTask.
            if self._training_docs is None:
                self._training_docs = list(self.dataset["train"])
            return self._training_docs

    def validation_docs(self):
        """Return the ``"validation"`` split (``None`` if disabled)."""
        if self.has_validation_docs():
            return self.dataset["validation"]

    def test_docs(self):
        """Return the dataset split named by ``SPLIT``.

        Raises:
            KeyError: if ``SPLIT`` has not been set by a subclass.
        """
        if self.SPLIT is None:
            # The previous ``str(self.SPLIT)`` turned an unset split into a
            # lookup of the literal key "None".  Fail with an explicit
            # message instead; KeyError is kept on the assumption that the
            # dataset is dict-like and raised it before -- TODO confirm.
            raise KeyError(
                f"{type(self).__name__}.SPLIT is not set; use a subclass "
                "that names a split of GEM/wiki_auto_asset_turk"
            )
        return self.dataset[self.SPLIT]

    def stopping_criteria(self):
        """Return ``None``, i.e. no stopping criterion is supplied."""
        return None

    def max_generation_length(self):
        """Return the generation length limit (200)."""
        return 200

    # TODO(review): a ``higher_is_better`` override returning
    # {"bleu": True, "rouge": True} was left commented out here; restore it
    # if the base class does not already provide the metric directions.
class AssetTest(AssetTurk):
    """AssetTurk variant whose test documents are the ``test_asset`` split."""

    SPLIT = "test_asset"
class TurkTest(AssetTurk):
    """AssetTurk variant whose test documents are the ``test_turk`` split."""

    SPLIT = "test_turk"
class AssetTest1(AssetTurk):
    """AssetTurk variant for ``challenge_test_asset_backtranslation``."""

    SPLIT = "challenge_test_asset_backtranslation"
class AssetTest2(AssetTurk):
    """AssetTurk variant for the ``challenge_test_asset_bfp02`` split."""

    SPLIT = "challenge_test_asset_bfp02"
class AssetTest3(AssetTurk):
    """AssetTurk variant for the ``challenge_test_asset_bfp05`` split."""

    SPLIT = "challenge_test_asset_bfp05"
class AssetTest4(AssetTurk):
    """AssetTurk variant for the ``challenge_test_asset_nopunc`` split."""

    SPLIT = "challenge_test_asset_nopunc"
class TurkTest1(AssetTurk):
    """AssetTurk variant for ``challenge_test_turk_backtranslation``."""

    SPLIT = "challenge_test_turk_backtranslation"
class TurkTest2(AssetTurk):
    """AssetTurk variant for the ``challenge_test_turk_bfp02`` split."""

    SPLIT = "challenge_test_turk_bfp02"
class TurkTest3(AssetTurk):
    """AssetTurk variant for the ``challenge_test_turk_bfp05`` split."""

    SPLIT = "challenge_test_turk_bfp05"
class TurkTest4(AssetTurk):
    """AssetTurk variant for the ``challenge_test_turk_nopunc`` split."""

    SPLIT = "challenge_test_turk_nopunc"
# Every concrete AssetTurk variant that construct_tasks() registers.
# The order here is the insertion order of the resulting task mapping.
ASSET_TURK_CLASSES = [
    # Plain test splits.
    AssetTest,
    TurkTest,
    # Turk challenge splits.
    TurkTest1,
    TurkTest2,
    TurkTest3,
    TurkTest4,
    # Asset challenge splits.
    AssetTest1,
    AssetTest2,
    AssetTest3,
    AssetTest4,
]
def construct_tasks():
    """Map a task name to each class listed in ``ASSET_TURK_CLASSES``.

    Returns:
        A new dict whose keys are ``GEM/wiki_auto_asset_turk_<SPLIT>`` (using
        each class's ``SPLIT`` attribute) and whose values are the classes
        themselves, in list order.
    """
    return {
        f"GEM/wiki_auto_asset_turk_{task_cls.SPLIT}": task_cls
        for task_cls in ASSET_TURK_CLASSES
    }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment