Commit 41a94e13 authored by Jon Tow
Browse files

Implement all word scrambling and manipulation tasks

parent a5e135b6
......@@ -29,7 +29,7 @@ from . import qa4mre
from . import translation
from . import headqa
from . import mathqa
from . import anagrams
from . import unscramble
########################################
# Translation tasks
......@@ -133,8 +133,11 @@ TASK_REGISTRY = {
**translation.create_tasks_from_benchmarks(selected_translation_benchmarks),
# Word Scrambling and Manipulation Tasks
"anagrams1": anagrams.Anagrams1,
"anagrams2": anagrams.Anagrams2
"anagrams1": unscramble.Anagrams1,
"anagrams2": unscramble.Anagrams2,
"cycle_letters": unscramble.CycleLetters,
"random_insertion": unscramble.RandomInsertion,
"reversed_words": unscramble.ReversedWords,
}
......
......@@ -14,8 +14,8 @@ def extract_gzip(gz, to):
shutil.copyfileobj(fin, fout)
class AnagramsBase(Task):
BASE_PATH = Path("data/anagrams")
class WordUnscrambleTask(Task):
BASE_PATH = Path("data/unscramble")
FILENAME = None
CHECKSUM = None # SHA256 Checksum.
......@@ -50,7 +50,7 @@ class AnagramsBase(Task):
def fewshot_examples(self, k):
# Override to avoid error caused by missing `training_docs`.
return random.sample(self.validation_docs(), k)
return random.sample(list(self.validation_docs()), k)
def doc_to_text(self, doc):
return doc["context"]
......@@ -80,11 +80,26 @@ class AnagramsBase(Task):
}
class Anagrams1(AnagramsBase):
class Anagrams1(WordUnscrambleTask):
FILENAME = "mid_word_1_anagrams.jsonl"
CHECKSUM = "6768a86896083199de4815d4964cb2f6f1046476cfd80c2a562784f182905979"
class Anagrams2(AnagramsBase):
class Anagrams2(WordUnscrambleTask):
FILENAME = "mid_word_2_anagrams.jsonl"
CHECKSUM = "c3d839d09a7954b78a27cd2cd75d4ed0488656c56ef4dbd741a005343826cb01"
# Word-unscramble task variant: fills in the FILENAME and CHECKSUM
# attributes that WordUnscrambleTask leaves as None.
class CycleLetters(WordUnscrambleTask):
# Name of the JSONL data file for this variant.
FILENAME = "cycle_letters_in_word.jsonl"
# SHA256 checksum (presumably of the data file above -- confirm against the download code).
CHECKSUM = "1689c9002bb8c5988bf5f05e977c9db92f57932c1b5a38998c29ac0dd71e1d42"
# Word-unscramble task variant: fills in the FILENAME and CHECKSUM
# attributes that WordUnscrambleTask leaves as None.
class RandomInsertion(WordUnscrambleTask):
# Name of the JSONL data file for this variant.
FILENAME = "random_insertion_in_word.jsonl"
# SHA256 checksum (presumably of the data file above -- confirm against the download code).
CHECKSUM = "72e65d83da53d15752ee0c47379509de149ddbad32d61184e5991df29616b78a"
# Word-unscramble task variant: fills in the FILENAME and CHECKSUM
# attributes that WordUnscrambleTask leaves as None.
class ReversedWords(WordUnscrambleTask):
# Name of the JSONL data file for this variant.
FILENAME = "reversed_words.jsonl"
# SHA256 checksum (presumably of the data file above -- confirm against the download code).
CHECKSUM = "133a08f875cd6c1ef8608a3233571a773881cc27b1c707de738cc6543439332a"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment