Commit 41a94e13 authored by Jon Tow
Browse files

Implement all word scrambling and manipulation tasks

parent a5e135b6
...@@ -29,7 +29,7 @@ from . import qa4mre ...@@ -29,7 +29,7 @@ from . import qa4mre
from . import translation from . import translation
from . import headqa from . import headqa
from . import mathqa from . import mathqa
from . import anagrams from . import unscramble
######################################## ########################################
# Translation tasks # Translation tasks
...@@ -133,8 +133,11 @@ TASK_REGISTRY = { ...@@ -133,8 +133,11 @@ TASK_REGISTRY = {
**translation.create_tasks_from_benchmarks(selected_translation_benchmarks), **translation.create_tasks_from_benchmarks(selected_translation_benchmarks),
# Word Scrambling and Manipulation Tasks # Word Scrambling and Manipulation Tasks
"anagrams1": anagrams.Anagrams1, "anagrams1": unscramble.Anagrams1,
"anagrams2": anagrams.Anagrams2 "anagrams2": unscramble.Anagrams2,
"cycle_letters": unscramble.CycleLetters,
"random_insertion": unscramble.RandomInsertion,
"reversed_words": unscramble.ReversedWords,
} }
......
...@@ -14,8 +14,8 @@ def extract_gzip(gz, to): ...@@ -14,8 +14,8 @@ def extract_gzip(gz, to):
shutil.copyfileobj(fin, fout) shutil.copyfileobj(fin, fout)
class AnagramsBase(Task): class WordUnscrambleTask(Task):
BASE_PATH = Path("data/anagrams") BASE_PATH = Path("data/unscramble")
FILENAME = None FILENAME = None
CHECKSUM = None # SHA256 Checksum. CHECKSUM = None # SHA256 Checksum.
...@@ -50,7 +50,7 @@ class AnagramsBase(Task): ...@@ -50,7 +50,7 @@ class AnagramsBase(Task):
def fewshot_examples(self, k): def fewshot_examples(self, k):
# Override to avoid error caused by missing `training_docs`. # Override to avoid error caused by missing `training_docs`.
return random.sample(self.validation_docs(), k) return random.sample(list(self.validation_docs()), k)
def doc_to_text(self, doc): def doc_to_text(self, doc):
return doc["context"] return doc["context"]
...@@ -80,11 +80,26 @@ class AnagramsBase(Task): ...@@ -80,11 +80,26 @@ class AnagramsBase(Task):
} }
class Anagrams1(AnagramsBase): class Anagrams1(WordUnscrambleTask):
FILENAME = "mid_word_1_anagrams.jsonl" FILENAME = "mid_word_1_anagrams.jsonl"
CHECKSUM = "6768a86896083199de4815d4964cb2f6f1046476cfd80c2a562784f182905979" CHECKSUM = "6768a86896083199de4815d4964cb2f6f1046476cfd80c2a562784f182905979"
class Anagrams2(AnagramsBase): class Anagrams2(WordUnscrambleTask):
FILENAME = "mid_word_2_anagrams.jsonl" FILENAME = "mid_word_2_anagrams.jsonl"
CHECKSUM = "c3d839d09a7954b78a27cd2cd75d4ed0488656c56ef4dbd741a005343826cb01" CHECKSUM = "c3d839d09a7954b78a27cd2cd75d4ed0488656c56ef4dbd741a005343826cb01"
class CycleLetters(WordUnscrambleTask):
    # Word-unscramble variant registered under the "cycle_letters" task key.
    # It customises the base task only through the two settings below.

    # Name of the JSONL data file this variant reads.
    FILENAME = "cycle_letters_in_word.jsonl"
    # SHA256 checksum paired with FILENAME (the base class labels CHECKSUM
    # as a SHA256 checksum); presumably used to verify the data file.
    CHECKSUM = "1689c9002bb8c5988bf5f05e977c9db92f57932c1b5a38998c29ac0dd71e1d42"
class RandomInsertion(WordUnscrambleTask):
    # Word-unscramble variant registered under the "random_insertion" task key.
    # It customises the base task only through the two settings below.

    # Name of the JSONL data file this variant reads.
    FILENAME = "random_insertion_in_word.jsonl"
    # SHA256 checksum paired with FILENAME (the base class labels CHECKSUM
    # as a SHA256 checksum); presumably used to verify the data file.
    CHECKSUM = "72e65d83da53d15752ee0c47379509de149ddbad32d61184e5991df29616b78a"
class ReversedWords(WordUnscrambleTask):
    # Word-unscramble variant registered under the "reversed_words" task key.
    # It customises the base task only through the two settings below.

    # Name of the JSONL data file this variant reads.
    FILENAME = "reversed_words.jsonl"
    # SHA256 checksum paired with FILENAME (the base class labels CHECKSUM
    # as a SHA256 checksum); presumably used to verify the data file.
    CHECKSUM = "133a08f875cd6c1ef8608a3233571a773881cc27b1c707de738cc6543439332a"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment