Commit 6de520af authored by Leo Gao
Browse files

Make fewshot_examples fast

parent ff3adfe2
......@@ -57,6 +57,7 @@ class Dataset(abc.ABC):
@abc.abstractmethod
def __init__(self):
    """Run the download step, then start with an empty training-doc cache."""
    self.download()
    # None means "not loaded yet"; fewshot_examples fills this in on first use.
    self._traindocs = None
def download(self):
"""Downloads the task dataset if necessary"""
......@@ -95,9 +96,10 @@ class Dataset(abc.ABC):
pass
def fewshot_examples(self, k):
    """Return up to ``k`` distinct, randomly chosen training documents.

    The training documents are materialised into a list the first time this
    is called and kept on ``self._traindocs``, so later calls only pay for
    the sampling step instead of rebuilding and shuffling the whole list.

    Args:
        k: Number of documents requested.

    Returns:
        A new list of ``k`` documents, or every training document (in random
        order) when fewer than ``k`` are available.
    """
    if self._traindocs is None:
        self._traindocs = list(self.training_docs())
    # random.sample raises ValueError when asked for more items than exist;
    # clamp so a small training set yields what it has, as the earlier
    # shuffle-and-slice implementation did.
    return random.sample(self._traindocs, min(k, len(self._traindocs)))
@abc.abstractmethod
def doc_to_text(self, doc, include_target=True):
......
......@@ -41,12 +41,6 @@ class HFTask(Dataset):
if self.has_test_docs():
return self.data["test"]
def fewshot_examples(self, k):
    """Pick ``k`` distinct training documents by sampling their positions."""
    docs = self.training_docs()
    # Draw indices first, then look up only the chosen documents.
    picked = random.sample(range(len(docs)), k)
    return [docs[pos] for pos in picked]
def simple_accuracy_metric(preds, golds):
acc = float((np.array(preds) == np.array(golds)).mean())
......
......@@ -22,6 +22,13 @@ class NaturalQs(HFTask):
# Data is too large to fit in memory.
return self.data["train"]
def fewshot_examples(self, k):
    """Return up to ``k`` random documents from the head of the training set.

    Only the first 100000 training documents are read, once, and kept on
    ``self._traindocs``; every call samples from that cached prefix.

    Args:
        k: Number of documents requested.

    Returns:
        A new list of ``k`` documents, or the whole cached prefix (in random
        order) when it holds fewer than ``k`` documents.
    """
    # Data is too large to fit in memory. We just sample from the first bit.
    if self._traindocs is None:
        self._traindocs = list(islice(self.training_docs(), 100000))
    # random.sample raises ValueError if k exceeds the population size, so
    # clamp the request to what the cached prefix actually contains.
    return random.sample(self._traindocs, min(k, len(self._traindocs)))
def doc_to_text(self, doc, include_target=True):
question = doc['question']['text']
......
......@@ -37,12 +37,6 @@ class QuAC(Dataset):
def test_docs(self):
raise NotImplementedError("QuAC has no test docs.")
def fewshot_examples(self, k):
    """Return the first ``k`` entries of a freshly shuffled copy of the training docs."""
    shuffled = [*self.training_docs()]
    random.shuffle(shuffled)
    # Slicing tolerates k larger than the list and simply returns everything.
    return shuffled[:k]
def fewshot_description(self):
    """Return the fixed text describing the TITLE / PARAGRAPH / Q / A format."""
    return (
        "TITLE: Title of the context passage - subtitle of the passage\n"
        "PARAGRAPH: Passage describing the relevant information for answering questions.\n"
        "\n"
        "Q: Text of a question.\n"
        "\n"
        "A: Answer to the question, based on the passage. "
        "If it cannot be answered based on the passage, write CANNOTANSWER"
    )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment