Commit 5cfb7308 authored by Jonathan Tow

Fix naming convention to avoid name-mangling errors on `pytest` invocation

parent a60ef6fa
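Note: the rename from `__training_docs` to `_training_docs` matters because double leading underscores trigger Python's per-class name mangling, so the attribute set in the base class `__init__` is stored under a name the subclass methods never look up. A minimal sketch of the failure mode, using simplified stand-in classes rather than the repo's actual code:

# Minimal sketch (simplified stand-ins, not the repo's real classes):
# `self.__training_docs` is mangled to `_Task__training_docs` inside Task,
# but to `_HFTask__training_docs` inside HFTask, so the subclass never
# finds the attribute the base class initialized.

class Task:
    def __init__(self):
        self.__training_docs = None      # stored as _Task__training_docs

class HFTask(Task):
    def training_docs(self):
        return self.__training_docs     # looks up _HFTask__training_docs

try:
    HFTask().training_docs()
except AttributeError as e:
    print(e)  # 'HFTask' object has no attribute '_HFTask__training_docs'

# A single leading underscore is not mangled, so base and subclass agree:
class FixedTask:
    def __init__(self):
        self._training_docs = None

class FixedHFTask(FixedTask):
    def training_docs(self):
        return self._training_docs

print(FixedHFTask().training_docs())  # None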
@@ -61,7 +61,7 @@ class LM(abc.ABC):
 class Task(abc.ABC):
     def __init__(self):
         self.download()
-        self.__training_docs = None
+        self._training_docs = None
 
     def download(self):
         """Downloads the task dataset if necessary"""
@@ -104,9 +104,9 @@ class Task(abc.ABC):
         return []
 
     def fewshot_examples(self, k):
-        if self.__training_docs is None:
-            self.__training_docs = list(self.training_docs())
-        return random.sample(self.__training_docs, k)
+        if self._training_docs is None:
+            self._training_docs = list(self.training_docs())
+        return random.sample(self._training_docs, k)
 
     @abc.abstractmethod
     def doc_to_text(self, doc):
@@ -18,9 +18,9 @@ class ANLIBase(HFTask):
 
     def training_docs(self):
         if self.has_training_docs():
-            if self.__training_docs is None:
-                self.__training_docs = list(self.data["train_r" + str(self.SPLIT)])
-            return self.__training_docs
+            if self._training_docs is None:
+                self._training_docs = list(self.data["train_r" + str(self.SPLIT)])
+            return self._training_docs
 
     def validation_docs(self):
         if self.has_validation_docs():
@@ -30,9 +30,9 @@ class HFTask(Task):
         # Cache training for faster few-shot.
         # If data is too large to fit in memory, override this method.
         if self.has_training_docs():
-            if self.__training_docs is None:
-                self.__training_docs = list(self.data["train"])
-            return self.__training_docs
+            if self._training_docs is None:
+                self._training_docs = list(self.data["train"])
+            return self._training_docs
 
     def validation_docs(self):
         if self.has_validation_docs():
@@ -30,10 +30,10 @@ class NaturalQs(HFTask):
 
     def fewshot_examples(self, k):
         # Data is too large to fit in memory. We just sample from the first bit.
-        if self.__training_docs is None:
-            self.__training_docs = list(islice(self.training_docs(), 0, 100000))
+        if self._training_docs is None:
+            self._training_docs = list(islice(self.training_docs(), 0, 100000))
 
-        return random.sample(self.__training_docs, k)
+        return random.sample(self._training_docs, k)
 
     def doc_to_text(self, doc):
         return 'Q: ' + doc['question']['text'] + '\n\n' + 'A: '
@@ -19,9 +19,9 @@ class OpenBookQA(HFTask):
 
     def training_docs(self):
         if self.has_training_docs():
-            if self.__training_docs is None:
-                self.__training_docs = list(self.data["train"])
-            return self.__training_docs
+            if self._training_docs is None:
+                self._training_docs = list(self.data["train"])
+            return self._training_docs
 
     def validation_docs(self):
         if self.has_validation_docs():
@@ -273,17 +273,17 @@ class ReCoRD(HFTask):
         # Hence, we one "doc" for each (context + passage, answer) pair.
         # Moreover, we only use the correct answers for context packing
         # (This is not an issue for evaluation, where we can directly score multiple candidates at once).
-        if self.__training_docs is None:
-            self.__training_docs = []
+        if self._training_docs is None:
+            self._training_docs = []
             for doc in self.data["train"]:
                 for entity in list(set(doc["entities"])):
-                    self.__training_docs.append({
+                    self._training_docs.append({
                         "passage": doc["passage"],
                         "query": doc["query"],
                         "entity": entity,
                         "label": entity in doc["answers"],
                     })
-        return self.__training_docs
+        return self._training_docs
 
     def validation_docs(self):
         for doc in self.data["validation"]:
@@ -417,14 +417,14 @@ class SGWinogradSchemaChallenge(HFTask):
 
     def training_docs(self):
         if self.has_training_docs():
-            if self.__training_docs is None:
+            if self._training_docs is None:
                 # GPT-3 Paper's format only uses positive examples for fewshot "training"
-                self.__training_docs = [
+                self._training_docs = [
                     doc for doc in
                     self.data["train"]
                     if doc["label"]
                 ]
-            return self.__training_docs
+            return self._training_docs
 
     def fewshot_description(self):
         return "Final Exam with Answer Key\n" \