from .common import HFTask
from itertools import islice
import random


class NaturalQs(HFTask):
    # TODO: naturalqs has a *really* large train set that huggingface just
    # automatically downloads, even if you don't use it. We should try to
    # download only the val set and not bother with the train set at all.
    DATASET_PATH = "natural_questions"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def training_docs(self):
        # The data is too large to fit in memory, so return the split lazily
        # instead of caching it here.
        return self.data["train"]

    def fewshot_examples(self, k):
        # The data is too large to fit in memory, so we cache and sample from
        # only the first 100k training docs.
        if self._training_docs is None:
            self._training_docs = list(islice(self.training_docs(), 0, 100000))

        return random.sample(self._training_docs, k)

    def doc_to_text(self, doc):
        return 'Q: ' + doc['question']['text'] + '\n\n' + 'A: '

    def doc_to_target(self, doc):
        # There's a short answer and a long answer. Based on the paper, we use
        # the long answer here.
        short_answer = doc['annotations']['short_answers'][0]['text']
        long_answer_start = doc['annotations']['long_answer'][0]['start_token']
        long_answer_end = doc['annotations']['long_answer'][0]['end_token']
        long_answer_span = doc['document']['tokens']['token'][long_answer_start:long_answer_end]
        long_answer_is_html = doc['document']['tokens']['is_html'][long_answer_start:long_answer_end]
        # Drop HTML markup tokens, keeping only the natural-language tokens.
        long_answer_tokens = [tok for (tok, is_html) in zip(long_answer_span, long_answer_is_html) if not is_html]
        long_answer = " ".join(long_answer_tokens)
        return long_answer  # Replace with short_answer[0] for the short answer

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable
        of Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or
            test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes
            the natural language description, as well as the few-shot examples
            and the question part of the document for `doc`.
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')

    def process_results(self, doc, results):
        """Take a single document and the LM results, evaluate them, and
        return a dict where keys are the names of submetrics and values are
        the values of the metric for that one document.

        :param doc:
            The document as returned from training_docs, validation_docs, or
            test_docs.
        :param results:
            The results of the requests created in construct_requests.
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')

    def aggregation(self):
        """
        :returns: {str: [float] -> float}
            A dictionary where keys are the names of submetrics and values are
            functions that aggregate a list of metrics
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')

    def higher_is_better(self):
        """
        :returns: {str: bool}
            A dictionary where keys are the names of submetrics and values are
            whether a higher value of the submetric is better
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')
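
# A minimal sketch (deliberately commented out, not wired in) of one way the
# evaluation TODOs above could be filled in: one greedy-generation request per
# document, scored by exact match against doc_to_target. `rf.greedy_until` is
# the request-factory helper this repo's other generation tasks use; the
# subclass name, the "em" metric key, and the inline mean aggregator are
# assumptions for illustration, not the settled design for this task.
#
# from lm_eval.base import rf
#
# class NaturalQsExactMatchSketch(NaturalQs):
#     def construct_requests(self, doc, ctx):
#         # One generation request per document; stop at the first newline.
#         return rf.greedy_until(ctx, ["\n"])
#
#     def process_results(self, doc, results):
#         # results[0] is the generated continuation for the single request.
#         continuation = results[0].strip().lower()
#         gold = self.doc_to_target(doc).strip().lower()
#         return {"em": float(continuation == gold)}
#
#     def aggregation(self):
#         # Average per-document exact-match scores over the whole split.
#         return {"em": lambda scores: sum(scores) / len(scores)}
#
#     def higher_is_better(self):
#         return {"em": True}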