raiseNotImplementedError("QuAC has no test docs.")
raiseNotImplementedError("QuAC has no test docs.")
    def fewshot_description(self):
        # TODO: figure out fewshot description
        desc = "TITLE: Title of the context passage - subtitle of the passage\nPARAGRAPH: Passage describing the relevant information for answering questions.\n\nQ: Text of a question.\n\nA: Answer to the question, based on the passage. If it cannot be answered based on the passage, write CANNOTANSWER"
        return desc
...
@@ -61,8 +62,47 @@ class QuAC(Dataset):
    def doc_to_target(self, doc):
        return doc['answer']
    # TODO: Implement evaluation code

    # ***IMPORTANT***: this evaluation function needs to be written for the new framework.
    # For more info, check out the interface in base.py and the example BoolQ implementation in superglue.py.
    # Remove this comment when the evaluation code is implemented.

    def construct_requests(self, doc, ctx):
        """ Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, as well as the few shot examples, and the question
            part of the document for `doc`.
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')
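
    # One possible shape for this method, left as a commented sketch rather than an
    # implementation: it assumes the harness's RequestFactory is in scope as `rf` and
    # that a `greedy_until` request type (generate until a stop string) is available,
    # as it is for other generation-style tasks. This is not the official QuAC evaluation.
    #
    #     continuation = rf.greedy_until(ctx, ['\n'])
    #     return continuation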
    def process_results(self, doc, results):
        """Take a single document and the LM results and evaluates, returning a
        dict where keys are the names of submetrics and values are the values of
        the metric for that one document

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param results:
            The results of the requests created in construct_requests.
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')
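
    # Commented sketch only, assuming the single greedy continuation returned by the
    # construct_requests sketch above. The 'f1' submetric name and the `_word_f1`
    # helper (a SQuAD-style token-overlap F1, needing `import collections` at module
    # level) are assumptions, not something defined in this file:
    #
    #     continuation = results[0].strip()
    #     return {'f1': _word_f1(continuation, doc['answer'])}
    #
    # with a hypothetical module-level helper such as:
    #
    #     def _word_f1(pred, gold):
    #         pred_toks, gold_toks = pred.lower().split(), gold.lower().split()
    #         common = collections.Counter(pred_toks) & collections.Counter(gold_toks)
    #         overlap = sum(common.values())
    #         if overlap == 0:
    #             return 0.0
    #         precision, recall = overlap / len(pred_toks), overlap / len(gold_toks)
    #         return 2 * precision * recall / (precision + recall)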
    def aggregation(self):
        """
        :returns: {str: [float] -> float}
            A dictionary where keys are the names of submetrics and values are
            functions that aggregate a list of metrics
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')
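
    # Commented sketch, tied to the 'f1' submetric assumed above; `mean` would come
    # from the harness's metrics helpers (e.g. `from lm_eval.metrics import mean`),
    # which is an assumption about the surrounding codebase:
    #
    #     return {'f1': mean}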
    def higher_is_better(self):
        """
        :returns: {str: bool}
            A dictionary where keys are the names of submetrics and values are
            whether a higher value of the submetric is better
        """
        # TODO: implement evaluation.
        raise NotImplementedError('Evaluation not implemented')
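
    # Commented sketch matching the assumed 'f1' submetric: a higher overlap F1 is
    # better, so the entry would simply be:
    #
    #     return {'f1': True}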