Re-added task descriptions

a3520619 · lintangsutawika · d3f429ac · a3520619
Commit a3520619 authored Nov 09, 2023 by lintangsutawika
Hide whitespace changes
Inline Side-by-side

Showing with 26 additions and 0 deletions

lm_eval/tasks/squad.py lm_eval/tasks/squad.py +26 -0

No files found.
--- a/lm_eval/tasks/squad.py
+++ b/lm_eval/tasks/squad.py
+"""
+Know What You Don’t Know: Unanswerable Questions for SQuAD
+https://arxiv.org/pdf/1806.03822.pdf
+Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset,
+consisting of questions posed by crowdworkers on a set of Wikipedia articles,
+where the answer to every question is a segment of text, or span, from the
+corresponding reading passage, or the question might be unanswerable.
+SQuAD2.0 combines the 100,000 questions in SQuAD1.1 with over 50,000 unanswerable
+questions written adversarially by crowdworkers to look similar to answerable ones.
+To do well on SQuAD2.0, systems must not only answer questions when possible, but
+also determine when no answer is supported by the paragraph and abstain from answering.
+Homepage: https://rajpurkar.github.io/SQuAD-explorer/
+"""
 import datasets
 from math import exp
@@ -8,6 +23,17 @@ from lm_eval.api.task import Task
 from lm_eval.api.instance import Instance
 from lm_eval.api.registry import register_task
+_CITATION = """
+@misc{rajpurkar2018know,
+    title={Know What You Don't Know: Unanswerable Questions for SQuAD},
+    author={Pranav Rajpurkar and Robin Jia and Percy Liang},
+    year={2018},
+    eprint={1806.03822},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+"""
 def _squad_metric(predictions, references):
    """Load the "squad_v2" metric and return its `compute` result for the given inputs."""
    # `predictions` and `references` are forwarded unchanged as keyword arguments;
    # presumably they follow the squad_v2 metric's input schema -- TODO confirm against callers.
    # NOTE(review): the metric is loaded on every call, and `datasets.load_metric`
    # appears to be deprecated upstream in favour of the `evaluate` package --
    # verify against the installed `datasets` version.
    squad_metric = datasets.load_metric("squad_v2")
    return squad_metric.compute(predictions=predictions, references=references)