"...git@developer.sourcefind.cn:modelzoo/qwen_lmdeploy.git" did not exist on "026841447cd2e143b6d5d2fa9621b3bd1d975e25"
Unverified Commit 80f5fc3b authored by Stella Biderman's avatar Stella Biderman Committed by GitHub
Browse files

Merge pull request #48 from cfoster0/quac

Add QuAC dataset
parents 2b64cae6 a543cc5d
...@@ -4,6 +4,7 @@ from . import arc ...@@ -4,6 +4,7 @@ from . import arc
from . import race from . import race
from . import webqs from . import webqs
from . import anli from . import anli
from . import quac
from . import hellaswag from . import hellaswag
from . import openbookqa from . import openbookqa
from . import squad from . import squad
...@@ -29,6 +30,7 @@ TASK_REGISTRY = { ...@@ -29,6 +30,7 @@ TASK_REGISTRY = {
# Order by benchmark/genre? # Order by benchmark/genre?
"arc_easy": arc.ARCEasy, "arc_easy": arc.ARCEasy,
"arc_challenge": arc.ARCChallenge, "arc_challenge": arc.ARCChallenge,
"quac": quac.QuAC,
"hellaswag": hellaswag.HellaSwag, "hellaswag": hellaswag.HellaSwag,
"openbookqa": openbookqa.OpenBookQA, "openbookqa": openbookqa.OpenBookQA,
"squad": squad.SQuAD, "squad": squad.SQuAD,
......
import json
import random
import os
from lm_eval.base import Dataset
from ..utils import sh
class QuAC(Dataset):
    """QuAC dataset task, flattened to one document per question.

    Each document is a dict with the keys ``title``, ``paragraph``,
    ``question`` and ``answer``. Data comes from the v0.2 train and
    validation JSON files stored under ``data/quac``; there is no test split.
    """

    # Local files fetched by download() and parsed by the *_docs() loaders.
    _TRAIN_PATH = 'data/quac/train_v0.2.json'
    _VAL_PATH = 'data/quac/val_v0.2.json'

    def __init__(self):
        super().__init__()

    def download(self):
        """Fetch the train/validation JSON files unless both already exist."""
        # Test for the files themselves, not just the directory: a run that
        # was interrupted after `mkdir` would otherwise never be retried and
        # the loaders below would fail on a missing file.
        if os.path.exists(self._TRAIN_PATH) and os.path.exists(self._VAL_PATH):
            return
        sh("""
            mkdir -p data/quac
            wget https://s3.amazonaws.com/my89public/quac/train_v0.2.json -O data/quac/train_v0.2.json
            wget https://s3.amazonaws.com/my89public/quac/val_v0.2.json -O data/quac/val_v0.2.json
            """)

    def has_training_docs(self):
        """Return True: a training split is available."""
        return True

    def has_validation_docs(self):
        """Return True: a validation split is available."""
        return True

    def has_test_docs(self):
        """Return False: no test split is available."""
        return False

    @staticmethod
    def _read_data(path):
        """Return the top-level ``data`` entry of the JSON file at *path*.

        The file is opened in a ``with`` block so the handle is closed as
        soon as parsing finishes, and decoded explicitly as UTF-8 rather
        than with the platform default encoding.
        """
        with open(path, encoding='utf-8') as handle:
            return json.load(handle)['data']

    def training_docs(self):
        """Return the list of documents built from the training file."""
        return self.load_doc(self._read_data(self._TRAIN_PATH))

    def validation_docs(self):
        """Return the list of documents built from the validation file."""
        return self.load_doc(self._read_data(self._VAL_PATH))

    def test_docs(self):
        """Always raise NotImplementedError, since there is no test split."""
        raise NotImplementedError("QuAC has no test docs.")

    def fewshot_examples(self, k):
        """Return up to *k* training documents in random order.

        Uses the module-level ``random`` generator, so results depend on its
        current state. If *k* exceeds the number of training documents, all
        of them are returned.
        """
        traindocs = list(self.training_docs())
        random.shuffle(traindocs)
        return traindocs[:k]

    def fewshot_description(self):
        """Return the template text describing the prompt layout."""
        desc = (
            "TITLE: Title of the context passage - subtitle of the passage\n"
            "PARAGRAPH: Passage describing the relevant information for answering questions.\n\n"
            "Q: Text of a question.\n\n"
            "A: Answer to the question, based on the passage. If it cannot be answered based on the passage, write CANNOTANSWER"
        )
        return desc

    def load_doc(self, myjson):
        """Flatten parsed QuAC records into one document per question.

        Args:
            myjson: iterable of records, each with ``title``,
                ``section_title`` and a non-empty ``paragraphs`` list.

        Returns:
            A list of dicts with keys ``title``, ``paragraph``, ``question``
            and ``answer``.
        """
        docs = []
        for item in myjson:
            title = item['title'] + ' - ' + item['section_title']
            # Only the first paragraph of each record is used.
            first_paragraph = item['paragraphs'][0]
            # Drop the literal CANNOTANSWER marker from the passage text
            # (presumably it is embedded in the context so "no answer" can be
            # expressed as a span -- confirm against the dataset format).
            paragraph = first_paragraph['context'].replace("CANNOTANSWER", "")
            for qa in first_paragraph['qas']:
                docs.append({
                    'title': title,
                    'paragraph': paragraph,
                    'question': qa['question'],
                    # Only the first listed answer is kept as the target.
                    'answer': qa['answers'][0]['text'],
                })
        return docs

    def doc_to_text(self, doc, include_target=True):
        """Render *doc* as a prompt string.

        Args:
            doc: a document dict as produced by ``load_doc``.
            include_target: when true, append the answer after ``A: ``;
                otherwise the text ends with ``A: `` for the model to complete.
        """
        text = 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
        if include_target:
            text += doc['answer']
        return text

    def evaluate(self, docs, lm):
        """Placeholder: evaluation is not implemented and returns None."""
        # TODO: implement QuAC evaluation.
        pass
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment