Commit a2871d9c authored by Anish Thite
Browse files

add generate docs and generate text for coqa

parent 345f5fef
from ...base import Dataset from base import Dataset
import os import os
import json import json
import random
class CoQA(Dataset): class CoQA(Dataset):
def has_training_docs(self):
    """Report that this task provides training documents."""
    return True
def has_validation_docs(self):
    """Report that this task provides no validation documents."""
    return False
def training_docs(self):
    """Load the CoQA training file and convert it with ``load_doc``.

    Reads ``data/coqa/coqa-train-v1.0.json`` relative to the current
    working directory and passes its ``'data'`` entry to ``self.load_doc``.

    Returns:
        Whatever ``self.load_doc`` returns for the training records.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and pin the encoding so parsing does not depend on the platform locale.
    with open('data/coqa/coqa-train-v1.0.json', encoding='utf-8') as handle:
        myjson = json.load(handle)['data']
    return self.load_doc(myjson)
def validation_docs(self):
    """Placeholder: no validation split is implemented; returns None."""
    pass
def test_docs(self): def test_docs(self):
pass myjson = json.load(open('data/coqa/coqa-dev-v1.0.json'))['data']
return self.load_doc(myjson)
def fewshot_examples(self, k): def fewshot_examples(self, k):
traindocs = list(self.training_docs()) traindocs = list(self.training_docs())
...@@ -22,8 +32,22 @@ class CoQA(Dataset): ...@@ -22,8 +32,22 @@ class CoQA(Dataset):
def fewshot_description(self):
    """Placeholder: no few-shot description is implemented; returns None."""
    pass
def load_doc(self, myjson):
    """Flatten CoQA records into lists of text lines.

    Args:
        myjson: Iterable of records. Each record is indexed with the keys
            'story', 'questions' and 'answers'; every question and answer
            entry is indexed with 'input_text'.

    Returns:
        A list with one entry per record. Each entry is a list of strings:
        the story first, then for every question/answer pair a single
        newline string, the question prefixed with 'Q: ' and the answer
        prefixed with 'A: '.
    """
    docs = []
    for item in myjson:
        lines = [item['story']]
        # zip pairs questions with answers positionally and stops at the
        # shorter of the two lists.
        for question, answer in zip(item['questions'], item['answers']):
            lines.append('\n')
            lines.append('Q: ' + question['input_text'])
            lines.append('A: ' + answer['input_text'])
        docs.append(lines)
    return docs
def doc_to_text(self, doc, include_target=True):
    """Render a collection of instances as one '<|endoftext|>'-delimited string.

    Args:
        doc: Iterable of instances, each an iterable of strings (the shape
            of ``load_doc``'s return value — presumably that is what
            callers pass; confirm against the caller).
        include_target: Accepted for interface compatibility; not used by
            this implementation.

    Returns:
        The instances joined line by line, with a '<|endoftext|>' marker
        line after every instance, including the last one. An empty
        ``doc`` yields just the trailing marker.
    """
    rendered = ('\n'.join(instance) for instance in doc)
    text = '\n<|endoftext|>\n'.join(rendered)
    # The join only places markers between instances; close the last one too.
    return text + '\n<|endoftext|>'
def evaluate(self, docs, lm):
    """Placeholder: evaluation is not implemented; returns None.

    Args:
        docs: Unused by this stub.
        lm: Unused by this stub.
    """
    pass
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment