Commit a2871d9c authored by Anish Thite
Browse files

add generate docs and generate text for coqa

parent 345f5fef
from ...base import Dataset
from base import Dataset
import os
import json
import random
class CoQA(Dataset):
def has_training_docs(self):
    """Tell callers that this task provides training documents.

    Returns:
        Always ``True``.
    """
    return True
def has_validation_docs(self):
    """Tell callers whether this task provides validation documents.

    Returns:
        Always ``False``.
    """
    return False
def training_docs(self):
    """Load the CoQA training file and convert it into documents.

    Reads ``data/coqa/coqa-train-v1.0.json`` (resolved against the current
    working directory), takes its top-level ``data`` entry and hands it to
    ``self.load_doc``.

    Returns:
        Whatever ``self.load_doc`` returns for the parsed entries.

    Raises:
        FileNotFoundError: if the dataset file is not present.
        KeyError: if the parsed JSON has no ``data`` key.
    """
    # Use a context manager so the handle is closed even when parsing
    # fails, and decode explicitly as UTF-8 instead of relying on the
    # platform default encoding.
    with open('data/coqa/coqa-train-v1.0.json', encoding='utf-8') as handle:
        entries = json.load(handle)['data']
    return self.load_doc(entries)
def validation_docs(self):
    """Placeholder: no validation documents are produced.

    Returns:
        ``None``; the method has no implementation yet.
    """
    return None
def test_docs(self):
pass
myjson = json.load(open('data/coqa/coqa-dev-v1.0.json'))['data']
return self.load_doc(myjson)
def fewshot_examples(self, k):
traindocs = list(self.training_docs())
......@@ -22,8 +32,22 @@ class CoQA(Dataset):
def fewshot_description(self):
    """Placeholder: no few-shot description is provided.

    Returns:
        ``None``; the method has no implementation yet.
    """
    return None
def load_doc(self, myjson):
    """Flatten parsed CoQA entries into per-story lists of text pieces.

    Every entry yields one list that starts with the entry's ``story``.
    For each question/answer pair (paired positionally, stopping at the
    shorter of the two sequences) three items follow: a newline string,
    the question prefixed with ``Q: `` and the answer prefixed with
    ``A: ``, both taken from the pair's ``input_text`` field.

    Args:
        myjson: iterable of mappings with ``story``, ``questions`` and
            ``answers`` keys.

    Returns:
        A list with one list of strings per entry.
    """
    flattened = []
    for entry in myjson:
        pieces = [entry['story']]
        for question, answer in zip(entry['questions'], entry['answers']):
            pieces.extend((
                '\n',
                'Q: ' + question['input_text'],
                'A: ' + answer['input_text'],
            ))
        flattened.append(pieces)
    return flattened
def doc_to_text(self, doc, include_target=True):
    """Render a collection of instances as one delimited text blob.

    Each instance (a sequence of strings) is joined with newlines. The
    instances are then separated by a ``<|endoftext|>`` line, and a final
    ``<|endoftext|>`` line terminates the result.

    Args:
        doc: iterable of instances, each an iterable of strings.
        include_target: accepted for interface compatibility; currently
            not consulted.

    Returns:
        The concatenated text. For an empty ``doc`` this is just the
        trailing end-of-text marker preceded by a newline.
    """
    # The previous ``json.load(open(doc))`` call was dropped: ``doc`` is the
    # already-parsed list iterated below, so passing it to ``open`` raised
    # TypeError (and the parsed result was discarded anyway).
    rendered = ['\n'.join(instance) for instance in doc]
    text = '\n<|endoftext|>\n'.join(rendered)
    return text + '\n<|endoftext|>'
def evaluate(self, docs, lm):
    """Placeholder: evaluation is not implemented.

    Args:
        docs: unused.
        lm: unused.

    Returns:
        ``None``; the method has no implementation yet.
    """
    return None
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment