hf.py

import base
import nlp

def yesno(x):
    if x: return 'yes'
    else: return 'no'


def mean(x):
    return sum(x) / len(x)


class BoolQ(base.Dataset):
    def __init__(self):
        self.dataset = nlp.load_dataset('boolq')

    def training_docs(self):
        yield from self.dataset['train']
    
    def validation_docs(self):
        yield from self.dataset['validation']
    
    def test_docs(self):
        return []
    
    def fewshot_prefix(self):
        return "Read the following passages and answer each question with a yes or a no."

    def doc_to_text(self, doc, include_target=True):    
        return f"{doc['passage']}\nquestion: {doc['question']}\nanswer: " + (yesno(doc['answer']) if include_target else "")
    
    def evaluate(self, docs, lm, provide_description, num_fewshot):
        acc = []
        for doc in docs:
            ctx = '\n\n'.join(map(self.doc_to_text, self.fewshot_examples(k=num_fewshot))) + '\n\n'
            ctx += self.doc_to_text(doc, include_target=False).strip()
            ctx = ((self.fewshot_description() + "\n\n") if provide_description else "") + ctx

            ans = lm.loglikelihood(ctx, 'yes') > lm.loglikelihood(ctx, 'no')

            acc.append(int(ans == doc['answer']))
    
        return mean(acc)