utils.py 361 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import datasets


def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Convert each document into ``query`` / ``choices`` / ``gold`` fields.

    The per-document conversion is handed to ``dataset.map`` and whatever
    that call returns is returned unchanged.

    For every document the conversion produces:

    * ``query``   -- ``activity_label`` and ``ctx`` joined with ``": "``.
    * ``choices`` -- the document's ``endings`` value, passed through as-is.
    * ``gold``    -- the document's ``label`` converted with ``int()``.
    """

    def _to_eval_fields(example):
        # Read the context first, then prefix it with the activity label.
        context = example["ctx"]
        prompt = example["activity_label"] + ": " + context
        return {
            "query": prompt,
            "choices": example["endings"],
            "gold": int(example["label"]),
        }

    return dataset.map(_to_eval_fields)