utils.py 556 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import datasets


def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    label = ["A", "B", "C", "D"]

    def _process_doc(doc):
        choices = doc["choices"]
        choices["label"] = label
        answerKey = doc["answerKey"]
        if answerKey not in label:
            answerKey = label[int(answerKey) - 1]
        return {
            "question": doc["question"],
            "choices": choices,
            "answerKey": answerKey,
        }

    return dataset.filter(lambda x: len(x["choices"]["label"]) == 4).map(_process_doc)