utils.py 750 Bytes
Newer Older
Yen-Ting Lin's avatar
Yen-Ting Lin committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import datasets


def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Reshape every multiple-choice record of *dataset* via ``dataset.map``.

    Each record is expected to carry a ``question`` field, option fields
    ``A`` through ``D`` (``E`` and ``F`` are optional), and an ``answer``
    field holding the letter of the correct option.

    The per-record mapping produces three fields:

    * ``questions`` -- the value of the record's ``question`` field.
    * ``choices``   -- the option texts, in label order.
    * ``goal``      -- the zero-based position of ``answer`` among the
      labels that were kept for this record.

    Returns:
        Whatever ``dataset.map`` returns for the per-record function.

    Raises:
        KeyError: if a record lacks ``A``-``D``, ``question`` or ``answer``.
        ValueError: if ``answer`` is not one of the labels kept for the
            record (e.g. it names an option that is absent or empty).
    """

    def _reshape(record):
        mandatory = ("A", "B", "C", "D")
        optional = ("E", "F")

        # The four base options must exist; a missing one raises KeyError.
        labels = list(mandatory)
        options = [record[label] for label in mandatory]

        # Extra options are kept only when present and truthy, so an empty
        # or None "E"/"F" column does not produce a blank choice.
        for extra in optional:
            if record.get(extra, None):
                labels.append(extra)
                options.append(record[extra])

        return {
            "questions": record["question"],
            "choices": options,
            # Index within the *kept* labels, so it lines up with `options`
            # even when an optional label was skipped.
            "goal": labels.index(record["answer"]),
        }

    return dataset.map(_reshape)