utils.py 498 Bytes
Newer Older
Baber's avatar
Baber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import datasets


def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Rebuild every row so it carries only the three derived fields.

    Each row gains ``doc_to_text``, ``doc_to_choice`` and ``doc_to_target``;
    every column that existed on ``dataset`` before mapping is then dropped
    via ``remove_columns``.

    Args:
        dataset: Rows providing ``input_final_prompts``,
            ``output_choice_completions`` and ``input_correct_responses``.

    Returns:
        The result of ``dataset.map`` with the original columns removed.
    """
    original_columns = dataset.column_names

    def _derive_fields(row):
        # Only the first prompt is used. After trimming, its last two
        # characters are cut off and the remainder is trimmed again.
        # NOTE(review): presumably this removes a trailing answer
        # placeholder from the prompt -- confirm against the dataset.
        first_prompt = row["input_final_prompts"][0].strip()
        prompt_text = first_prompt[:-2].strip()

        # Every occurrence of "Answer:" is deleted from each completion,
        # then surrounding whitespace is trimmed.
        choices = []
        for completion in row["output_choice_completions"]:
            choices.append(completion.replace("Answer:", "").strip())

        # Target is the final character of the first correct response.
        # NOTE(review): assumes that character is the answer label -- confirm.
        first_response = row["input_correct_responses"][0].strip()
        target = first_response[-1]

        row["doc_to_text"] = prompt_text
        row["doc_to_choice"] = choices
        row["doc_to_target"] = target
        return row

    return dataset.map(_derive_fields, remove_columns=original_columns)