utils.py 1.4 KB
Newer Older
jp's avatar
jp committed
1
2
3
4
5
6
7
8
from datasets import Dataset
from sklearn.metrics import f1_score


def copa_doc_to_text(doc: dict) -> str:
    """Build the prompt for a COPA example: the premise plus a connective.

    Args:
        doc: Example with a "premise" entry and a "question" entry whose
            stripped value is "원인" (cause) or "결과" (effect).

    Returns:
        The premise followed by " 왜냐하면" (because) for a cause question
        or " 그래서" (so) for an effect question.

    Raises:
        KeyError: If the stripped question is neither "원인" nor "결과".
    """
    question_type = doc["question"].strip()
    connectors = {"원인": " 왜냐하면", "결과": " 그래서"}
    connective = connectors[question_type]
    premise = doc["premise"]
    # The connective already starts with a space, so the result contains two
    # spaces between the premise and the connective.
    return f"""{premise} {connective}"""

9

jp's avatar
jp committed
10
11
12
13
def copa_doc_to_target(doc: dict) -> str:
    """Return the text of the correct alternative for a COPA example.

    Args:
        doc: Example with a "label" entry and the "alternative_1" /
            "alternative_2" entries.

    Returns:
        "alternative_1" when the label equals 0, otherwise "alternative_2",
        rendered as a string.
    """
    # Only the selected alternative is looked up, so the other key may be
    # absent without raising.
    if doc["label"] == 0:
        answer = doc["alternative_1"]
    else:
        answer = doc["alternative_2"]
    return f"{answer}"

14

jp's avatar
jp committed
15
16
17
def copa_doc_to_choice(doc: dict) -> list:
    """Return both COPA alternatives as a two-element list of strings.

    Args:
        doc: Example with "alternative_1" and "alternative_2" entries.

    Returns:
        ``[alternative_1, alternative_2]``, each rendered as a string.
    """
    first = doc["alternative_1"]
    second = doc["alternative_2"]
    return [f"{first}", f"{second}"]

18

jp's avatar
jp committed
19
20
21
def sentineg_doc_to_text(doc: dict) -> str:
    """Build the prompt for a SentiNeg example.

    Args:
        doc: Example with a "sentence" entry.

    Returns:
        The sentence wrapped as "문장: <sentence> 긍부정:" (roughly
        "Sentence: ... Positive/negative:").
    """
    sentence = doc["sentence"]
    return f"""문장: {sentence} 긍부정:"""

22

jp's avatar
jp committed
23
24
25
def wic_doc_to_text(doc: dict) -> str:
    """Build the prompt for a WiC (word-in-context) example.

    Args:
        doc: Example with "context_1", "context_2" and "word" entries.

    Returns:
        A prompt that lists both sentences and asks whether the word is used
        with the same meaning in each.
    """
    first_context = doc["context_1"]
    second_context = doc["context_2"]
    target_word = doc["word"]
    return f"""문장1: {first_context} 문장2: {second_context} 두 문장에서 {target_word}가 같은 뜻으로 쓰였나?"""

26

jp's avatar
jp committed
27
28
29
30
def hellaswag_process_doc(doc: Dataset) -> Dataset:
    """Map every HellaSwag example to "query" / "choices" / "gold" fields.

    Args:
        doc: Dataset whose examples provide "context", "ending_1" through
            "ending_4" and "label".

    Returns:
        The result of ``doc.map`` applied with the per-example converter
        defined below.
    """

    def _convert(example):
        # One example in, one dict of derived fields out.
        query = f"""문장: {example["context"]}"""
        choices = [
            example[key]
            for key in ("ending_1", "ending_2", "ending_3", "ending_4")
        ]
        gold = int(example["label"])
        return {"query": query, "choices": choices, "gold": gold}

    return doc.map(_convert)

42

jp's avatar
jp committed
43
44
45
46
def macro_f1_score(items):
    """Compute the macro-averaged F1 score over (gold, prediction) items.

    Args:
        items: Iterable of sequences whose first element is the gold label
            and whose second element is the predicted label.

    Returns:
        The value of ``f1_score(golds, preds, average="macro")``.
    """
    # Transpose the per-example rows into per-field columns.
    columns = list(zip(*items))
    golds, preds = columns[0], columns[1]
    return f1_score(golds, preds, average="macro")