utils.py 1.41 KB
Newer Older
jp's avatar
jp committed
1
2
3
4
5
6
7
from datasets import Dataset


def copa_doc_to_text(doc: dict) -> str:
    """Build the COPA prompt: the premise followed by a Korean connective.

    The connective is chosen from ``doc["question"]`` after stripping
    surrounding whitespace: "원인" selects " 왜냐하면" and "결과" selects
    " 그래서". Any other question value raises ``KeyError``.
    """
    question_type = doc["question"].strip()
    connectors = {"원인": " 왜냐하면", "결과": " 그래서"}
    connector = connectors[question_type]
    # The connective already starts with a space, so the rendered prompt has
    # two spaces between the premise and the connective.
    return "{} {}".format(doc["premise"], connector)

8

jp's avatar
jp committed
9
10
11
12
def copa_doc_to_target(doc: dict) -> str:
    """Return the text of the correct alternative for a COPA record.

    A ``doc["label"]`` equal to 0 selects ``doc["alternative_1"]``; every
    other label value selects ``doc["alternative_2"]``.
    """
    if doc["label"] == 0:
        answer = doc["alternative_1"]
    else:
        answer = doc["alternative_2"]
    # Formatting (rather than returning the value directly) keeps the result
    # a string even when the stored alternative is not one.
    return "{}".format(answer)

13

jp's avatar
jp committed
14
15
16
def copa_doc_to_choice(doc: dict) -> list:
    """Return both COPA alternatives as strings, first alternative first."""
    choice_keys = ("alternative_1", "alternative_2")
    return ["{}".format(doc[key]) for key in choice_keys]

17

jp's avatar
jp committed
18
19
20
def sentineg_doc_to_text(doc: dict):
    """Render the sentiment prompt for the sentence stored in ``doc``.

    The result is ``doc["sentence"]`` placed between the fixed Korean
    prefix "문장: " and suffix " 긍부정:".
    """
    sentence = doc["sentence"]
    return "문장: {} 긍부정:".format(sentence)

21

jp's avatar
jp committed
22
23
24
def wic_doc_to_text(doc: dict) -> str:
    """Render the word-in-context prompt for a record.

    The prompt lists ``doc["context_1"]`` and ``doc["context_2"]`` and then
    asks, in Korean, whether ``doc["word"]`` is used with the same meaning
    in both sentences.
    """
    first_context = doc["context_1"]
    second_context = doc["context_2"]
    target_word = doc["word"]
    return (
        f"문장1: {first_context} 문장2: {second_context} "
        f"두 문장에서 {target_word}가 같은 뜻으로 쓰였나?"
    )

25

jp's avatar
jp committed
26
27
28
29
def hellaswag_process_doc(doc: Dataset) -> Dataset:
    """Apply the HellaSwag record converter to ``doc`` via ``doc.map``.

    For each record the converter produces three fields:

    * ``"query"``   - the record's ``"context"`` prefixed with "문장: ".
    * ``"choices"`` - the four endings ``"ending_1"`` .. ``"ending_4"``,
      in that order.
    * ``"gold"``    - the record's ``"label"`` converted with ``int``.

    Returns whatever ``doc.map`` returns for that converter.
    """

    def _to_example(row):
        # Fields are read in the same order as the output keys are listed.
        query = "문장: {}".format(row["context"])
        endings = [row[f"ending_{idx}"] for idx in range(1, 5)]
        gold = int(row["label"])
        return {"query": query, "choices": endings, "gold": gold}

    return doc.map(_to_example)

41

jp's avatar
jp committed
42
def macro_f1_score(items):
    """Compute the macro-averaged F1 score over (gold, prediction) pairs.

    Args:
        items: Iterable of sequences; element 0 of each is used as the gold
            label and element 1 as the predicted label.

    Returns:
        The value of ``sklearn.metrics.f1_score(golds, preds,
        average="macro")``.
    """
    # scikit-learn is imported at call time, so it is only loaded when this
    # metric is actually computed.
    from sklearn.metrics import f1_score

    # Transpose the pairs into one column of golds and one of predictions.
    columns = tuple(zip(*items))
    return f1_score(columns[0], columns[1], average="macro")