# Prompt template with three positional placeholders, in order: a short
# description of the question (subject / level / country), the question text
# (optionally preceded by its context), and the newline-separated options.
PROMPT = "This is a {}. Select the correct answer!\n\nQuestion: {}\n{}\n\nAnswer:"

# Maps the value of a document's "Level" field to the English phrase that is
# inserted into the prompt (e.g. "... question for primary school").
level_en = {
    "Primary": "primary school",
    "Middle": "middle school",
    "High": "high school",
    "Univ": "university",
    "Prof": "professional",
}

# Option labels, in order; the first character of each entry is the bare
# answer letter.
alpa = ["A.", "B.", "C.", "D.", "E."]


def doc_to_text(doc):
    """
    Build the English multiple-choice prompt for a single document.

    Refactoring `prepare_data_en` to fit with the lm harness framework.
    https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py

    Args:
        doc: Mapping that provides the keys "Subject", "Level", "Country",
            "Question", "Context" and "Option 1" through "Option 5".

    Returns:
        `PROMPT` filled with the question description, the question text
        (prefixed by the context when one is present) and the labelled
        options, one per line.

    Raises:
        KeyError: If one of the keys above is missing from `doc`, or if a
            non-empty "Level" value is not a key of `level_en`.
    """
    level = "" if not doc["Level"] else " for " + level_en[doc["Level"]]
    country = "" if not doc["Country"] else " in " + doc["Country"]
    main_meta_data = f"{doc['Subject']} question{level}{country}"

    # Use truthiness rather than `== ""` so that a missing (None) context is
    # skipped instead of being rendered as the literal text "None".
    context = doc["Context"]
    question = f"{context}\n\n{doc['Question']}" if context else doc["Question"]

    options = []
    for label, key in zip(alpa, (f"Option {n}" for n in range(1, 6))):
        # Options are listed up to the first empty one; anything after a gap
        # is ignored.
        if not doc[key]:
            break
        options.append(f"{label} {doc[key]}")

    return PROMPT.format(main_meta_data, question, "\n".join(options))


def doc_to_choice(doc):
    """
    Return the answer letters available for a single document.

    Args:
        doc: Mapping that provides the keys "Option 1" through "Option 5".

    Returns:
        A list of single-letter labels ("A", "B", ...), one for each option
        up to (not including) the first empty option.

    Raises:
        KeyError: If one of the "Option N" keys is missing from `doc`.
    """
    choices = []
    for i in range(5):
        # Stop at the first empty option, exactly as doc_to_text does, so the
        # returned letters always match the options shown in the prompt.
        if not doc[f"Option {i + 1}"]:
            break
        choices.append(alpa[i][0])
    return choices