utils_mcq.py 3.58 KB
Newer Older
Boda Sadallah's avatar
Boda Sadallah committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os

from lm_eval.tasks.arab_culture.prompts import (
    BASE_PROMPT,
    BASE_PROMPT_AR,
    JAIS_CHAT_AR,
    JAIS_CHAT_EN,
    REGION_COUNTRY_PROMPT,
    REGION_COUNTRY_PROMPT_AR,
    REGION_PROMPT,
    REGION_PROMPT_AR,
)


### Task settings are read from environment variables.
### A flag is enabled only when its variable holds exactly the string "True";
### when the variable is unset (or holds any other value) the flag is False.

### Set COUNTRY to "True" to add the country and region information to the prompt
COUNTRY = os.getenv("COUNTRY") == "True"
### Set REGION to "True" to add the region information to the prompt
REGION = os.getenv("REGION") == "True"
### Set ARABIC to "True" to use Arabic (instead of English) for the answer keys and the choices keys
ARABIC = os.getenv("ARABIC") == "True"
### Get the model name (None when MODEL_NAME is not set)
MODEL_NAME = os.getenv("MODEL_NAME")
## Uncomment this to check if the environment variables are set correctly
# print(f"Task settings: COUNTRY: {COUNTRY}, REGION: {REGION}, ARABIC: {ARABIC}, MODEL_NAME: {MODEL_NAME}")

### English -> Arabic names of the countries
_COUNTRY_NAMES_AR = {
    "Egypt": "مصر",
    "Morocco": "المغرب",
    "Algeria": "الجزائر",
    "Libya": "ليبيا",
    "Sudan": "السودان",
    "Tunisia": "تونس",
    "Jordan": "الأردن",
    "Lebanon": "لبنان",
    "Syria": "سوريا",
    "Palestine": "فلسطين",
    "Yemen": "اليمن",
    "UAE": "الإمارات",
    "KSA": "السعودية",
}

### English -> Arabic names of the regions
_REGION_NAMES_AR = {
    "Gulf": "الخليج",
    "Levant": "الشام",
    "North Africa": "شمال أفريقيا",
    "Nile Valley": "وادي النيل",
}

### Single lookup table (countries first, then regions) used to translate
### country and region names when the Arabic setting is enabled
en_ar_countries_regions = {**_COUNTRY_NAMES_AR, **_REGION_NAMES_AR}


def doc_to_text(doc):
    """Build the multiple-choice prompt text for one document.

    The template is chosen from the module-level settings: COUNTRY and REGION
    decide whether location information is included, ARABIC switches between
    the Arabic and English templates and choice keys, and a MODEL_NAME that
    contains both "jais" and "chat" wraps the result in the Jais chat template.

    Args:
        doc: mapping with the keys "country", "region", "first_statement" and
            "options"; "options" provides "arabic_keys", "english_keys" and
            "text" as parallel sequences.

    Returns:
        The formatted prompt string.

    Raises:
        AssertionError: if COUNTRY is enabled while REGION is not.
    """
    # Missing (None / empty) location fields are normalised to "".
    country = doc["country"] or ""
    region = doc["region"] or ""
    first_statement = doc["first_statement"].strip()

    ## We don't have a setting for only information about the country without the region
    if COUNTRY:
        assert REGION, (
            "If you want to add the country information, you must also add the region information"
        )

    ## convert country and region name to arabic if the language is arabic
    if ARABIC:
        # Fall back to the original value so a missing ("") or unlisted name
        # does not raise KeyError; previously an empty country/region crashed
        # here even when the location was not used in the prompt.
        country = en_ar_countries_regions.get(country, country)
        region = en_ar_countries_regions.get(region, region)

    choices = doc["options"]
    keys = choices["arabic_keys"] if ARABIC else choices["english_keys"]
    # One "<key>. <text>" line per option; pairing keys with texts avoids
    # hard-coding the number of options.
    choices_str = "".join(
        f"{key}. {text.strip()}\n" for key, text in zip(keys, choices["text"])
    )

    if COUNTRY and REGION:
        cur_prompt = REGION_COUNTRY_PROMPT_AR if ARABIC else REGION_COUNTRY_PROMPT
        doc_text = cur_prompt.format(
            country=country,
            region=region,
            first_statement=first_statement,
            choices=choices_str,
        )
    elif REGION:
        cur_prompt = REGION_PROMPT_AR if ARABIC else REGION_PROMPT
        doc_text = cur_prompt.format(
            region=region, first_statement=first_statement, choices=choices_str
        )
    else:
        cur_prompt = BASE_PROMPT_AR if ARABIC else BASE_PROMPT
        doc_text = cur_prompt.format(
            first_statement=first_statement, choices=choices_str
        )

    ### apply jais chat template
    if MODEL_NAME and "jais" in MODEL_NAME and "chat" in MODEL_NAME:
        if ARABIC:
            doc_text = JAIS_CHAT_AR.format(question=doc_text)
        else:
            doc_text = JAIS_CHAT_EN.format(question=doc_text)

    return doc_text


def doc_to_choice(doc):
    """Return the answer-choice keys of a document in the configured language.

    The Arabic keys are returned when the ARABIC setting is enabled, the
    English keys otherwise.
    """
    options = doc["options"]
    if ARABIC:
        return options["arabic_keys"]
    return options["english_keys"]


def doc_to_target(doc):
    """Return the correct answer key of a document, stripped of whitespace.

    The Arabic answer key is used when the ARABIC setting is enabled, the
    English answer key otherwise.
    """
    answer_keys = doc["answer_key"]
    field = "arabic_answer_key" if ARABIC else "english_answer_key"
    return answer_keys[field].strip()