utils.py 1.16 KB
Newer Older
Yazeed Alnumay's avatar
Yazeed Alnumay committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Prompt skeleton. `doc_to_text` fills the three slots, in order, with the
# metadata line, the question text and the newline-joined answer options.
PROMPT = (
    "This is a {}. Select the correct answer!\n\n"
    "Question: {}\n"
    "{}\n\n"
    "Answer:"
)

# Wording used in the prompt for each value of a record's `Level` field.
level_en = {
    "Primary": "primary school",
    "Middle": "middle school",
    "High": "high school",
    "Univ": "university",
    "Prof": "professional",
}

# Option labels 'A.' .. 'E.', indexed by option position (0-based).
alpa = [f"{letter}." for letter in "ABCDE"]


def doc_to_text(doc):
    """
    Build the English prompt text for a single question record.

    Refactoring `prepare_data_en` to fit with the lm harness framework.
    https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py

    Args:
        doc: Mapping that provides the keys `Subject`, `Level`, `Country`,
            `Context`, `Question` and `Option 1` .. `Option 5`. `Level`,
            `Country`, `Context` and the options may be empty (`""` or
            `None`), in which case they are left out of the prompt.

    Returns:
        `PROMPT` filled with the metadata line
        ("<Subject> question[ for <level>][ in <Country>]"), the question
        (prefixed by the context and a blank line when a context exists) and
        one "<letter>. <option>" line per option.

    Raises:
        KeyError: If a required key is missing from `doc`, or `Level` holds a
            non-empty value that is not a key of `level_en`.
    """
    level = " for " + level_en[doc['Level']] if doc['Level'] else ""
    country = " in " + doc['Country'] if doc['Country'] else ""
    main_meta_data = f"{doc['Subject']} question{level}{country}"

    # Truthiness rather than `== ""`: a missing context may also arrive as
    # None, which would otherwise be rendered as the literal text "None".
    context = doc['Context']
    question = f"{context}\n\n{doc['Question']}" if context else doc['Question']

    option_keys = ('Option 1', 'Option 2', 'Option 3', 'Option 4', 'Option 5')
    options = []
    for label, key in zip(alpa, option_keys):
        # Listing stops at the first empty option; later ones are not shown.
        if not doc[key]:
            break
        options.append(f"{label} {doc[key]}")

    return PROMPT.format(main_meta_data, question, '\n'.join(options))


def doc_to_choice(doc):
    """
    Return the answer letters available for a single question record.

    All five fields `Option 1` .. `Option 5` of `doc` are read. Each
    non-empty one contributes the first character of its label in `alpa`
    ('A' .. 'E'); empty ones are skipped without ending the scan.
    """
    letters = []
    for number in range(1, 6):
        if not doc[f'Option {number}']:
            continue
        # alpa holds labels such as 'A.'; keep only the leading letter.
        letters.append(alpa[number - 1][0])
    return letters