Unverified Commit a08bc3c8 authored by Yazeed Alnumay, committed by GitHub
Browse files

Added ArabicMMLU (#1987)

* Added ArabicMMLU

* Rename `ammlu` to `arabicmmlu`
parent 78a54e14
"dataset_name": "Univ Accounting"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_accounting"
"task_alias": "Univ Accounting"
"dataset_name": "Univ Computer Science"
"group": "arabicmmlu_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_computer_science"
"task_alias": "Univ Computer Science"
"dataset_name": "Univ Economics"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_economics"
"task_alias": "Univ Economics"
"dataset_name": "Univ Management"
"group": "arabicmmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_management"
"task_alias": "Univ Management"
"dataset_name": "Univ Political Science"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_political_science"
"task_alias": "Univ Political Science"
# English prompt template with three positional slots, filled in order by
# `doc_to_text`: the metadata description, the question, the option lines.
PROMPT = (
    "This is a {}. Select the correct answer!\n\n"
    "Question: {}\n{}\n\n"
    "Answer:"
)

# Maps the dataset's `Level` codes to the English phrase used in the prompt.
level_en = {
    "Primary": "primary school",
    "Middle": "middle school",
    "High": "high school",
    "Univ": "university",
    "Prof": "professional",
}

# Option labels in display order; the first character of each is the bare
# answer letter.
alpa = [f"{letter}." for letter in "ABCDE"]
def doc_to_text(doc):
    """
    Build the English multiple-choice prompt for one ArabicMMLU document.

    Refactoring `prepare_data_en` to fit with the lm harness framework.
    https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py

    Args:
        doc: Mapping with the keys 'Subject', 'Level', 'Country', 'Context',
            'Question' and 'Option 1' .. 'Option 5'. A falsy value (None or
            "") marks 'Level', 'Country', 'Context' or an option as absent.

    Returns:
        `PROMPT` filled in with the metadata description, the question
        (preceded by its context and a blank line when a context exists),
        and the labelled options joined by newlines.

    Raises:
        KeyError: If one of the keys above is missing from `doc`, or if
            'Level' holds a non-empty value that is not a key of `level_en`.
    """
    level = f" for {level_en[doc['Level']]}" if doc["Level"] else ""
    country = f" in {doc['Country']}" if doc["Country"] else ""
    main_meta_data = f"{doc['Subject']} question{level}{country}"

    # Use the same truthiness test as Level/Country/options: an absent
    # context may arrive as None rather than "", and comparing against ""
    # only would render it as the literal text "None" above the question.
    context = doc["Context"]
    question = f"{context}\n\n{doc['Question']}" if context else doc["Question"]

    options = []
    for i, opt in enumerate(
        ["Option 1", "Option 2", "Option 3", "Option 4", "Option 5"]
    ):
        # Options are listed up to the first empty one; later ones are
        # not rendered.
        if not doc[opt]:
            break
        options.append(f"{alpa[i]} {doc[opt]}")

    return PROMPT.format(main_meta_data, question, "\n".join(options))
def doc_to_choice(doc):
    """
    Return the answer letters for the options that are present in `doc`.

    Each of 'Option 1' .. 'Option 5' with a truthy value contributes the
    first character of the label at the same position in `alpa`.
    """
    letters = []
    for idx in range(5):
        # Every slot is checked independently: an empty option is skipped
        # and does not end the scan.
        if doc[f'Option {idx + 1}']:
            letters.append(alpa[idx][0])
    return letters
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment