PROMPT = "This is a {}. Select the correct answer!\n\nQuestion: {}\n{}\n\nAnswer:" level_en = { "Primary": "primary school", "Middle": "middle school", "High": "high school", "Univ": "university", "Prof": "professional", } alpa = ["A.", "B.", "C.", "D.", "E."] def doc_to_text(doc): """ Refactoring `prepare_data_en` to fit with the lm harness framework. https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py """ level = "" if not doc["Level"] else " for " + level_en[doc["Level"]] country = "" if not doc["Country"] else " in " + doc["Country"] main_meta_data = f"{doc['Subject']} question{level}{country}" question = ( doc["Question"] if doc["Context"] == "" else f"{doc['Context']}\n\n{doc['Question']}" ) options = [] for i, opt in enumerate( ["Option 1", "Option 2", "Option 3", "Option 4", "Option 5"] ): if not doc[opt]: break options.append(f"{alpa[i]} {doc[opt]}") doc_text = PROMPT.format(main_meta_data, question, "\n".join(options)) return doc_text def doc_to_choice(doc): return [alpa[i][0] for i in range(5) if doc[f"Option {i+1}"]]