Unverified commit ebbbb968, authored by Hadi Abdine, committed by GitHub
Browse files

add Darija (Moroccan dialects) tasks including darijammlu, darijahellaswag and darija_bench (#2521)



* add Darija tasks

* fix multiple groups issue in darijammlu

* add MT to the description of the Darija tasks

* Update README.md

nit

* fix the recursion error caused by the darija_summarization task

* use a custom filter instead of the decorator for the strip function

---------
Co-authored-by: Baber Abbasi <92168766+baberabb@users.noreply.github.com>
parent 5a9d5ba0
# Per-subject task config: registers the task "darijammlu_political_science".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "political_science"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_political_science"
"task_alias": "political science"
# Per-subject task config: registers the task "darijammlu_professional_law".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "professional_law"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_professional_law"
"task_alias": "professional law"
# Per-subject task config: registers the task "darijammlu_professional_psychology".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "professional_psychology"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_professional_psychology"
"task_alias": "professional psychology"
# Per-subject task config: registers the task "darijammlu_public_relations".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "public_relations"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_public_relations"
"task_alias": "public relations"
# Per-subject task config: registers the task "darijammlu_security_studies".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "security_studies"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_security_studies"
"task_alias": "security studies"
# Per-subject task config: registers the task "darijammlu_social_science".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "social_science"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_social_science"
"task_alias": "social science"
# Per-subject task config: registers the task "darijammlu_sociology".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "sociology"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_sociology"
"task_alias": "sociology"
# Per-subject task config: registers the task "darijammlu_world_religions".
# Everything not set here comes from the file named under "include".
# NOTE(review): "dataset_name" presumably selects the dataset subset to load — confirm against the template.
"dataset_name": "world_religions"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_world_religions"
"task_alias": "world religions"
# Prompt template with three positional slots, filled in order by
# doc_to_text: the subject, the question (optionally preceded by its
# context), and the newline-joined lettered options.
PROMPT = "هادا سؤال متعدد الخيارات (مع الجواب ديالو) على {}\n\n{}\n{}\nالجواب:"

# Option labels "A." through "E."; a label's index in this list is the index
# of the option it is attached to in a document's "choices".
alpha = [f"{letter}." for letter in "ABCDE"]
def doc_to_text(doc):
    """Render one document as a multiple-choice prompt string.

    Args:
        doc: Mapping with the keys ``"subject_darija"``, ``"question"``,
            ``"context"`` and ``"choices"`` (a sequence of option texts).

    Returns:
        ``PROMPT`` formatted with the subject, the question (prefixed by the
        context and a blank line when a context is present), and the options,
        one per line, each prefixed by its label from ``alpha``.

    Raises:
        KeyError: If ``doc`` lacks one of the keys listed above.
        IndexError: If ``doc["choices"]`` has more entries than ``alpha``.
    """
    subject = doc["subject_darija"]
    question = doc["question"]
    context = doc["context"]
    # Treat a missing (None) context like an empty one; comparing against ""
    # only would interpolate the literal text "None" into the prompt.
    if context:
        question = f"{context}\n\n{question}"

    # Index into alpha (rather than zip) so that an unexpected extra choice
    # fails loudly instead of being silently dropped from the prompt.
    options = "\n".join(
        f"{alpha[i]} {opt}" for i, opt in enumerate(doc["choices"])
    )
    return PROMPT.format(subject, question, options)
def doc_to_choice(doc):
    """Return the bare answer letters for the document's choices.

    One letter is produced per entry of ``doc["choices"]``, taken as the
    first character of the corresponding ``alpha`` label (so ``"A."`` yields
    ``"A"``). An IndexError is raised when there are more choices than labels.
    """
    letters = []
    for position, _ in enumerate(doc["choices"]):
        label = alpha[position]
        letters.append(label[0])
    return letters
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment