Unverified Commit 2ea6114e authored by Atou Houdaifa's avatar Atou Houdaifa Committed by GitHub
Browse files

Adding EgyMMLU and EgyHellaSwag (#3063)

* add egy mmlu hellaswag

* add egymmlu egyhellaswag to tasks readme

* fix egymmlu config generation

* fix _generate_configs formatting
parent fcddf195
"dataset_name": "moral_scenarios"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_moral_scenarios"
"task_alias": "moral scenarios"
"dataset_name": "natural_science"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_stem_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_natural_science"
"task_alias": "natural science"
"dataset_name": "nutrition"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_other_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_nutrition"
"task_alias": "nutrition"
"dataset_name": "philosophy"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_philosophy"
"task_alias": "philosophy"
"dataset_name": "philosophy_ar"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_philosophy_ar"
"task_alias": "philosophy ar"
"dataset_name": "physics"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_stem_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_physics"
"task_alias": "physics"
"dataset_name": "political_science"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_political_science"
"task_alias": "political science"
"dataset_name": "professional_law"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_professional_law"
"task_alias": "professional law"
"dataset_name": "professional_psychology"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_professional_psychology"
"task_alias": "professional psychology"
"dataset_name": "public_relations"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_public_relations"
"task_alias": "public relations"
"dataset_name": "security_studies"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_security_studies"
"task_alias": "security studies"
"dataset_name": "social_science"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_ar_mmlu_tasks"
"task": "egymmlu_social_science"
"task_alias": "social science"
"dataset_name": "sociology"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_social_sciences_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_sociology"
"task_alias": "sociology"
"dataset_name": "world_religions"
"include": "_default_egymmlu_template_yaml"
"tag":
- "egymmlu_humanities_tasks"
- "egymmlu_mmlu_tasks"
"task": "egymmlu_world_religions"
"task_alias": "world religions"
# Prompt template with three positional slots, filled in order with the
# subject, the question (optionally preceded by its context) and the
# newline-joined answer options. The Arabic text reads roughly "This is a
# multiple-choice question (with its answer) on {}" and ends with "Answer:".
PROMPT = "ده سؤال متعدد الاختيار (مع إجابته) على {}\n\n{}\n{}\n الجواب:"

# Option labels "A." through "E.", looked up by an option's position.
alpha = [f"{letter}." for letter in "ABCDE"]
def doc_to_text(doc):
    """Render one dataset record as a multiple-choice prompt.

    Args:
        doc: Mapping with the keys ``"egy_subject"``, ``"question"`` and
            ``"choices"`` (an iterable of option strings), and optionally
            ``"context"``.

    Returns:
        ``PROMPT`` filled with the subject, the question (preceded by the
        context and a blank line when a context is present) and the
        options, one per line, each prefixed with its label from ``alpha``.

    Raises:
        KeyError: If ``"egy_subject"``, ``"question"`` or ``"choices"`` is
            missing from ``doc``.
        IndexError: If ``doc["choices"]`` has more entries than ``alpha``
            has labels.
    """
    subject = doc["egy_subject"]  # subject_egyptian

    # A missing, None or empty context all mean "no context". Comparing
    # against "" alone would let None through and put the literal text
    # "None" in front of the question.
    context = doc.get("context")
    question = doc["question"]
    if context:
        question = f"{context}\n\n{question}"

    # Index into alpha (rather than zip) so that too many choices fails
    # loudly instead of silently dropping options.
    options = [f"{alpha[i]} {opt}" for i, opt in enumerate(doc["choices"])]
    return PROMPT.format(subject, question, "\n".join(options))
def doc_to_choice(doc):
    """Return the bare answer letters for a record's choices.

    One letter is produced per entry in ``doc["choices"]``, taken as the
    first character of the matching label in ``alpha`` (so ``"A."`` yields
    ``"A"``). An ``IndexError`` is raised if there are more choices than
    labels in ``alpha``.
    """
    choice_count = len(doc["choices"])
    letters = []
    for position in range(choice_count):
        # Keep only the leading letter of the label, dropping the dot.
        letters.append(alpha[position][0])
    return letters
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment