Commit 2b56339e authored by Baber's avatar Baber
Browse files

Merge branch 'main' into longcxt

parents 0b533339 703fbffd
# Task config: Egyptian-dialect ArabicMMLU, university-level political science.
dataset_name: univ_social-science_political-science
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_social-science_egy
task: AraDiCE_ArabicMMLU_univ_social-science_political-science_egy
task_alias: univ social-science political-science
test_split: test
training_split: null
validation_split: null
# Task config: Egyptian-dialect ArabicMMLU, university-level computer science.
dataset_name: univ_stem_computer-science
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_stem_egy
task: AraDiCE_ArabicMMLU_univ_stem_computer-science_egy
task_alias: univ stem computer-science
test_split: test
training_split: null
validation_split: null
# Shared template included by every AraDiCE ArabicMMLU (Egyptian) task config.
# process_docs builds the prompt/choices/target fields (see utils.process_docs).
dataset_path: QCRI/AraDICE-ArabicMMLU-egy
fewshot_config:
  sampler: default
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "{{prompt}}"
doc_to_choice: choices
doc_to_target: target
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    aggregation: !function metrics.micro_f1_score
    higher_is_better: true
metadata:
  version: 0.0
from sklearn.metrics import f1_score
def macro_f1_score(items):
    """Macro-averaged F1 over accumulated (gold, pred) pairs."""
    return _averaged_f1_score(items, "macro")


def micro_f1_score(items):
    """Micro-averaged F1 over accumulated (gold, pred) pairs."""
    return _averaged_f1_score(items, "micro")


def weighted_f1_score(items):
    """Support-weighted F1 over accumulated (gold, pred) pairs."""
    return _averaged_f1_score(items, "weighted")


def _averaged_f1_score(items, average):
    """Shared helper: unzip (gold, pred) pairs and score with sklearn.

    Args:
        items: iterable of (gold_label, predicted_label) tuples as
            accumulated by the harness's metric aggregation.
        average: averaging strategy forwarded to sklearn.metrics.f1_score
            ("macro", "micro", or "weighted").

    Returns:
        The F1 score as a float.
    """
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    return f1_score(golds, preds, average=average)
# Arabic phrases for each school level, used to build the prompt's metadata
# sentence ("for the … stage"). Keys match the dataset's "Level" field.
level_ar = {
    "Primary": "للمرحلة الابتدائية",
    "Middle": "للمرحلة المتوسطة",
    "High": "للمرحلة الثانوية",
    "Univ": "للمرحلة الجامعية ",
    "Prof": "للمحترفين",
}
# Arabic phrases for each country ("in …"). Keys match the dataset's
# "Country" field.
country_ar = {
    "UAE": "في الإمارات",
    "Egypt": "في مصر",
    "Lebanon": "في لبنان",
    "Jordan": "في الأردن",
    "Kuwait": "في الكويت",
    "KSA": "في السعودية",
    "Palestine": "في فلسطين",
    "Morocco": "في المغرب",
}
# Arabic phrases for each subject ("in …"). Keys match the dataset's
# "Subject" field.
subject_ar = {
    "Islamic Studies": "في الدراسات إسلامية",
    "Driving Test": "في اختبار القيادة",
    "Natural Science": "في العلوم الطبيعية",
    "History": "في مادة التاريخ",
    "General Knowledge": "في المعرفة العامة",
    "Law": "في القانون",
    "Physics": "في الفيزياء",
    "Social Science": "في العلوم الاجتماعية",
    "Management": "في الإدارة",
    "Arabic Language": "في اللغة العربية",
    "Political Science": " في العلوم السياسية",
    "Philosophy": "في الفلسفة",
    "Accounting": "في المحاسبة",
    "Computer Science": "في علوم الحاسوب",
    "Geography": "في الجغرافيا",
    "Math": "في الرياضيات",
    "Biology": "في علم الأحياء",
    "Economics": "في الاقتصاد",
    "Arabic Language (General)": "في اللغة العربية (عام)",
    "Arabic Language (Grammar)": "في اللغة العربية (النحو)",
    "Civics": "في التربية المدنية",
}
# Option labels: Arabic letter prefixes used in the rendered prompt, their
# English counterparts, and the bare letters used as answer choices.
alpa_ar = ["أ-", "ب-", "ج-", "د-", "و-"]
alpa_en = ["A-", "B-", "C-", "D-", "E-"]
all_choices = ["أ", "ب", "ج", "د", "و"]
all_choices_en = ["A", "B", "C", "D", "E"]
def process_docs(dataset):
    """Attach "prompt", "choices", and "target" fields to every document.

    Builds an Egyptian-Arabic multiple-choice prompt from each document's
    subject/level/country metadata, optional context, question, and options.
    Returns the mapped datasets.Dataset object.
    """

    def _build_fields(doc):
        # Metadata phrase: subject, then optional school level and country.
        subject_part = subject_ar[doc["Subject"]]
        level_part = " " + level_ar[doc["Level"]] if doc["Level"] else ""
        country_part = " " + country_ar[doc["Country"]] if doc["Country"] else ""
        meta = f"{subject_part}{level_part}{country_part}"

        # Prepend the context paragraph to the question when one is present.
        if doc["context"]:
            question_text = f"{doc['context']}\n\n{doc['question']}"
        else:
            question_text = doc["question"]

        # Collect the contiguous run of options A..E, labelled with Arabic
        # letters; stop at the first missing or None option.
        labels = ["A", "B", "C", "D", "E"]
        option_lines = []
        for idx, key in enumerate(labels):
            if key not in doc["options"] or doc["options"][key] is None:
                break
            option_lines.append(f"{alpa_ar[idx]} {doc['options'][key]}")

        template = "ده سؤال [MAIN_META_DATA]. اختار الإجابة الصحيحة!\n\nسؤال: [INPUT]\n[OPTION]\n\nإجابة:"
        doc["prompt"] = (
            template.replace("[MAIN_META_DATA]", meta)
            .replace("[INPUT]", question_text)
            .replace("[OPTION]", "\n".join(option_lines))
        )
        # Choices are the Arabic letters matching the rendered options; the
        # target is the 0-based index of the gold answer key.
        doc["choices"] = all_choices[: len(option_lines)]
        doc["target"] = labels.index(doc["Answer Key"])
        return doc

    return dataset.map(_build_fields)  # returns back a datasets.Dataset object
# Group config aggregating the Levantine-dialect ArabicMMLU subtask groups.
group: AraDiCE_ArabicMMLU_lev
task:
  - AraDiCE_ArabicMMLU_humanities_lev
  - AraDiCE_ArabicMMLU_language_lev
  - AraDiCE_ArabicMMLU_social-science_lev
  - AraDiCE_ArabicMMLU_stem_lev
  - AraDiCE_ArabicMMLU_other_lev
aggregate_metric_list:
  - metric: acc
    weight_by_size: true
  - metric: acc_norm
    weight_by_size: true
# Per-task configs for the Levantine-dialect ArabicMMLU subtasks.
# Each stanza below is an independent task-definition file.

dataset_name: high_humanities_history
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_humanities_lev
task: AraDiCE_ArabicMMLU_high_humanities_history_lev
task_alias: high humanities history
test_split: test
training_split: null
validation_split: null

dataset_name: high_humanities_islamic-studies
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_humanities_lev
task: AraDiCE_ArabicMMLU_high_humanities_islamic-studies_lev
task_alias: high humanities islamic-studies
test_split: test
training_split: null
validation_split: null

dataset_name: high_humanities_philosophy
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_humanities_lev
task: AraDiCE_ArabicMMLU_high_humanities_philosophy_lev
task_alias: high humanities philosophy
test_split: test
training_split: null
validation_split: null

dataset_name: high_language_arabic-language
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_language_lev
task: AraDiCE_ArabicMMLU_high_language_arabic-language_lev
task_alias: high language arabic-language
test_split: test
training_split: null
validation_split: null

dataset_name: high_social-science_civics
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_social-science_lev
task: AraDiCE_ArabicMMLU_high_social-science_civics_lev
task_alias: high social-science civics
test_split: test
training_split: null
validation_split: null

dataset_name: high_social-science_economics
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_social-science_lev
task: AraDiCE_ArabicMMLU_high_social-science_economics_lev
task_alias: high social-science economics
test_split: test
training_split: null
validation_split: null

dataset_name: high_social-science_geography
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_social-science_lev
task: AraDiCE_ArabicMMLU_high_social-science_geography_lev
task_alias: high social-science geography
test_split: test
training_split: null
validation_split: null

dataset_name: high_stem_biology
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_stem_lev
task: AraDiCE_ArabicMMLU_high_stem_biology_lev
task_alias: high stem biology
test_split: test
training_split: null
validation_split: null

dataset_name: high_stem_computer-science
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_stem_lev
task: AraDiCE_ArabicMMLU_high_stem_computer-science_lev
task_alias: high stem computer-science
test_split: test
training_split: null
validation_split: null

dataset_name: high_stem_physics
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_stem_lev
task: AraDiCE_ArabicMMLU_high_stem_physics_lev
task_alias: high stem physics
test_split: test
training_split: null
validation_split: null

dataset_name: middle_humanities_history
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_humanities_lev
task: AraDiCE_ArabicMMLU_middle_humanities_history_lev
task_alias: middle humanities history
test_split: test
training_split: null
validation_split: null

dataset_name: middle_humanities_islamic-studies
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_humanities_lev
task: AraDiCE_ArabicMMLU_middle_humanities_islamic-studies_lev
task_alias: middle humanities islamic-studies
test_split: test
training_split: null
validation_split: null

dataset_name: middle_language_arabic-language
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_language_lev
task: AraDiCE_ArabicMMLU_middle_language_arabic-language_lev
task_alias: middle language arabic-language
test_split: test
training_split: null
validation_split: null

dataset_name: middle_other_general-knowledge
description: ""
fewshot_split: null
include: _default_template_yaml
tag: AraDiCE_ArabicMMLU_other_lev
task: AraDiCE_ArabicMMLU_middle_other_general-knowledge_lev
task_alias: middle other general-knowledge
test_split: test
training_split: null
validation_split: null
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.