"...lm-evaluation-harness.git" did not exist on "03e7df519ea2505fa0890847cdf23a6d45393efc"
Commit 89b6bdb3 authored by Baber's avatar Baber
Browse files

Merge branch 'main' into ai2d

parents 59053d58 144a1e58
"dataset_name": "Univ Accounting"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_accounting"
"task_alias": "Univ Accounting"
"dataset_name": "Univ Computer Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_computer_science"
"task_alias": "Univ Computer Science"
"dataset_name": "Univ Economics"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_economics"
"task_alias": "Univ Economics"
"dataset_name": "Univ Political Science"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_political_science"
"task_alias": "Univ Political Science"
...@@ -23,7 +23,7 @@ def doc_to_text(doc): ...@@ -23,7 +23,7 @@ def doc_to_text(doc):
question = ( question = (
doc["Question"] doc["Question"]
if doc["Context"] == "" if not doc["Context"]
else f"{doc['Context']}\n\n{doc['Question']}" else f"{doc['Context']}\n\n{doc['Question']}"
) )
...@@ -41,4 +41,4 @@ def doc_to_text(doc): ...@@ -41,4 +41,4 @@ def doc_to_text(doc):
def doc_to_choice(doc): def doc_to_choice(doc):
return [alpa[i][0] for i in range(5) if doc[f"Option {i+1}"]] return [alpa[i][0] for i in range(5) if doc[f"Option {i + 1}"]]
group: AraDiCE_ArabicMMLU_egy
task:
- AraDiCE_ArabicMMLU_humanities_egy
- AraDiCE_ArabicMMLU_language_egy
- AraDiCE_ArabicMMLU_social-science_egy
- AraDiCE_ArabicMMLU_stem_egy
- AraDiCE_ArabicMMLU_other_egy
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
"dataset_name": "high_humanities_history"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_humanities_egy"
"task": "AraDiCE_ArabicMMLU_high_humanities_history_egy"
"task_alias": "high humanities history"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_humanities_islamic-studies"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_humanities_egy"
"task": "AraDiCE_ArabicMMLU_high_humanities_islamic-studies_egy"
"task_alias": "high humanities islamic-studies"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_humanities_philosophy"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_humanities_egy"
"task": "AraDiCE_ArabicMMLU_high_humanities_philosophy_egy"
"task_alias": "high humanities philosophy"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_language_arabic-language"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_language_egy"
"task": "AraDiCE_ArabicMMLU_high_language_arabic-language_egy"
"task_alias": "high language arabic-language"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_social-science_civics"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_social-science_egy"
"task": "AraDiCE_ArabicMMLU_high_social-science_civics_egy"
"task_alias": "high social-science civics"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_social-science_economics"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_social-science_egy"
"task": "AraDiCE_ArabicMMLU_high_social-science_economics_egy"
"task_alias": "high social-science economics"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_social-science_geography"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_social-science_egy"
"task": "AraDiCE_ArabicMMLU_high_social-science_geography_egy"
"task_alias": "high social-science geography"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_stem_biology"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_stem_egy"
"task": "AraDiCE_ArabicMMLU_high_stem_biology_egy"
"task_alias": "high stem biology"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_stem_computer-science"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_stem_egy"
"task": "AraDiCE_ArabicMMLU_high_stem_computer-science_egy"
"task_alias": "high stem computer-science"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "high_stem_physics"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_stem_egy"
"task": "AraDiCE_ArabicMMLU_high_stem_physics_egy"
"task_alias": "high stem physics"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "middle_humanities_history"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_humanities_egy"
"task": "AraDiCE_ArabicMMLU_middle_humanities_history_egy"
"task_alias": "middle humanities history"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "middle_humanities_islamic-studies"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_humanities_egy"
"task": "AraDiCE_ArabicMMLU_middle_humanities_islamic-studies_egy"
"task_alias": "middle humanities islamic-studies"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "middle_language_arabic-language"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_language_egy"
"task": "AraDiCE_ArabicMMLU_middle_language_arabic-language_egy"
"task_alias": "middle language arabic-language"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
"dataset_name": "middle_other_general-knowledge"
"description": ""
"fewshot_split": !!null "null"
"include": "_default_template_yaml"
"tag": "AraDiCE_ArabicMMLU_other_egy"
"task": "AraDiCE_ArabicMMLU_middle_other_general-knowledge_egy"
"task_alias": "middle other general-knowledge"
"test_split": "test"
"training_split": !!null "null"
"validation_split": !!null "null"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment