Unverified Commit da211969 authored by Jess's avatar Jess Committed by GitHub
Browse files

Merge branch 'EleutherAI:main' into main

parents 1b97e487 801322e0
---
# Each document below is one ArabicMMLU task config. They were concatenated
# into a single mapping, which repeats every key; `---` markers keep each
# config a separate document so no task overwrites another.
"dataset_name": "Primary General Knowledge"
"group": "arabicmmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_general_knowledge"
"task_alias": "Primary General Knowledge"
---
"dataset_name": "Primary Geography"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_geography"
"task_alias": "Primary Geography"
---
"dataset_name": "Primary History"
"group": "arabicmmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_history"
"task_alias": "Primary History"
---
"dataset_name": "Primary Islamic Studies"
"group": "arabicmmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_islamic_studies"
"task_alias": "Primary Islamic Studies"
---
"dataset_name": "Primary Math"
"group": "arabicmmlu_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_math"
"task_alias": "Primary Math"
---
"dataset_name": "Primary Natural Science"
"group": "arabicmmlu_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_natural_science"
"task_alias": "Primary Natural Science"
---
"dataset_name": "Primary Social Science"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_social_science"
"task_alias": "Primary Social Science"
---
"dataset_name": "Prof Law"
"group": "arabicmmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "arabicmmlu_prof_law"
"task_alias": "Prof Law"
---
"dataset_name": "Univ Accounting"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_accounting"
"task_alias": "Univ Accounting"
---
"dataset_name": "Univ Computer Science"
"group": "arabicmmlu_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_computer_science"
"task_alias": "Univ Computer Science"
---
"dataset_name": "Univ Economics"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_economics"
"task_alias": "Univ Economics"
---
"dataset_name": "Univ Management"
"group": "arabicmmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_management"
"task_alias": "Univ Management"
---
"dataset_name": "Univ Political Science"
"group": "arabicmmlu_social_science"
"group_alias": "social science"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_political_science"
"task_alias": "Univ Political Science"
# Prompt skeleton. Its three `{}` slots are filled, in order, with the
# metadata phrase, the question text, and the newline-joined answer options.
PROMPT = (
    "This is a {}. Select the correct answer!\n\n"
    "Question: {}\n"
    "{}\n\n"
    "Answer:"
)

# Maps a document's "Level" value to the English phrase used in the prompt.
level_en = {
    "Primary": "primary school",
    "Middle": "middle school",
    "High": "high school",
    "Univ": "university",
    "Prof": "professional",
}

# Option labels ("A." through "E."), in the order the options are listed.
alpa = [letter + "." for letter in "ABCDE"]
def doc_to_text(doc):
    """
    Refactoring `prepare_data_en` to fit with the lm harness framework.
    https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py

    Build the prompt string for one document.

    Args:
        doc: mapping with the keys "Subject", "Level", "Country", "Context",
            "Question" and "Option 1" .. "Option 5".

    Returns:
        `PROMPT` filled with the metadata phrase, the question (preceded by
        the context when one is present) and the labelled options.

    Raises:
        KeyError: if a non-empty "Level" is not a key of `level_en`.
    """
    level = "" if not doc["Level"] else " for " + level_en[doc["Level"]]
    country = "" if not doc["Country"] else " in " + doc["Country"]
    main_meta_data = f"{doc['Subject']} question{level}{country}"

    # Use truthiness, as for Level and Country above: a None context must be
    # treated like an empty one, not rendered as the text "None".
    context = doc["Context"]
    question = f"{context}\n\n{doc['Question']}" if context else doc["Question"]

    options = []
    for label, opt in zip(
        alpa, ["Option 1", "Option 2", "Option 3", "Option 4", "Option 5"]
    ):
        # Options are listed only up to the first empty one.
        if not doc[opt]:
            break
        options.append(f"{label} {doc[opt]}")

    return PROMPT.format(main_meta_data, question, "\n".join(options))
def doc_to_choice(doc):
    """Return the bare option letters for every non-empty option of `doc`.

    NOTE(review): unlike `doc_to_text`, which stops at the first empty
    option, this skips empty options and keeps scanning. Confirm that the
    dataset never has a gap between filled options.
    """
    letters = []
    for position in range(1, 6):
        if doc[f"Option {position}"]:
            # `alpa` entries look like "A."; keep only the leading letter.
            letters.append(alpa[position - 1][0])
    return letters
# arc mt
arc mt implements tasks for running machine-translated versions of the ARC
Challenge evals, extending eval support to a number of additional
languages.
The main page for the effort is
[here](https://huggingface.co/datasets/LumiOpen/arc_challenge_mt) and we will
include more data and analysis there.
The initial datasets cover a number of European languages, and we plan to add
more in the future.
---
# Per-language overrides: each document includes arc_challenge_mt_fi.yaml and
# sets its own `task` and `dataset_name`. They are kept as separate documents
# so the repeated keys do not overwrite one another.
include: arc_challenge_mt_fi.yaml
task: arc_challenge_mt_da
dataset_name: da
---
include: arc_challenge_mt_fi.yaml
task: arc_challenge_mt_de
dataset_name: de
---
include: arc_challenge_mt_fi.yaml
task: arc_challenge_mt_el
dataset_name: el
---
include: arc_challenge_mt_fi.yaml
task: arc_challenge_mt_es
dataset_name: es
---
# Config for the arc_challenge_mt_fi task; the other arc_challenge_mt language
# configs include this file and override `task` and `dataset_name`.
group:
  - arc_challenge_mt
task: arc_challenge_mt_fi
dataset_path: LumiOpen/arc_challenge_mt
dataset_name: fi
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
doc_to_text: "Question: {{question}}\nAnswer:"
doc_to_target: "{{choices.label.index(answerKey)}}"
doc_to_choice: "{{choices.text}}"
should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
# Each metric entry carries its own aggregation and direction; these keys must
# be nested under the list item, not left at the top level.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment