Commit c1e63555 authored by Yu Shi Jie
Browse files

Merge branch 'upstream' into 'mmlu-pro'

add tokenizer logs info (#1731)

See merge request shijie.yu/lm-evaluation-harness!4
parents e361687c 42dc2448
"dataset_name": "Middle Natural Science" "dataset_name": "Middle Natural Science"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_middle_natural_science" "task": "arabicmmlu_middle_natural_science"
"task_alias": "Middle Natural Science" "task_alias": "Middle Natural Science"
"dataset_name": "Middle Social Science" "dataset_name": "Middle Social Science"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_middle_social_science" "task": "arabicmmlu_middle_social_science"
"task_alias": "Middle Social Science" "task_alias": "Middle Social Science"
"dataset_name": "Primary Arabic Language" "dataset_name": "Primary Arabic Language"
"group": "arabicmmlu_language" "tag": "arabicmmlu_language_tasks"
"group_alias": "language" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_arabic_language" "task": "arabicmmlu_primary_arabic_language"
"task_alias": "Primary Arabic Language" "task_alias": "Primary Arabic Language"
"dataset_name": "Primary Computer Science" "dataset_name": "Primary Computer Science"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_computer_science" "task": "arabicmmlu_primary_computer_science"
"task_alias": "Primary Computer Science" "task_alias": "Primary Computer Science"
"dataset_name": "Primary General Knowledge" "dataset_name": "Primary General Knowledge"
"group": "arabicmmlu_other" "tag": "arabicmmlu_other_tasks"
"group_alias": "other" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_general_knowledge" "task": "arabicmmlu_primary_general_knowledge"
"task_alias": "Primary General Knowledge" "task_alias": "Primary General Knowledge"
"dataset_name": "Primary Geography" "dataset_name": "Primary Geography"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_geography" "task": "arabicmmlu_primary_geography"
"task_alias": "Primary Geography" "task_alias": "Primary Geography"
"dataset_name": "Primary History" "dataset_name": "Primary History"
"group": "arabicmmlu_humanities" "tag": "arabicmmlu_humanities_tasks"
"group_alias": "humanities" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_history" "task": "arabicmmlu_primary_history"
"task_alias": "Primary History" "task_alias": "Primary History"
"dataset_name": "Primary Islamic Studies" "dataset_name": "Primary Islamic Studies"
"group": "arabicmmlu_humanities" "tag": "arabicmmlu_humanities_tasks"
"group_alias": "humanities" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_islamic_studies" "task": "arabicmmlu_primary_islamic_studies"
"task_alias": "Primary Islamic Studies" "task_alias": "Primary Islamic Studies"
"dataset_name": "Primary Math" "dataset_name": "Primary Math"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_math" "task": "arabicmmlu_primary_math"
"task_alias": "Primary Math" "task_alias": "Primary Math"
"dataset_name": "Primary Natural Science" "dataset_name": "Primary Natural Science"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_natural_science" "task": "arabicmmlu_primary_natural_science"
"task_alias": "Primary Natural Science" "task_alias": "Primary Natural Science"
"dataset_name": "Primary Social Science" "dataset_name": "Primary Social Science"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_primary_social_science" "task": "arabicmmlu_primary_social_science"
"task_alias": "Primary Social Science" "task_alias": "Primary Social Science"
"dataset_name": "Prof Law" "dataset_name": "Prof Law"
"group": "arabicmmlu_humanities" "tag": "arabicmmlu_humanities_tasks"
"group_alias": "humanities" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_prof_law" "task": "arabicmmlu_prof_law"
"task_alias": "Prof Law" "task_alias": "Prof Law"
"dataset_name": "Univ Accounting" "dataset_name": "Univ Accounting"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_accounting" "task": "arabicmmlu_univ_accounting"
"task_alias": "Univ Accounting" "task_alias": "Univ Accounting"
"dataset_name": "Univ Computer Science" "dataset_name": "Univ Computer Science"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_computer_science" "task": "arabicmmlu_univ_computer_science"
"task_alias": "Univ Computer Science" "task_alias": "Univ Computer Science"
"dataset_name": "Univ Economics" "dataset_name": "Univ Economics"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_economics" "task": "arabicmmlu_univ_economics"
"task_alias": "Univ Economics" "task_alias": "Univ Economics"
"dataset_name": "Univ Management" "dataset_name": "Univ Management"
"group": "arabicmmlu_other" "tag": "arabicmmlu_other_tasks"
"group_alias": "other" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_management" "task": "arabicmmlu_univ_management"
"task_alias": "Univ Management" "task_alias": "Univ Management"
"dataset_name": "Univ Political Science" "dataset_name": "Univ Political Science"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science" "include": "_default_arabicmmlu_template_yaml"
"include": "_default_template_yaml"
"task": "arabicmmlu_univ_political_science" "task": "arabicmmlu_univ_political_science"
"task_alias": "Univ Political Science" "task_alias": "Univ Political Science"
PROMPT = 'This is a {}. Select the correct answer!\n\nQuestion: {}\n{}\n\nAnswer:' PROMPT = "This is a {}. Select the correct answer!\n\nQuestion: {}\n{}\n\nAnswer:"
level_en = { level_en = {
'Primary': 'primary school', "Primary": "primary school",
'Middle': 'middle school', "Middle": "middle school",
'High': 'high school', "High": "high school",
'Univ': 'university', "Univ": "university",
'Prof': 'professional', "Prof": "professional",
} }
alpa = ['A.', 'B.', 'C.', 'D.', 'E.'] alpa = ["A.", "B.", "C.", "D.", "E."]
def doc_to_text(doc): def doc_to_text(doc):
...@@ -17,22 +17,28 @@ def doc_to_text(doc): ...@@ -17,22 +17,28 @@ def doc_to_text(doc):
https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py https://github.com/mbzuai-nlp/ArabicMMLU/blob/main/util_prompt.py
""" """
level = "" if not doc['Level'] else " for " + level_en[doc['Level']] level = "" if not doc["Level"] else " for " + level_en[doc["Level"]]
country = "" if not doc['Country'] else " in " + doc['Country'] country = "" if not doc["Country"] else " in " + doc["Country"]
main_meta_data = f"{doc['Subject']} question{level}{country}" main_meta_data = f"{doc['Subject']} question{level}{country}"
question = doc['Question'] if doc['Context']=="" else f"{doc['Context']}\n\n{doc['Question']}" question = (
doc["Question"]
if doc["Context"] == ""
else f"{doc['Context']}\n\n{doc['Question']}"
)
options = [] options = []
for i, opt in enumerate(['Option 1', 'Option 2', 'Option 3', 'Option 4', 'Option 5']): for i, opt in enumerate(
["Option 1", "Option 2", "Option 3", "Option 4", "Option 5"]
):
if not doc[opt]: if not doc[opt]:
break break
options.append(f"{alpa[i]} {doc[opt]}") options.append(f"{alpa[i]} {doc[opt]}")
doc_text = PROMPT.format(main_meta_data, question, '\n'.join(options)) doc_text = PROMPT.format(main_meta_data, question, "\n".join(options))
return doc_text return doc_text
def doc_to_choice(doc): def doc_to_choice(doc):
return [alpa[i][0] for i in range(5) if doc[f'Option {i+1}']] return [alpa[i][0] for i in range(5) if doc[f"Option {i+1}"]]
\ No newline at end of file
...@@ -29,10 +29,14 @@ Homepage: https://allenai.org/data/arc ...@@ -29,10 +29,14 @@ Homepage: https://allenai.org/data/arc
} }
``` ```
### Groups and Tasks ### Groups, Tags, and Tasks
#### Groups #### Groups
None.
#### Tags
* `ai2_arc`: Evaluates `arc_easy` and `arc_challenge` * `ai2_arc`: Evaluates `arc_easy` and `arc_challenge`
#### Tasks #### Tasks
......
group: tag:
- ai2_arc - ai2_arc
task: arc_easy task: arc_easy
dataset_path: allenai/ai2_arc dataset_path: allenai/ai2_arc
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment