Commit c1e63555 authored by Yu Shi Jie
Browse files

Merge branch 'upstream' into 'mmlu-pro'

add tokenizer logs info (#1731)

See merge request shijie.yu/lm-evaluation-harness!4
parents e361687c 42dc2448
# Task entry "mmlu_moral_disputes" (dataset_name "moral_disputes"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "moral_disputes"
"description": "The following are multiple choice questions (with answers) about moral\
\ disputes.\n\n"
"group": "mmlu_humanities"
"group_alias": "humanities"
"tag": "mmlu_humanities_tasks"
"include": "_default_template_yaml"
"task": "mmlu_moral_disputes"
"task_alias": "moral_disputes"
# Task entry "mmlu_moral_scenarios" (dataset_name "moral_scenarios"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "moral_scenarios"
"description": "The following are multiple choice questions (with answers) about moral\
\ scenarios.\n\n"
"group": "mmlu_humanities"
"group_alias": "humanities"
"tag": "mmlu_humanities_tasks"
"include": "_default_template_yaml"
"task": "mmlu_moral_scenarios"
"task_alias": "moral_scenarios"
# Task entry "mmlu_nutrition" (dataset_name "nutrition"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "nutrition"
"description": "The following are multiple choice questions (with answers) about nutrition.\n\
\n"
"group": "mmlu_other"
"group_alias": "other"
"tag": "mmlu_other_tasks"
"include": "_default_template_yaml"
"task": "mmlu_nutrition"
"task_alias": "nutrition"
# Task entry "mmlu_philosophy" (dataset_name "philosophy"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "philosophy"
"description": "The following are multiple choice questions (with answers) about philosophy.\n\
\n"
"group": "mmlu_humanities"
"group_alias": "humanities"
"tag": "mmlu_humanities_tasks"
"include": "_default_template_yaml"
"task": "mmlu_philosophy"
"task_alias": "philosophy"
# Task entry "mmlu_prehistory" (dataset_name "prehistory"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "prehistory"
"description": "The following are multiple choice questions (with answers) about prehistory.\n\
\n"
"group": "mmlu_humanities"
"group_alias": "humanities"
"tag": "mmlu_humanities_tasks"
"include": "_default_template_yaml"
"task": "mmlu_prehistory"
"task_alias": "prehistory"
# Task entry "mmlu_professional_accounting" (dataset_name "professional_accounting");
# the "include" key references "_default_template_yaml" (presumably a shared
# template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "professional_accounting"
"description": "The following are multiple choice questions (with answers) about professional\
\ accounting.\n\n"
"group": "mmlu_other"
"group_alias": "other"
"tag": "mmlu_other_tasks"
"include": "_default_template_yaml"
"task": "mmlu_professional_accounting"
"task_alias": "professional_accounting"
# Task entry "mmlu_professional_law" (dataset_name "professional_law"); the "include"
# key references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "professional_law"
"description": "The following are multiple choice questions (with answers) about professional\
\ law.\n\n"
"group": "mmlu_humanities"
"group_alias": "humanities"
"tag": "mmlu_humanities_tasks"
"include": "_default_template_yaml"
"task": "mmlu_professional_law"
"task_alias": "professional_law"
# Task entry "mmlu_professional_medicine" (dataset_name "professional_medicine");
# the "include" key references "_default_template_yaml" (presumably a shared
# template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "professional_medicine"
"description": "The following are multiple choice questions (with answers) about professional\
\ medicine.\n\n"
"group": "mmlu_other"
"group_alias": "other"
"tag": "mmlu_other_tasks"
"include": "_default_template_yaml"
"task": "mmlu_professional_medicine"
"task_alias": "professional_medicine"
# Task entry "mmlu_professional_psychology" (dataset_name "professional_psychology");
# the "include" key references "_default_template_yaml" (presumably a shared
# template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "professional_psychology"
"description": "The following are multiple choice questions (with answers) about professional\
\ psychology.\n\n"
"group": "mmlu_social_sciences"
"group_alias": "social_sciences"
"tag": "mmlu_social_sciences_tasks"
"include": "_default_template_yaml"
"task": "mmlu_professional_psychology"
"task_alias": "professional_psychology"
# Task entry "mmlu_public_relations" (dataset_name "public_relations"); the "include"
# key references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "public_relations"
"description": "The following are multiple choice questions (with answers) about public\
\ relations.\n\n"
"group": "mmlu_social_sciences"
"group_alias": "social_sciences"
"tag": "mmlu_social_sciences_tasks"
"include": "_default_template_yaml"
"task": "mmlu_public_relations"
"task_alias": "public_relations"
# Task entry "mmlu_security_studies" (dataset_name "security_studies"); the "include"
# key references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "security_studies"
"description": "The following are multiple choice questions (with answers) about security\
\ studies.\n\n"
"group": "mmlu_social_sciences"
"group_alias": "social_sciences"
"tag": "mmlu_social_sciences_tasks"
"include": "_default_template_yaml"
"task": "mmlu_security_studies"
"task_alias": "security_studies"
# Task entry "mmlu_sociology" (dataset_name "sociology"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "sociology"
"description": "The following are multiple choice questions (with answers) about sociology.\n\
\n"
"group": "mmlu_social_sciences"
"group_alias": "social_sciences"
"tag": "mmlu_social_sciences_tasks"
"include": "_default_template_yaml"
"task": "mmlu_sociology"
"task_alias": "sociology"
# Task entry "mmlu_us_foreign_policy" (dataset_name "us_foreign_policy"); the "include"
# key references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "us_foreign_policy"
"description": "The following are multiple choice questions (with answers) about us\
\ foreign policy.\n\n"
"group": "mmlu_social_sciences"
"group_alias": "social_sciences"
"tag": "mmlu_social_sciences_tasks"
"include": "_default_template_yaml"
"task": "mmlu_us_foreign_policy"
"task_alias": "us_foreign_policy"
# Task entry "mmlu_virology" (dataset_name "virology"); the "include" key
# references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "virology"
"description": "The following are multiple choice questions (with answers) about virology.\n\
\n"
"group": "mmlu_other"
"group_alias": "other"
"tag": "mmlu_other_tasks"
"include": "_default_template_yaml"
"task": "mmlu_virology"
"task_alias": "virology"
# Task entry "mmlu_world_religions" (dataset_name "world_religions"); the "include"
# key references "_default_template_yaml" (presumably a shared template — confirm).
# NOTE(review): "group"/"group_alias" and "tag" appear together here; this may be the
# removed and added lines of a diff shown side by side — confirm which keys remain.
"dataset_name": "world_religions"
"description": "The following are multiple choice questions (with answers) about world\
\ religions.\n\n"
"group": "mmlu_humanities"
"group_alias": "humanities"
"tag": "mmlu_humanities_tasks"
"include": "_default_template_yaml"
"task": "mmlu_world_religions"
"task_alias": "world_religions"
# Group definition "mmlu_flan_cot_fewshot" (alias "mmlu (flan style, fewshot cot)").
# It names four sub-tasks (stem, other, social sciences, humanities); each "- group:"
# entry and the trailing top-level list request metric "acc" with weight_by_size: True.
# NOTE(review): nesting indentation is missing, and a flat list of the four
# mmlu_flan_cot_fewshot_* names sits directly before the "- group:" entries. This looks
# like the old and new versions of the task list merged from a diff view. As written,
# "task" and "aggregate_metric_list" repeat at column 0 — restore the real indentation
# and confirm which task list is current before loading this.
group: mmlu_flan_cot_fewshot
group_alias: mmlu (flan style, fewshot cot)
task:
- mmlu_flan_cot_fewshot_stem
- mmlu_flan_cot_fewshot_other
- mmlu_flan_cot_fewshot_social_sciences
- mmlu_flan_cot_fewshot_humanities
- group: stem
task:
- mmlu_flan_cot_fewshot_stem
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_flan_cot_fewshot_other
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_cot_fewshot_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_flan_cot_fewshot_humanities
aggregate_metric_list:
- metric: acc
weight_by_size: True
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 1
......@@ -27,3 +27,5 @@ metric_list:
ignore_punctuation: true
# Config version marker followed by dataset_kwargs (presumably keyword arguments
# forwarded to the dataset loader — confirm against the consumer).
# NOTE(review): trust_remote_code: true presumably allows code shipped with the
# dataset to run at load time — confirm the dataset source is trusted.
# NOTE(review): nesting indentation under "metadata:" and "dataset_kwargs:" is
# missing in this view.
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
......@@ -54,6 +54,6 @@ fewshot_config:
not have any roots. For c = 2 the polynomial x^2 + 2 has two roots at x = 1
and x = 2. Hence Z_3[x]/(x^2 + c) is a field if and only if c = 1. The answer
is (B).'
# Trailing keys of the abstract_algebra config: group/tag value, the included
# template file, and the task name.
# NOTE(review): "group" and "tag" carry the same value; this may be the removed and
# added line of a diff shown together — confirm which key should remain.
group: mmlu_flan_cot_fewshot_stem
tag: mmlu_flan_cot_fewshot_stem
include: _mmlu_flan_cot_fewshot_template_yaml
task: mmlu_flan_cot_fewshot_abstract_algebra
......@@ -70,6 +70,6 @@ fewshot_config:
\ origin of the hyoid bone are the second and the third pharyngeal arches\u2014\
this information is covered in the last option (D). Therefore, we conclude that\
\ (D) must be the correct answer. The answer is (D).\n\n"
# Trailing keys of the anatomy config: group/tag value, the included template file,
# and the task name.
# NOTE(review): "group" and "tag" carry the same value; this may be the removed and
# added line of a diff shown together — confirm which key should remain.
group: mmlu_flan_cot_fewshot_stem
tag: mmlu_flan_cot_fewshot_stem
include: _mmlu_flan_cot_fewshot_template_yaml
task: mmlu_flan_cot_fewshot_anatomy
......@@ -65,6 +65,6 @@ fewshot_config:
because it explains that the surface is red due to the rusted materials on the
surface and the red color comes from the rust. So the correct option is (A).
The answer is (A).'
# Trailing keys of the astronomy config: group/tag value, the included template file,
# and the task name.
# NOTE(review): "group" and "tag" carry the same value; this may be the removed and
# added line of a diff shown together — confirm which key should remain.
group: mmlu_flan_cot_fewshot_stem
tag: mmlu_flan_cot_fewshot_stem
include: _mmlu_flan_cot_fewshot_template_yaml
task: mmlu_flan_cot_fewshot_astronomy
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment