Commit e4db76cb authored by haileyschoelkopf's avatar haileyschoelkopf
Browse files

Merge branch 'main' into multimodal-prototyping

parents 6cc6e9cd ad80f555
...@@ -51,6 +51,6 @@ fewshot_config: ...@@ -51,6 +51,6 @@ fewshot_config:
target: 'Let''s think step by step. We refer to Wikipedia articles on us foreign target: 'Let''s think step by step. We refer to Wikipedia articles on us foreign
policy for help. The 2008 financial crisis damaged the international reputation policy for help. The 2008 financial crisis damaged the international reputation
of the American model of political economy and capitalism. The answer is (A).' of the American model of political economy and capitalism. The answer is (A).'
group: mmlu_flan_cot_fewshot_social_sciences tag: mmlu_flan_cot_fewshot_social_sciences
include: _mmlu_flan_cot_fewshot_template_yaml include: _mmlu_flan_cot_fewshot_template_yaml
task: mmlu_flan_cot_fewshot_us_foreign_policy task: mmlu_flan_cot_fewshot_us_foreign_policy
...@@ -40,6 +40,6 @@ fewshot_config: ...@@ -40,6 +40,6 @@ fewshot_config:
target: 'Let''s think step by step. We refer to Wikipedia articles on virology target: 'Let''s think step by step. We refer to Wikipedia articles on virology
for help. Parvoviruses are highly impactful because they do not have nucleic for help. Parvoviruses are highly impactful because they do not have nucleic
acid. The answer is (A).' acid. The answer is (A).'
group: mmlu_flan_cot_fewshot_other tag: mmlu_flan_cot_fewshot_other
include: _mmlu_flan_cot_fewshot_template_yaml include: _mmlu_flan_cot_fewshot_template_yaml
task: mmlu_flan_cot_fewshot_virology task: mmlu_flan_cot_fewshot_virology
...@@ -37,6 +37,6 @@ fewshot_config: ...@@ -37,6 +37,6 @@ fewshot_config:
target: 'Let''s think step by step. We refer to Wikipedia articles on world religions target: 'Let''s think step by step. We refer to Wikipedia articles on world religions
for help. In Judaism, the most distinctive sign of the covenant is circumcision for help. In Judaism, the most distinctive sign of the covenant is circumcision
(brit milah). The answer is (B).' (brit milah). The answer is (B).'
group: mmlu_flan_cot_fewshot_humanities tag: mmlu_flan_cot_fewshot_humanities
include: _mmlu_flan_cot_fewshot_template_yaml include: _mmlu_flan_cot_fewshot_template_yaml
task: mmlu_flan_cot_fewshot_world_religions task: mmlu_flan_cot_fewshot_world_religions
group: mmlu_flan_cot_zeroshot group: mmlu_flan_cot_zeroshot
group_alias: mmlu (flan style, zeroshot cot)
task: task:
- mmlu_flan_cot_zeroshot_stem - group: stem
- mmlu_flan_cot_zeroshot_other task:
- mmlu_flan_cot_zeroshot_social_sciences - mmlu_flan_cot_zeroshot_stem
- mmlu_flan_cot_zeroshot_humanities aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: other
task:
- mmlu_flan_cot_zeroshot_other
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: social sciences
task:
- mmlu_flan_cot_zeroshot_social_sciences
aggregate_metric_list:
- metric: acc
weight_by_size: True
- group: humanities
task:
- mmlu_flan_cot_zeroshot_humanities
aggregate_metric_list:
- metric: acc
weight_by_size: True
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 1
"dataset_name": "abstract_algebra" "dataset_name": "abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract\ "description": "The following are multiple choice questions (with answers) about abstract\
\ algebra.\n\n" \ algebra.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_abstract_algebra" "task": "mmlu_flan_cot_zeroshot_abstract_algebra"
"dataset_name": "anatomy" "dataset_name": "anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy.\n\ "description": "The following are multiple choice questions (with answers) about anatomy.\n\
\n" \n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_anatomy" "task": "mmlu_flan_cot_zeroshot_anatomy"
"dataset_name": "astronomy" "dataset_name": "astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy.\n\ "description": "The following are multiple choice questions (with answers) about astronomy.\n\
\n" \n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_astronomy" "task": "mmlu_flan_cot_zeroshot_astronomy"
"dataset_name": "business_ethics" "dataset_name": "business_ethics"
"description": "The following are multiple choice questions (with answers) about business\ "description": "The following are multiple choice questions (with answers) about business\
\ ethics.\n\n" \ ethics.\n\n"
"group": "mmlu_flan_cot_zeroshot_other" "tag": "mmlu_flan_cot_zeroshot_other"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_business_ethics" "task": "mmlu_flan_cot_zeroshot_business_ethics"
"dataset_name": "clinical_knowledge" "dataset_name": "clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical\ "description": "The following are multiple choice questions (with answers) about clinical\
\ knowledge.\n\n" \ knowledge.\n\n"
"group": "mmlu_flan_cot_zeroshot_other" "tag": "mmlu_flan_cot_zeroshot_other"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_clinical_knowledge" "task": "mmlu_flan_cot_zeroshot_clinical_knowledge"
"dataset_name": "college_biology" "dataset_name": "college_biology"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ biology.\n\n" \ biology.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_college_biology" "task": "mmlu_flan_cot_zeroshot_college_biology"
"dataset_name": "college_chemistry" "dataset_name": "college_chemistry"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ chemistry.\n\n" \ chemistry.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_college_chemistry" "task": "mmlu_flan_cot_zeroshot_college_chemistry"
"dataset_name": "college_computer_science" "dataset_name": "college_computer_science"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ computer science.\n\n" \ computer science.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_college_computer_science" "task": "mmlu_flan_cot_zeroshot_college_computer_science"
"dataset_name": "college_mathematics" "dataset_name": "college_mathematics"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ mathematics.\n\n" \ mathematics.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_college_mathematics" "task": "mmlu_flan_cot_zeroshot_college_mathematics"
"dataset_name": "college_medicine" "dataset_name": "college_medicine"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ medicine.\n\n" \ medicine.\n\n"
"group": "mmlu_flan_cot_zeroshot_other" "tag": "mmlu_flan_cot_zeroshot_other"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_college_medicine" "task": "mmlu_flan_cot_zeroshot_college_medicine"
"dataset_name": "college_physics" "dataset_name": "college_physics"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ physics.\n\n" \ physics.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_college_physics" "task": "mmlu_flan_cot_zeroshot_college_physics"
"dataset_name": "computer_security" "dataset_name": "computer_security"
"description": "The following are multiple choice questions (with answers) about computer\ "description": "The following are multiple choice questions (with answers) about computer\
\ security.\n\n" \ security.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_computer_security" "task": "mmlu_flan_cot_zeroshot_computer_security"
"dataset_name": "conceptual_physics" "dataset_name": "conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual\ "description": "The following are multiple choice questions (with answers) about conceptual\
\ physics.\n\n" \ physics.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_conceptual_physics" "task": "mmlu_flan_cot_zeroshot_conceptual_physics"
"dataset_name": "econometrics" "dataset_name": "econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics.\n\ "description": "The following are multiple choice questions (with answers) about econometrics.\n\
\n" \n"
"group": "mmlu_flan_cot_zeroshot_social_sciences" "tag": "mmlu_flan_cot_zeroshot_social_sciences"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_econometrics" "task": "mmlu_flan_cot_zeroshot_econometrics"
"dataset_name": "electrical_engineering" "dataset_name": "electrical_engineering"
"description": "The following are multiple choice questions (with answers) about electrical\ "description": "The following are multiple choice questions (with answers) about electrical\
\ engineering.\n\n" \ engineering.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_electrical_engineering" "task": "mmlu_flan_cot_zeroshot_electrical_engineering"
"dataset_name": "elementary_mathematics" "dataset_name": "elementary_mathematics"
"description": "The following are multiple choice questions (with answers) about elementary\ "description": "The following are multiple choice questions (with answers) about elementary\
\ mathematics.\n\n" \ mathematics.\n\n"
"group": "mmlu_flan_cot_zeroshot_stem" "tag": "mmlu_flan_cot_zeroshot_stem"
"include": "_mmlu_flan_cot_zeroshot_template_yaml" "include": "_mmlu_flan_cot_zeroshot_template_yaml"
"task": "mmlu_flan_cot_zeroshot_elementary_mathematics" "task": "mmlu_flan_cot_zeroshot_elementary_mathematics"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment