Commit 60c9c170 authored by haileyschoelkopf
Browse files

Merge branch 'main' into inverse-scaling-tasks

parents 4b2d565b b4cd85d4
# Subject config for the task `mmlu_continuation_professional_law`.
dataset_name: professional_law
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about professional law.\n\n"
group: mmlu_continuation_humanities
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_professional_law
# Subject config for the task `mmlu_continuation_professional_medicine`.
dataset_name: professional_medicine
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about professional medicine.\n\n"
group: mmlu_continuation_other
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_professional_medicine
# Subject config for the task `mmlu_continuation_professional_psychology`.
dataset_name: professional_psychology
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about professional psychology.\n\n"
group: mmlu_continuation_social_sciences
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_professional_psychology
# Subject config for the task `mmlu_continuation_public_relations`.
dataset_name: public_relations
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about public relations.\n\n"
group: mmlu_continuation_social_sciences
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_public_relations
# Subject config for the task `mmlu_continuation_security_studies`.
dataset_name: security_studies
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about security studies.\n\n"
group: mmlu_continuation_social_sciences
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_security_studies
# Subject config for the task `mmlu_continuation_sociology`.
dataset_name: sociology
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about sociology.\n\n"
group: mmlu_continuation_social_sciences
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_sociology
# Subject config for the task `mmlu_continuation_us_foreign_policy`.
dataset_name: us_foreign_policy
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about us foreign policy.\n\n"
group: mmlu_continuation_social_sciences
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_us_foreign_policy
# Subject config for the task `mmlu_continuation_virology`.
dataset_name: virology
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about virology.\n\n"
group: mmlu_continuation_other
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_virology
# Subject config for the task `mmlu_continuation_world_religions`.
dataset_name: world_religions
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are questions (with answers) about world religions.\n\n"
group: mmlu_continuation_humanities
# Name of the template file this config includes.
include: _continuation_template_yaml
task: mmlu_continuation_world_religions
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation
fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
......@@ -8,7 +10,7 @@ filter_list:
- name: "get-answer"
filter:
- function: "regex"
regex_pattern: "(?<=The answer is )(.*)(?=.)"
regex_pattern: "(?<=answer is )(.*)(?=.)"
- function: "take_first"
generation_kwargs:
until:
......@@ -23,4 +25,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
version: 0.0
version: 1.0
......@@ -8,7 +8,7 @@ filter_list:
- name: "strict-match"
filter:
- function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first"
- name: "flexible-extract"
filter:
......@@ -33,4 +33,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
version: 1.0
version: 2.0
......@@ -2,8 +2,10 @@ group: mmlu_flan_n_shot_generative
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: "
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
filter_list:
- name: "strict-match"
......@@ -22,9 +24,10 @@ generation_kwargs:
- "</s>"
- "Q:"
- "<|im_end|>"
- "\n"
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
version: 2.0
# Template for an MMLU variant with `output_type: multiple_choice`.
# NOTE(review): the file name is not shown here — confirm which subject configs include it.
dataset_path: hails/mmlu_no_train  # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: multiple_choice
# The prompt ends with "A:" and no trailing space.
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  # Left unquoted so it stays a float.
  version: 1.0
# Template for MMLU tasks with `output_type: generate_until`.
# NOTE(review): presumably the `_default_template_yaml` that the *_generative subject
# configs include; the file name is not shown here — confirm.
dataset_path: hails/mmlu_no_train  # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: generate_until
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
# The target is the bare letter selected by the `answer` index.
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
generation_kwargs:
  # Strings listed as generation terminators: the "</s>" token and a newline.
  until:
    - "</s>"
    - "\n"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
metadata:
  # Left unquoted so it stays a float.
  version: 1.0
# Group definition: `mmlu_generative` lists the four category-level
# generative groups as its tasks.
group: mmlu_generative
task:
  - mmlu_stem_generative
  - mmlu_other_generative
  - mmlu_social_sciences_generative
  - mmlu_humanities_generative
# Subject config for the task `mmlu_abstract_algebra_generative`.
dataset_name: abstract_algebra
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are multiple choice questions (with answers) about abstract algebra.\n\n"
group: mmlu_stem_generative
group_alias: stem
# Name of the template file this config includes.
include: _default_template_yaml
task: mmlu_abstract_algebra_generative
task_alias: abstract_algebra
# Subject config for the task `mmlu_anatomy_generative`.
dataset_name: anatomy
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are multiple choice questions (with answers) about anatomy.\n\n"
group: mmlu_stem_generative
group_alias: stem
# Name of the template file this config includes.
include: _default_template_yaml
task: mmlu_anatomy_generative
task_alias: anatomy
# Subject config for the task `mmlu_astronomy_generative`.
dataset_name: astronomy
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are multiple choice questions (with answers) about astronomy.\n\n"
group: mmlu_stem_generative
group_alias: stem
# Name of the template file this config includes.
include: _default_template_yaml
task: mmlu_astronomy_generative
task_alias: astronomy
# Subject config for the task `mmlu_business_ethics_generative`.
dataset_name: business_ethics
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are multiple choice questions (with answers) about business ethics.\n\n"
group: mmlu_other_generative
group_alias: other
# Name of the template file this config includes.
include: _default_template_yaml
task: mmlu_business_ethics_generative
task_alias: business_ethics
# Subject config for the task `mmlu_clinical_knowledge_generative`.
dataset_name: clinical_knowledge
# Kept on one line so no continuation indent is needed; the value ends with two newlines.
description: "The following are multiple choice questions (with answers) about clinical knowledge.\n\n"
group: mmlu_other_generative
group_alias: other
# Name of the template file this config includes.
include: _default_template_yaml
task: mmlu_clinical_knowledge_generative
task_alias: clinical_knowledge
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment