Commit 60c9c170 authored by haileyschoelkopf
Browse files

Merge branch 'main' into inverse-scaling-tasks

parents 4b2d565b b4cd85d4
# MMLU continuation task for the `professional_law` subset.
# `include` names the template file this config builds on.
dataset_name: professional_law
description: "The following are questions (with answers) about professional law.\n\n"
group: mmlu_continuation_humanities
include: _continuation_template_yaml
task: mmlu_continuation_professional_law
# MMLU continuation task for the `professional_medicine` subset.
# `include` names the template file this config builds on.
dataset_name: professional_medicine
description: "The following are questions (with answers) about professional medicine.\n\n"
group: mmlu_continuation_other
include: _continuation_template_yaml
task: mmlu_continuation_professional_medicine
# MMLU continuation task for the `professional_psychology` subset.
# `include` names the template file this config builds on.
dataset_name: professional_psychology
description: "The following are questions (with answers) about professional psychology.\n\n"
group: mmlu_continuation_social_sciences
include: _continuation_template_yaml
task: mmlu_continuation_professional_psychology
# MMLU continuation task for the `public_relations` subset.
# `include` names the template file this config builds on.
dataset_name: public_relations
description: "The following are questions (with answers) about public relations.\n\n"
group: mmlu_continuation_social_sciences
include: _continuation_template_yaml
task: mmlu_continuation_public_relations
# MMLU continuation task for the `security_studies` subset.
# `include` names the template file this config builds on.
dataset_name: security_studies
description: "The following are questions (with answers) about security studies.\n\n"
group: mmlu_continuation_social_sciences
include: _continuation_template_yaml
task: mmlu_continuation_security_studies
# MMLU continuation task for the `sociology` subset.
# `include` names the template file this config builds on.
dataset_name: sociology
description: "The following are questions (with answers) about sociology.\n\n"
group: mmlu_continuation_social_sciences
include: _continuation_template_yaml
task: mmlu_continuation_sociology
# MMLU continuation task for the `us_foreign_policy` subset.
# `include` names the template file this config builds on.
dataset_name: us_foreign_policy
description: "The following are questions (with answers) about us foreign policy.\n\n"
group: mmlu_continuation_social_sciences
include: _continuation_template_yaml
task: mmlu_continuation_us_foreign_policy
# MMLU continuation task for the `virology` subset.
# `include` names the template file this config builds on.
dataset_name: virology
description: "The following are questions (with answers) about virology.\n\n"
group: mmlu_continuation_other
include: _continuation_template_yaml
task: mmlu_continuation_virology
# MMLU continuation task for the `world_religions` subset.
# `include` names the template file this config builds on.
dataset_name: world_religions
description: "The following are questions (with answers) about world religions.\n\n"
group: mmlu_continuation_humanities
include: _continuation_template_yaml
task: mmlu_continuation_world_religions
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation validation_split: validation
fewshot_split: dev fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step." doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
...@@ -8,7 +10,7 @@ filter_list: ...@@ -8,7 +10,7 @@ filter_list:
- name: "get-answer" - name: "get-answer"
filter: filter:
- function: "regex" - function: "regex"
regex_pattern: "(?<=The answer is )(.*)(?=.)" regex_pattern: "(?<=answer is )(.*)(?=.)"
- function: "take_first" - function: "take_first"
generation_kwargs: generation_kwargs:
until: until:
...@@ -23,4 +25,4 @@ metric_list: ...@@ -23,4 +25,4 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata: metadata:
version: 0.0 version: 1.0
...@@ -8,7 +8,7 @@ filter_list: ...@@ -8,7 +8,7 @@ filter_list:
- name: "strict-match" - name: "strict-match"
filter: filter:
- function: "regex" - function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))" regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first" - function: "take_first"
- name: "flexible-extract" - name: "flexible-extract"
filter: filter:
...@@ -33,4 +33,4 @@ metric_list: ...@@ -33,4 +33,4 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata: metadata:
version: 1.0 version: 2.0
...@@ -2,8 +2,10 @@ group: mmlu_flan_n_shot_generative ...@@ -2,8 +2,10 @@ group: mmlu_flan_n_shot_generative
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: " doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
filter_list: filter_list:
- name: "strict-match" - name: "strict-match"
...@@ -22,9 +24,10 @@ generation_kwargs: ...@@ -22,9 +24,10 @@ generation_kwargs:
- "</s>" - "</s>"
- "Q:" - "Q:"
- "<|im_end|>" - "<|im_end|>"
- "\n"
metric_list: metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 1.0 version: 2.0
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: multiple_choice output_type: multiple_choice
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: " doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"] doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
doc_to_target: answer doc_to_target: answer
metric_list: metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.0 version: 1.0
# Generation-based MMLU template (presumably the `_default_template_yaml`
# that the `*_generative` subject configs include -- confirm).
# Each question is rendered with its four choices labelled A-D followed by
# "Answer:"; the target is the letter selected by the `answer` index.
dataset_path: hails/mmlu_no_train  # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: generate_until
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
generation_kwargs:
  # Stop strings: `</s>` or the first newline, so only the answer line is
  # compared against the target.
  until:
    - "</s>"
    - "\n"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Top-level group listing the four generative MMLU category groups.
group: mmlu_generative
task:
  - mmlu_stem_generative
  - mmlu_other_generative
  - mmlu_social_sciences_generative
  - mmlu_humanities_generative
# Generative MMLU task for the `abstract_algebra` subset.
# `include` names the template file this config builds on.
dataset_name: abstract_algebra
description: "The following are multiple choice questions (with answers) about abstract algebra.\n\n"
group: mmlu_stem_generative
group_alias: stem
include: _default_template_yaml
task: mmlu_abstract_algebra_generative
task_alias: abstract_algebra
# Generative MMLU task for the `anatomy` subset.
# `include` names the template file this config builds on.
dataset_name: anatomy
description: "The following are multiple choice questions (with answers) about anatomy.\n\n"
group: mmlu_stem_generative
group_alias: stem
include: _default_template_yaml
task: mmlu_anatomy_generative
task_alias: anatomy
# Generative MMLU task for the `astronomy` subset.
# `include` names the template file this config builds on.
dataset_name: astronomy
description: "The following are multiple choice questions (with answers) about astronomy.\n\n"
group: mmlu_stem_generative
group_alias: stem
include: _default_template_yaml
task: mmlu_astronomy_generative
task_alias: astronomy
# Generative MMLU task for the `business_ethics` subset.
# `include` names the template file this config builds on.
dataset_name: business_ethics
description: "The following are multiple choice questions (with answers) about business ethics.\n\n"
group: mmlu_other_generative
group_alias: other
include: _default_template_yaml
task: mmlu_business_ethics_generative
task_alias: business_ethics
# Generative MMLU task for the `clinical_knowledge` subset.
# `include` names the template file this config builds on.
dataset_name: clinical_knowledge
description: "The following are multiple choice questions (with answers) about clinical knowledge.\n\n"
group: mmlu_other_generative
group_alias: other
include: _default_template_yaml
task: mmlu_clinical_knowledge_generative
task_alias: clinical_knowledge
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment