"sgl-kernel/csrc/vscode:/vscode.git/clone" did not exist on "7577f0e40f56717491ee96e95b07fd34926939d0"
Unverified commit f2ea37e3, authored by Lintang Sutawika, committed by GitHub
Browse files

add mmlu tasks from pile-t5 (#1710)



* add mmlu tasks from pile-t5

* Update _mmlu_flan_cot_fewshot_template_yaml

* Update _mmlu_flan_cot_zeroshot_template_yaml

* Update _mmlu_flan_generative_template_yaml

* Update _mmlu_flan_loglikelihood_template_yaml

* Update _default_template_yaml

---------
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
parent b043b050
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation validation_split: validation
fewshot_split: dev fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step." doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
...@@ -8,7 +10,7 @@ filter_list: ...@@ -8,7 +10,7 @@ filter_list:
- name: "get-answer" - name: "get-answer"
filter: filter:
- function: "regex" - function: "regex"
regex_pattern: "(?<=The answer is )(.*)(?=.)" regex_pattern: "(?<=answer is )(.*)(?=.)"
- function: "take_first" - function: "take_first"
generation_kwargs: generation_kwargs:
until: until:
...@@ -23,4 +25,4 @@ metric_list: ...@@ -23,4 +25,4 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata: metadata:
version: 0.0 version: 1.0
...@@ -8,7 +8,7 @@ filter_list: ...@@ -8,7 +8,7 @@ filter_list:
- name: "strict-match" - name: "strict-match"
filter: filter:
- function: "regex" - function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))" regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first" - function: "take_first"
- name: "flexible-extract" - name: "flexible-extract"
filter: filter:
...@@ -33,4 +33,4 @@ metric_list: ...@@ -33,4 +33,4 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata: metadata:
version: 1.0 version: 2.0
...@@ -2,8 +2,10 @@ group: mmlu_flan_n_shot_generative ...@@ -2,8 +2,10 @@ group: mmlu_flan_n_shot_generative
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: generate_until output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: " doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
filter_list: filter_list:
- name: "strict-match" - name: "strict-match"
...@@ -22,9 +24,10 @@ generation_kwargs: ...@@ -22,9 +24,10 @@ generation_kwargs:
- "</s>" - "</s>"
- "Q:" - "Q:"
- "<|im_end|>" - "<|im_end|>"
- "\n"
metric_list: metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 1.0 version: 2.0
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: multiple_choice output_type: multiple_choice
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: " doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"] doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
doc_to_target: answer doc_to_target: answer
metric_list: metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.0 version: 1.0
# Shared settings for the generative MMLU subtasks.
# NOTE(review): presumably this is the `_default_template_yaml` that the
# per-subject configs reference through `include` - confirm the file name.
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
# Few-shot examples are drawn with the `first_n` sampler.
fewshot_config:
  sampler: first_n
output_type: generate_until
# Prompt: the stripped question, the four choices labelled A-D on separate
# lines, then the literal "Answer:" cue.
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
# Target: the letter selected by indexing ['A', 'B', 'C', 'D'] with `answer`.
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
# `until` must be nested under `generation_kwargs`; at column 0 it would be
# read as an unrelated top-level key.
generation_kwargs:
  until:
    - "</s>"
    - "\n"
# One metric entry; `aggregation` and `higher_is_better` belong to that entry.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Top-level group that bundles the four category-level generative MMLU groups.
group: mmlu_generative
task:
  - mmlu_stem_generative
  - mmlu_other_generative
  - mmlu_social_sciences_generative
  - mmlu_humanities_generative
# Generative MMLU subtask for the `abstract_algebra` subset (STEM group).
task: mmlu_abstract_algebra_generative
task_alias: abstract_algebra
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: abstract_algebra
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about abstract algebra.\n\n"
# Generative MMLU subtask for the `anatomy` subset (STEM group).
task: mmlu_anatomy_generative
task_alias: anatomy
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: anatomy
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about anatomy.\n\n"
# Generative MMLU subtask for the `astronomy` subset (STEM group).
task: mmlu_astronomy_generative
task_alias: astronomy
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: astronomy
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about astronomy.\n\n"
# Generative MMLU subtask for the `business_ethics` subset ("other" group).
task: mmlu_business_ethics_generative
task_alias: business_ethics
group: mmlu_other_generative
group_alias: other
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: business_ethics
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about business ethics.\n\n"
# Generative MMLU subtask for the `clinical_knowledge` subset ("other" group).
task: mmlu_clinical_knowledge_generative
task_alias: clinical_knowledge
group: mmlu_other_generative
group_alias: other
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: clinical_knowledge
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about clinical knowledge.\n\n"
# Generative MMLU subtask for the `college_biology` subset (STEM group).
task: mmlu_college_biology_generative
task_alias: college_biology
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: college_biology
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about college biology.\n\n"
# Generative MMLU subtask for the `college_chemistry` subset (STEM group).
task: mmlu_college_chemistry_generative
task_alias: college_chemistry
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: college_chemistry
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about college chemistry.\n\n"
# Generative MMLU subtask for the `college_computer_science` subset (STEM group).
task: mmlu_college_computer_science_generative
task_alias: college_computer_science
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: college_computer_science
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about college computer science.\n\n"
# Generative MMLU subtask for the `college_mathematics` subset (STEM group).
task: mmlu_college_mathematics_generative
task_alias: college_mathematics
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: college_mathematics
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about college mathematics.\n\n"
# Generative MMLU subtask for the `college_medicine` subset ("other" group).
task: mmlu_college_medicine_generative
task_alias: college_medicine
group: mmlu_other_generative
group_alias: other
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: college_medicine
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about college medicine.\n\n"
# Generative MMLU subtask for the `college_physics` subset (STEM group).
task: mmlu_college_physics_generative
task_alias: college_physics
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: college_physics
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about college physics.\n\n"
# Generative MMLU subtask for the `computer_security` subset (STEM group).
task: mmlu_computer_security_generative
task_alias: computer_security
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: computer_security
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about computer security.\n\n"
# Generative MMLU subtask for the `conceptual_physics` subset (STEM group).
task: mmlu_conceptual_physics_generative
task_alias: conceptual_physics
group: mmlu_stem_generative
group_alias: stem
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: conceptual_physics
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about conceptual physics.\n\n"
# Generative MMLU subtask for the `econometrics` subset (social sciences group).
task: mmlu_econometrics_generative
task_alias: econometrics
group: mmlu_social_sciences_generative
group_alias: social_sciences
# Shared settings are pulled in from the file named here.
include: _default_template_yaml
dataset_name: econometrics
# Header text; ends with two newlines.
description: "The following are multiple choice questions (with answers) about econometrics.\n\n"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment