Commit c171fa30 authored by haileyschoelkopf
Browse files

add more groupings / tags distinctions

parent 46e8c8e6
"dataset_name": "professional_accounting"
"description": "以下是关于专业会计的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_accounting"
"dataset_name": "professional_law"
"description": "以下是关于专业法学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_law"
"dataset_name": "professional_medicine"
"description": "以下是关于专业医学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_medicine"
"dataset_name": "professional_psychology"
"description": "以下是关于专业心理学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_psychology"
"dataset_name": "public_relations"
"description": "以下是关于公共关系的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_public_relations"
"dataset_name": "security_study"
"description": "以下是关于安全研究的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_security_study"
"dataset_name": "sociology"
"description": "以下是关于社会学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_sociology"
"dataset_name": "sports_science"
"description": "以下是关于体育学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_sports_science"
"dataset_name": "traditional_chinese_medicine"
"description": "以下是关于中医中药的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_traditional_chinese_medicine"
"dataset_name": "virology"
"description": "以下是关于病毒学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_virology"
"dataset_name": "world_history"
"description": "以下是关于世界历史的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_world_history"
"dataset_name": "world_religions"
"description": "以下是关于世界宗教的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_world_religions"
# Generative (`generate_until`) math task whose results are scored by the
# custom `_utils.process_results` hook and reported as mean accuracy.
# NOTE(review): the task is named `gsm8k`, but `dataset_path` points at an SAT
# multiple-choice math dataset -- confirm the task name is intended.
task: gsm8k
dataset_path: mcaleste/sat_multiple_choice_math_may_23
dataset_name: main
output_type: generate_until
# NOTE(review): a plain `none` is the *string* "none" in YAML, not null.
# Confirm this is what the loader expects, given `num_fewshot: 5` below.
fewshot_split: none
# Evaluation documents are read from the dataset's `train` split.
test_split: train
doc_to_text: "Question: {{question}}\nAnswer:"
doc_to_target: "({{answer}})"
metric_list:
  # `aggregation` and `higher_is_better` are fields of this metric entry, so
  # they must be indented under it rather than sit at the top level.
  - metric: acc
    aggregation: mean
    higher_is_better: true
process_results: !function _utils.process_results
generation_kwargs:
  # Generation stops at the first occurrence of any string listed here.
  until:
    - "I hope it is correct."
  do_sample: false
  temperature: 0.0
repeats: 1
num_fewshot: 5
# Disabled answer-extraction filter, kept for reference:
# filter_list:
#   - name: "get-answer"
#     filter:
#       - function: "regex"
#         regex_pattern: "### (\\-?[0-9\\.\\,]+)"
#       - function: "take_first"
# Multiple-choice config for the EleutherAI/winogenerated dataset, evaluated
# on its `validation` split and scored with accuracy.
tag: winogenerated
dataset_path: EleutherAI/winogenerated
output_type: multiple_choice
validation_split: validation
target_delimiter: ""
doc_to_text: "{{question}}"
# presumably an index into `doc_to_choice`, i.e. `answer_matching_behavior`
# is treated as the target -- verify against the harness docs
doc_to_target: 0
doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
metric_list:
  - metric: acc
metadata:
  # `version` belongs to `metadata`; without the indent `metadata` would be
  # null and `version` would become an unrelated top-level key.
  version: 0.0
......@@ -24,7 +24,7 @@ Homepage: https://wmdp.ai
}
```
### Groups and Tasks
### Groups, Tags, and Tasks
#### Groups
......
# WMDP shared settings (excerpt): documents come from `cais/wmdp` and only the
# `test` split is used.
group: wmdp
dataset_path: cais/wmdp
test_split: test
# Explicit nulls: no training or validation split is configured.
training_split: null
validation_split: null
......
# Group definition: `wmdp` bundles the three WMDP subtasks and reports a
# single aggregated accuracy across them.
group: wmdp
task:
  - wmdp_bio
  - wmdp_chem
  - wmdp_cyber
aggregate_metric_list:
  # `aggregation` and `weight_by_size` configure this metric entry, so they
  # must be indented under it rather than sit at the top level.
  - metric: acc
    aggregation: mean
    # presumably weights each subtask by its document count -- verify
    weight_by_size: true
metadata:
  version: 0
......@@ -27,11 +27,7 @@ Homepage: https://huggingface.co/datasets/wmt16
}
```
### Groups and Tasks
#### Groups
* `wmt-t5-prompt`: Group for all wmt tasks with prompt templates used for T5 (`Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer`)
### Groups, Tags, and Tasks
#### Tasks
......
# WMT16 Romanian-to-English task (excerpt), listed under the `wmt-t5-prompt`
# group.
group:
  - wmt-t5-prompt
task: wmt-ro-en-t5-prompt
dataset_path: wmt16
dataset_name: ro-en
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment