Commit a5e93901 authored by lintangsutawika's avatar lintangsutawika
Browse files

merged latest

parents 694af7d6 0aa37743
"dataset_name": "marxist_theory"
"description": "以下是关于马克思主义理论的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_marxist_theory"
"dataset_name": "modern_chinese"
"description": "以下是关于现代汉语的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_modern_chinese"
"dataset_name": "nutrition"
"description": "以下是关于营养学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_nutrition"
"dataset_name": "philosophy"
"description": "以下是关于哲学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_philosophy"
"dataset_name": "professional_accounting"
"description": "以下是关于专业会计的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_accounting"
"dataset_name": "professional_law"
"description": "以下是关于专业法学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_law"
"dataset_name": "professional_medicine"
"description": "以下是关于专业医学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_medicine"
"dataset_name": "professional_psychology"
"description": "以下是关于专业心理学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_professional_psychology"
"dataset_name": "public_relations"
"description": "以下是关于公共关系的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_public_relations"
"dataset_name": "security_study"
"description": "以下是关于安全研究的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_security_study"
"dataset_name": "sociology"
"description": "以下是关于社会学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_sociology"
"dataset_name": "sports_science"
"description": "以下是关于体育学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_sports_science"
"dataset_name": "traditional_chinese_medicine"
"description": "以下是关于中医中药的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_traditional_chinese_medicine"
"dataset_name": "virology"
"description": "以下是关于病毒学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_virology"
"dataset_name": "world_history"
"description": "以下是关于世界历史的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_world_history"
"dataset_name": "world_religions"
"description": "以下是关于世界宗教的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_world_religions"
...@@ -93,10 +93,9 @@ All tasks evaluate the percentage of more-stereotypical sentences that are rated ...@@ -93,10 +93,9 @@ All tasks evaluate the percentage of more-stereotypical sentences that are rated
* [x] Is the task an existing benchmark in the literature? * [x] Is the task an existing benchmark in the literature?
* [x] Have you referenced the original paper that introduced the task? * [x] Have you referenced the original paper that introduced the task?
* [x] If yes, does the original paper provide a reference implementation? * [x] If yes, does the original paper provide a reference implementation?
* [x] The original paper does not provide one for causal language models, so * [x] The original paper does not provide one for causal language models, so this is a novel formulation of the task for autoregressive LMs.
If other tasks on this dataset are already supported: If other tasks on this dataset are already supported:
* [x] Is the "Main" variant of this task clearly denoted? * [x] Is the "Main" variant of this task clearly denoted?
* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates? * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
* [x] Have you noted which, if any, published evaluation setups are matched by this variant? * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
* [x] This matches the evaluations performed in the [Pythia paper](https://arxiv.org/abs/2304.01373)
# Task settings for the `csatqa` group.
group: csatqa
dataset_path: EleutherAI/csatqa
test_split: test
output_type: multiple_choice
# Documents are passed through `process_docs` from the local `utils` module.
process_docs: !function utils.process_docs
doc_to_text: "{{question}}"
doc_to_choice: "{{choices}}"
doc_to_target: "{{gold}}"
# Each list item is one metric; both are aggregated with the mean and
# treated as higher-is-better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
"""
Take in a YAML, and output all other splits with this YAML
"""
import os
import yaml
import argparse
from tqdm import tqdm
from lm_eval.logger import eval_logger
SUBSETS = ["WR", "GR", "RCS", "RCSS", "RCH", "LI"]
def parse_args():
    """Parse the command-line options of the YAML generator.

    Returns:
        argparse.Namespace with three attributes:
            base_yaml_path: path to the base YAML file (required).
            save_prefix_path: prefix of every output file path
                (default ``"csatqa"``).
            task_prefix: optional infix for generated task names
                (default ``""``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML; its file name becomes the include of every generated file.",
    )
    parser.add_argument(
        "--save_prefix_path",
        default="csatqa",
        help="Output path prefix; each file is saved as <prefix>_<subset>.yaml.",
    )
    parser.add_argument(
        "--task_prefix",
        default="",
        help="Optional infix for task names (csatqa_<task_prefix>_<SUBSET>); empty gives csatqa_<subset>.",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # Write one small YAML file per entry in SUBSETS. Each file holds three
    # keys: "include" (the base YAML's file name), "task" and "dataset_name".
    args = parse_args()

    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]

    # The parsed contents are not used below; the load is kept so that a
    # missing or unparsable base file still fails before any output is written.
    with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    for name in tqdm(SUBSETS):
        yaml_dict = {
            "include": base_yaml_name,
            # NOTE(review): with a task prefix the subset name keeps its
            # original case, without one it is lowercased -- confirm intended.
            "task": f"csatqa_{args.task_prefix}_{name}"
            if args.task_prefix != ""
            else f"csatqa_{name.lower()}",
            "dataset_name": name,
        }

        file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
        eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")

        # allow_unicode=True writes non-ASCII characters unescaped, so pin the
        # file encoding rather than depend on the platform default.
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
                width=float("inf"),  # never wrap long values
                allow_unicode=True,
                default_style='"',  # double-quote every key and value
            )
"dataset_name": "GR"
"include": "_default_csatqa_yaml"
"task": "csatqa_gr"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment