Unverified commit a2af2101, authored by Yen-Ting Lin and committed by GitHub
Browse files

Merge branch 'EleutherAI:main' into main

parents 82cb25c1 d5f39bf8
group: blimp
dataset_path: blimp dataset_path: blimp
output_type: multiple_choice output_type: multiple_choice
validation_split: train validation_split: train
......
# Group config for `ceval-valid`: names the group, lists its member subtasks,
# and declares which metrics are aggregated across them.
# NOTE(review): `weight_by_size: true` presumably weights each subtask by its
# sample count when taking the mean — confirm against the consuming harness.
aggregate_metric_list:
  - aggregation: mean
    metric: acc
    weight_by_size: true
  - aggregation: mean
    metric: acc_norm
    weight_by_size: true
group: ceval-valid
metadata:
  version: 1.0
task:
  - ceval-valid_computer_network
  - ceval-valid_operating_system
  - ceval-valid_computer_architecture
  - ceval-valid_college_programming
  - ceval-valid_college_physics
  - ceval-valid_college_chemistry
  - ceval-valid_advanced_mathematics
  - ceval-valid_probability_and_statistics
  - ceval-valid_discrete_mathematics
  - ceval-valid_electrical_engineer
  - ceval-valid_metrology_engineer
  - ceval-valid_high_school_mathematics
  - ceval-valid_high_school_physics
  - ceval-valid_high_school_chemistry
  - ceval-valid_high_school_biology
  - ceval-valid_middle_school_mathematics
  - ceval-valid_middle_school_biology
  - ceval-valid_middle_school_physics
  - ceval-valid_middle_school_chemistry
  - ceval-valid_veterinary_medicine
  - ceval-valid_college_economics
  - ceval-valid_business_administration
  - ceval-valid_marxism
  - ceval-valid_mao_zedong_thought
  - ceval-valid_education_science
  - ceval-valid_teacher_qualification
  - ceval-valid_high_school_politics
  - ceval-valid_high_school_geography
  - ceval-valid_middle_school_politics
  - ceval-valid_middle_school_geography
  - ceval-valid_modern_chinese_history
  - ceval-valid_ideological_and_moral_cultivation
  - ceval-valid_logic
  - ceval-valid_law
  - ceval-valid_chinese_language_and_literature
  - ceval-valid_art_studies
  - ceval-valid_professional_tour_guide
  - ceval-valid_legal_professional
  - ceval-valid_high_school_chinese
  - ceval-valid_high_school_history
  - ceval-valid_middle_school_history
  - ceval-valid_civil_servant
  - ceval-valid_sports_science
  - ceval-valid_plant_protection
  - ceval-valid_basic_medicine
  - ceval-valid_clinical_medicine
  - ceval-valid_urban_and_rural_planner
  - ceval-valid_accountant
  - ceval-valid_fire_engineer
  - ceval-valid_environmental_impact_assessment_engineer
  - ceval-valid_tax_accountant
  - ceval-valid_physician
group: ceval-valid
dataset_path: ceval/ceval-exam dataset_path: ceval/ceval-exam
validation_split: val validation_split: val
fewshot_split: dev fewshot_split: dev
......
""" """
Take in a YAML, and output all other splits with this YAML Take in a YAML, and output all other splits with this YAML
""" """
import argparse import argparse
import os import os
import yaml import yaml
from tqdm import tqdm from tqdm import tqdm
from lm_eval.logger import eval_logger from lm_eval.utils import eval_logger
SUBJECTS = { SUBJECTS = {
...@@ -116,3 +117,26 @@ if __name__ == "__main__": ...@@ -116,3 +117,26 @@ if __name__ == "__main__":
allow_unicode=True, allow_unicode=True,
default_style='"', default_style='"',
) )
# Write the group config out: a single YAML file that names the group, lists
# one subtask per SUBJECTS key, and declares the metrics to aggregate.
group_yaml_dict = {
    "group": "ceval-valid",
    "task": [f"ceval-valid_{task_name}" for task_name in SUBJECTS.keys()],
    "aggregate_metric_list": [
        {"metric": "acc", "aggregation": "mean", "weight_by_size": True},
        {"metric": "acc_norm", "aggregation": "mean", "weight_by_size": True},
    ],
    "metadata": {"version": 1.0},
}

# Put the leading underscore on the final path component only. Prepending it
# to the whole prefix would turn "out/ceval" into "_out/ceval.yaml" (a
# different, possibly missing directory). A bare prefix such as "ceval" still
# yields "_ceval.yaml", exactly as before.
prefix_dir, prefix_name = os.path.split(args.save_prefix_path)
file_save_path = os.path.join(prefix_dir, f"_{prefix_name}.yaml")

# NOTE(review): default_style='"' forces double-quoted scalars; PyYAML is
# expected to tag the non-string values (True, 1.0) explicitly in that style —
# confirm the emitted file loads as intended.
with open(file_save_path, "w", encoding="utf-8") as group_yaml_file:
    yaml.dump(
        group_yaml_dict,
        group_yaml_file,
        width=float("inf"),
        allow_unicode=True,
        default_style='"',
    )
# Group config for `cmmlu`: names the group, lists its member subtasks, and
# declares which metrics are aggregated across them.
# NOTE(review): `weight_by_size: true` presumably weights each subtask by its
# sample count when taking the mean — confirm against the consuming harness.
group: cmmlu
task:
  - cmmlu_agronomy
  - cmmlu_anatomy
  - cmmlu_ancient_chinese
  - cmmlu_arts
  - cmmlu_astronomy
  - cmmlu_business_ethics
  - cmmlu_chinese_civil_service_exam
  - cmmlu_chinese_driving_rule
  - cmmlu_chinese_food_culture
  - cmmlu_chinese_foreign_policy
  - cmmlu_chinese_history
  - cmmlu_chinese_literature
  - cmmlu_chinese_teacher_qualification
  - cmmlu_clinical_knowledge
  - cmmlu_college_actuarial_science
  - cmmlu_college_education
  - cmmlu_college_engineering_hydrology
  - cmmlu_college_law
  - cmmlu_college_mathematics
  - cmmlu_college_medical_statistics
  - cmmlu_college_medicine
  - cmmlu_computer_science
  - cmmlu_computer_security
  - cmmlu_conceptual_physics
  - cmmlu_construction_project_management
  - cmmlu_economics
  - cmmlu_education
  - cmmlu_electrical_engineering
  - cmmlu_elementary_chinese
  - cmmlu_elementary_commonsense
  - cmmlu_elementary_information_and_technology
  - cmmlu_elementary_mathematics
  - cmmlu_ethnology
  - cmmlu_food_science
  - cmmlu_genetics
  - cmmlu_global_facts
  - cmmlu_high_school_biology
  - cmmlu_high_school_chemistry
  - cmmlu_high_school_geography
  - cmmlu_high_school_mathematics
  - cmmlu_high_school_physics
  - cmmlu_high_school_politics
  - cmmlu_human_sexuality
  - cmmlu_international_law
  - cmmlu_journalism
  - cmmlu_jurisprudence
  - cmmlu_legal_and_moral_basis
  - cmmlu_logical
  - cmmlu_machine_learning
  - cmmlu_management
  - cmmlu_marketing
  - cmmlu_marxist_theory
  - cmmlu_modern_chinese
  - cmmlu_nutrition
  - cmmlu_philosophy
  - cmmlu_professional_accounting
  - cmmlu_professional_law
  - cmmlu_professional_medicine
  - cmmlu_professional_psychology
  - cmmlu_public_relations
  - cmmlu_security_study
  - cmmlu_sociology
  - cmmlu_sports_science
  - cmmlu_traditional_chinese_medicine
  - cmmlu_virology
  - cmmlu_world_history
  - cmmlu_world_religions
aggregate_metric_list:
  - aggregation: mean
    metric: acc
    weight_by_size: true
  - aggregation: mean
    metric: acc_norm
    weight_by_size: true
metadata:
  version: 0.0
group: cmmlu
dataset_path: haonan-li/cmmlu dataset_path: haonan-li/cmmlu
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
......
""" """
Take in a YAML, and output all other splits with this YAML Take in a YAML, and output all other splits with this YAML
""" """
import argparse import argparse
import os import os
...@@ -131,3 +132,33 @@ if __name__ == "__main__": ...@@ -131,3 +132,33 @@ if __name__ == "__main__":
allow_unicode=True, allow_unicode=True,
default_style='"', default_style='"',
) )
# Write the group config out: a single YAML file that names the group, lists
# one subtask per SUBJECTS key, and declares the metrics to aggregate.
group_yaml_dict = {
    "group": "cmmlu",
    # Subtask names carry the optional task prefix only when one was supplied,
    # so an empty prefix does not produce a doubled underscore.
    "task": [
        (
            f"cmmlu_{args.task_prefix}_{subject_eng}"
            if args.task_prefix != ""
            else f"cmmlu_{subject_eng}"
        )
        for subject_eng in SUBJECTS.keys()
    ],
    "aggregate_metric_list": [
        {"metric": "acc", "aggregation": "mean", "weight_by_size": True},
        {"metric": "acc_norm", "aggregation": "mean", "weight_by_size": True},
    ],
    "metadata": {"version": 0.0},
}

# Put the leading underscore on the final path component only. Prepending it
# to the whole prefix would turn "out/cmmlu" into "_out/cmmlu.yaml" (a
# different, possibly missing directory). A bare prefix such as "cmmlu" still
# yields "_cmmlu.yaml", exactly as before.
prefix_dir, prefix_name = os.path.split(args.save_prefix_path)
file_save_path = os.path.join(prefix_dir, f"_{prefix_name}.yaml")

# NOTE(review): default_style='"' forces double-quoted scalars; PyYAML is
# expected to tag the non-string values (True, 0.0) explicitly in that style —
# confirm the emitted file loads as intended.
with open(file_save_path, "w", encoding="utf-8") as group_yaml_file:
    yaml.dump(
        group_yaml_dict,
        group_yaml_file,
        width=float("inf"),
        allow_unicode=True,
        default_style='"',
    )
"dataset_name": "agronomy"
"description": "以下是关于农学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_agronomy"
"dataset_name": "anatomy"
"description": "以下是关于解剖学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_anatomy"
"dataset_name": "ancient_chinese"
"description": "以下是关于古汉语的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_ancient_chinese"
"dataset_name": "arts"
"description": "以下是关于艺术学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_arts"
"dataset_name": "astronomy"
"description": "以下是关于天文学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_astronomy"
"dataset_name": "business_ethics" "dataset_name": "business_ethics"
"description": "فم بعملية التقييم في مجال علوم أخرى \n\n" "description": "以下是关于商业伦理的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "ammlu_business_ethics" "task": "cmmlu_business_ethics"
"dataset_name": "chinese_civil_service_exam"
"description": "以下是关于中国公务员考试的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_civil_service_exam"
"dataset_name": "chinese_driving_rule"
"description": "以下是关于中国驾驶规则的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_driving_rule"
"dataset_name": "chinese_food_culture"
"description": "以下是关于中国饮食文化的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_food_culture"
"dataset_name": "chinese_foreign_policy"
"description": "以下是关于中国外交政策的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_foreign_policy"
"dataset_name": "chinese_history"
"description": "以下是关于中国历史的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_history"
"dataset_name": "chinese_literature"
"description": "以下是关于中国文学的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_literature"
"dataset_name": "chinese_teacher_qualification"
"description": "以下是关于中国教师资格的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "cmmlu_chinese_teacher_qualification"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment