Commit bc4b922c authored by Baber's avatar Baber
Browse files

Merge branch 'main' into llama

# Conflicts:
#	lm_eval/tasks/llama3/README.md
parents 748eb47e b2c090cc
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_professional_psychology
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_professional_psychology
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_public_relations
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_public_relations
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_security_studies
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_security_studies
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_sociology
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_sociology
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_us_foreign_policy
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_us_foreign_policy
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_virology
tag: global_mmlu_full_en_other_tasks
task: global_mmlu_full_en_virology
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_world_religions
tag: global_mmlu_full_en_humanities_tasks
task: global_mmlu_full_en_world_religions
from functools import partial
# The 57 MMLU subject names; one `process_<subject>` filter helper is
# generated per entry (see `process_functions` below) so the generated
# YAML task configs can reference them as `!function utils.process_<subject>`.
SUBJECTS = [
    "abstract_algebra",
    "anatomy",
    "astronomy",
    "business_ethics",
    "clinical_knowledge",
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_medicine",
    "college_physics",
    "computer_security",
    "conceptual_physics",
    "econometrics",
    "electrical_engineering",
    "elementary_mathematics",
    "formal_logic",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_computer_science",
    "high_school_european_history",
    "high_school_geography",
    "high_school_government_and_politics",
    "high_school_macroeconomics",
    "high_school_mathematics",
    "high_school_microeconomics",
    "high_school_physics",
    "high_school_psychology",
    "high_school_statistics",
    "high_school_us_history",
    "high_school_world_history",
    "human_aging",
    "human_sexuality",
    "international_law",
    "jurisprudence",
    "logical_fallacies",
    "machine_learning",
    "management",
    "marketing",
    "medical_genetics",
    "miscellaneous",
    "moral_disputes",
    "moral_scenarios",
    "nutrition",
    "philosophy",
    "prehistory",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_studies",
    "sociology",
    "us_foreign_policy",
    "virology",
    "world_religions",
]
def process_docs(dataset, subject):
    """Keep only the rows of *dataset* whose ``"subject"`` field equals *subject*.

    ``dataset`` is expected to expose a ``.filter(predicate)`` method (e.g. a
    HuggingFace ``datasets.Dataset``); the filtered dataset is returned.
    """

    def _has_subject(doc):
        return doc["subject"] == subject

    return dataset.filter(_has_subject)
# Build one pre-bound filter helper per subject and publish each at module
# scope, so the generated YAML configs can refer to them as
# `!function utils.process_<subject>`.
process_functions = {}
for _subject in SUBJECTS:
    process_functions[f"process_{_subject}"] = partial(process_docs, subject=_subject)
globals().update(process_functions)
dataset_path: CohereForAI/Global-MMLU
dataset_name: es
test_split: test
fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
group: global_mmlu_full_es
task:
- global_mmlu_full_es_stem
- global_mmlu_full_es_other
- global_mmlu_full_es_social_sciences
- global_mmlu_full_es_humanities
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 1.0
group: global_mmlu_full_es_humanities
task:
- global_mmlu_full_es_humanities_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
group: global_mmlu_full_es_other
task:
- global_mmlu_full_es_other_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
group: global_mmlu_full_es_social_sciences
task:
- global_mmlu_full_es_social_sciences_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
group: global_mmlu_full_es_stem
task:
- global_mmlu_full_es_stem_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_abstract_algebra
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_abstract_algebra
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_anatomy
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_anatomy
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_astronomy
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_astronomy
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_business_ethics
tag: global_mmlu_full_es_other_tasks
task: global_mmlu_full_es_business_ethics
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_clinical_knowledge
tag: global_mmlu_full_es_other_tasks
task: global_mmlu_full_es_clinical_knowledge
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_college_biology
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_college_biology
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment