Commit 2106fbeb authored by Baber's avatar Baber
Browse files

Merge branch 'main' into mathvista

# Conflicts:
#	lm_eval/models/openai_completions.py
parents 4354fe46 703fbffd
---
# Group config: aggregates every task tagged `mmlu_llama_stem_tasks`
# into a single "stem" score.
group: mmlu_llama_stem
group_alias: stem
task:
  - mmlu_llama_stem_tasks
aggregate_metric_list:
  # All four keys below belong to the same list entry; they must be nested
  # under the `- metric:` item, not placed at the top level.
  - metric: exact_match
    aggregation: mean
    # NOTE(review): presumably weights each subtask by its example count
    # when averaging - confirm against the harness documentation.
    weight_by_size: true
    filter_list: [strict_match]
metadata:
  version: 0
---
# Top-level group config: combines the four category sub-groups listed
# under `task` into one overall score.
group: mmlu_llama
task:
  - mmlu_llama_stem
  - mmlu_llama_other
  - mmlu_llama_social_sciences
  - mmlu_llama_humanities
aggregate_metric_list:
  # All four keys below belong to the same list entry; they must be nested
  # under the `- metric:` item, not placed at the top level.
  - metric: exact_match
    aggregation: mean
    # NOTE(review): presumably weights each sub-group by its example count
    # when averaging - confirm against the harness documentation.
    weight_by_size: true
    filter_list: [strict_match]
metadata:
  version: 1
"dataset_name": "abstract_algebra"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_abstract_algebra"
"task_alias": "abstract algebra"
"dataset_name": "anatomy"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_anatomy"
"task_alias": "anatomy"
"dataset_name": "astronomy"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_astronomy"
"task_alias": "astronomy"
"dataset_name": "business_ethics"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_business_ethics"
"task_alias": "business ethics"
"dataset_name": "clinical_knowledge"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_clinical_knowledge"
"task_alias": "clinical knowledge"
"dataset_name": "college_biology"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_college_biology"
"task_alias": "college biology"
"dataset_name": "college_chemistry"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_college_chemistry"
"task_alias": "college chemistry"
"dataset_name": "college_computer_science"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_college_computer_science"
"task_alias": "college computer science"
"dataset_name": "college_mathematics"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_college_mathematics"
"task_alias": "college mathematics"
"dataset_name": "college_medicine"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_college_medicine"
"task_alias": "college medicine"
"dataset_name": "college_physics"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_college_physics"
"task_alias": "college physics"
"dataset_name": "computer_security"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_computer_security"
"task_alias": "computer security"
"dataset_name": "conceptual_physics"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_conceptual_physics"
"task_alias": "conceptual physics"
"dataset_name": "econometrics"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_econometrics"
"task_alias": "econometrics"
"dataset_name": "electrical_engineering"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_electrical_engineering"
"task_alias": "electrical engineering"
"dataset_name": "elementary_mathematics"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_stem_tasks"
"task": "mmlu_llama_elementary_mathematics"
"task_alias": "elementary mathematics"
"dataset_name": "formal_logic"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_formal_logic"
"task_alias": "formal logic"
"dataset_name": "global_facts"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_global_facts"
"task_alias": "global facts"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment