Commit 2b56339e authored by Baber
Browse files

Merge branch 'main' into longcxt

parents 0b533339 703fbffd
"dataset_name": "security_studies"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_security_studies"
"task_alias": "security studies"
"dataset_name": "sociology"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_sociology"
"task_alias": "sociology"
"dataset_name": "us_foreign_policy"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_us_foreign_policy"
"task_alias": "us foreign policy"
"dataset_name": "virology"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_virology"
"task_alias": "virology"
"dataset_name": "world_religions"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_world_religions"
"task_alias": "world religions"
# Shared settings for the MMLU-Pro (Llama prompt) subject tasks.
# Nesting matters here: the children of `fewshot_config`, `metric_list`,
# `generation_kwargs`, `filter_list`, `metadata` and `dataset_kwargs` must be
# indented under their parent, otherwise they become stray top-level keys.
dataset_path: TIGER-Lab/MMLU-Pro
output_type: generate_until
test_split: test
fewshot_split: validation

# Few-shot examples are drawn from `fewshot_split` with the `first_n` sampler;
# their targets are rendered by utils.fewshot_to_text. This `doc_to_target`
# belongs to the few-shot config and is distinct from the top-level one below.
fewshot_config:
  sampler: first_n
  doc_to_target: !function utils.fewshot_to_text

# Jinja prompt: lists the options as lettered choices and asks the model to
# finish with "The best answer is <letter>." after reasoning step by step.
doc_to_text: "{% set letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' %}Given the following question and candidate answers, choose the best answer.\nQuestion: {{question.strip()}}\n{% for choice in options %}{{letters[loop.index0]}}. {{choice}}\n{% endfor %}\nYour response should end with \"The best answer is [the_answer_letter].\" where the [the_answer_letter] is a letter from the provided choices.\n\nLet's think step by step."
doc_to_target: answer
num_fewshot: 5

metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    # Strip a literal "$" and a trailing "." before comparing.
    regexes_to_ignore:
      - "\\$"
      - "\\.$"

generation_kwargs:
  # NOTE(review): stopping on the first "." looks at odds with the
  # step-by-step prompt above -- multi-sentence reasoning would be cut off
  # before "The best answer is X" is produced. Left unchanged; confirm the
  # intended stop sequence.
  until:
    - "."
  max_gen_toks: 1024

# `strict_match` keeps the last "The best answer is <LETTER>" occurrence
# (group_select: -1) and then takes the first response.
filter_list:
  - name: strict_match
    filter:
      - function: "regex"
        regex_pattern: "[tT]he best answer is ([A-Z])"
        group_select: -1
      - function: take_first

metadata:
  version: 1.0

dataset_kwargs:
  # NOTE(review): this allows dataset-provided code to run at load time;
  # confirm it is still required for this dataset.
  trust_remote_code: true
# Group definition that bundles the MMLU-Pro (Llama prompt) subject tasks and
# reports one size-weighted mean of exact_match across them.
group: mmlu_pro_llama
task:
  - mmlu_pro_llama_biology
  - mmlu_pro_llama_business
  - mmlu_pro_llama_chemistry
  - mmlu_pro_llama_computer_science
  - mmlu_pro_llama_economics
  - mmlu_pro_llama_engineering
  - mmlu_pro_llama_health
  - mmlu_pro_llama_history
  - mmlu_pro_llama_law
  - mmlu_pro_llama_math
  - mmlu_pro_llama_other
  - mmlu_pro_llama_philosophy
  - mmlu_pro_llama_physics
  - mmlu_pro_llama_psychology
aggregate_metric_list:
  - aggregation: mean
    metric: exact_match
    weight_by_size: true
    # Must name a filter the member tasks actually define. Their shared
    # template declares only `strict_match`, so the previous value
    # `custom-extract` matched no subtask metric.
    filter_list: strict_match
metadata:
  version: 1.0
# MMLU-Pro (Llama prompt) subject task: biology.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_biology hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_biology
task_alias: biology
process_docs: !function utils.process_biology
# MMLU-Pro (Llama prompt) subject task: business.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_business hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_business
task_alias: business
process_docs: !function utils.process_business
# MMLU-Pro (Llama prompt) subject task: chemistry.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_chemistry hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_chemistry
task_alias: chemistry
process_docs: !function utils.process_chemistry
# MMLU-Pro (Llama prompt) subject task: computer science.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_computer_science hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_computer_science
task_alias: computer_science
process_docs: !function utils.process_computer_science
# MMLU-Pro (Llama prompt) subject task: economics.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_economics hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_economics
task_alias: economics
process_docs: !function utils.process_economics
# MMLU-Pro (Llama prompt) subject task: engineering.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_engineering hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_engineering
task_alias: engineering
process_docs: !function utils.process_engineering
# MMLU-Pro (Llama prompt) subject task: health.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_health hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_health
task_alias: health
process_docs: !function utils.process_health
# MMLU-Pro (Llama prompt) subject task: history.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_history hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_history
task_alias: history
process_docs: !function utils.process_history
# MMLU-Pro (Llama prompt) subject task: law.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_law hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_law
task_alias: law
process_docs: !function utils.process_law
# MMLU-Pro (Llama prompt) subject task: math.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_math hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_math
task_alias: math
process_docs: !function utils.process_math
# MMLU-Pro (Llama prompt) subject task: other.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_other hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_other
task_alias: other
process_docs: !function utils.process_other
# MMLU-Pro (Llama prompt) subject task: philosophy.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_philosophy hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_philosophy
task_alias: philosophy
process_docs: !function utils.process_philosophy
# MMLU-Pro (Llama prompt) subject task: physics.
# `include` pulls in the shared default template; `process_docs` points at the
# utils.process_physics hook, which is defined outside this snippet.
include: _default_template_yaml
task: mmlu_pro_llama_physics
task_alias: physics
process_docs: !function utils.process_physics
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment