"include/ck/utility/math.hpp" did not exist on "bbcb67d0aac81b51336981713662a726875ebd58"
Unverified Commit 703fbffd authored by Baber Abbasi, committed by GitHub
Browse files

assistant prefill (#2615)

* add assistant prefix

* add arc_challenge from llama

* nit

* nit

* nit

* add assistant prefix

* add mmlu_llama

* nit

* nit

* Revert "nit"

This reverts commit 6a97f8356237305e375212b966b30e8de59dd4bc.

* fix regex bug

* add assistant_prefix to vllm

* add `Question:`

* add mmlu_pro

* add fewshot assistant_prefix

* use `assistant_prefill`

* typehints

* nits

* nits

* add to docs

* add readme
parent e86cece6
"dataset_name": "moral_disputes"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_moral_disputes"
"task_alias": "moral disputes"
"dataset_name": "moral_scenarios"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_moral_scenarios"
"task_alias": "moral scenarios"
"dataset_name": "nutrition"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_nutrition"
"task_alias": "nutrition"
"dataset_name": "philosophy"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_philosophy"
"task_alias": "philosophy"
"dataset_name": "prehistory"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_prehistory"
"task_alias": "prehistory"
"dataset_name": "professional_accounting"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_professional_accounting"
"task_alias": "professional accounting"
"dataset_name": "professional_law"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_professional_law"
"task_alias": "professional law"
"dataset_name": "professional_medicine"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_professional_medicine"
"task_alias": "professional medicine"
"dataset_name": "professional_psychology"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_professional_psychology"
"task_alias": "professional psychology"
"dataset_name": "public_relations"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_public_relations"
"task_alias": "public relations"
"dataset_name": "security_studies"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_security_studies"
"task_alias": "security studies"
"dataset_name": "sociology"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_sociology"
"task_alias": "sociology"
"dataset_name": "us_foreign_policy"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_social_sciences_tasks"
"task": "mmlu_llama_us_foreign_policy"
"task_alias": "us foreign policy"
"dataset_name": "virology"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_other_tasks"
"task": "mmlu_llama_virology"
"task_alias": "virology"
"dataset_name": "world_religions"
"include": "_continuation_template_yaml"
"tag": "mmlu_llama_humanities_tasks"
"task": "mmlu_llama_world_religions"
"task_alias": "world religions"
dataset_path: TIGER-Lab/MMLU-Pro
output_type: generate_until
test_split: test
fewshot_split: validation
fewshot_config:
sampler: first_n
doc_to_target: !function utils.fewshot_to_text
doc_to_text: "{% set letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' %}Given the following question and candidate answers, choose the best answer.\nQuestion: {{question.strip()}}\n{% for choice in options %}{{letters[loop.index0]}}. {{choice}}\n{% endfor %}\nYour response should end with \"The best answer is [the_answer_letter].\" where the [the_answer_letter] is a letter from the provided choices.\n\nLet's think step by step."
doc_to_target: answer
num_fewshot: 5
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- "\\$"
- "\\.$"
generation_kwargs:
until:
- "."
max_gen_toks: 1024
filter_list:
- name: strict_match
filter:
- function: "regex"
regex_pattern: "[tT]he best answer is ([A-Z])"
group_select: -1
- function: take_first
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
group: mmlu_pro_llama
task:
- mmlu_pro_llama_biology
- mmlu_pro_llama_business
- mmlu_pro_llama_chemistry
- mmlu_pro_llama_computer_science
- mmlu_pro_llama_economics
- mmlu_pro_llama_engineering
- mmlu_pro_llama_health
- mmlu_pro_llama_history
- mmlu_pro_llama_law
- mmlu_pro_llama_math
- mmlu_pro_llama_other
- mmlu_pro_llama_philosophy
- mmlu_pro_llama_physics
- mmlu_pro_llama_psychology
aggregate_metric_list:
- aggregation: mean
metric: exact_match
weight_by_size: true
filter_list: custom-extract
metadata:
version: 1.0
include: "_default_template_yaml"
task: "mmlu_pro_llama_biology"
task_alias: "biology"
process_docs: !function utils.process_biology
include: "_default_template_yaml"
task: "mmlu_pro_llama_business"
task_alias: "business"
process_docs: !function utils.process_business
include: "_default_template_yaml"
task: "mmlu_pro_llama_chemistry"
task_alias: "chemistry"
process_docs: !function utils.process_chemistry
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment