Commit 6884c5a0 authored by Baber
Browse files

add mgsm

parent b0108cf8
# Group definition that bundles the per-language MGSM chat tasks under one
# name so they can be run and reported together.
group: mgsm_chat
# Display label for the group. The previous value "mmlu (llama)" was a
# copy-paste leftover and would have labelled MGSM results as MMLU.
group_alias: mgsm (llama)
# One entry per language-specific task config.
task:
  - mgsm_chat_bn
  - mgsm_chat_de
  - mgsm_chat_en
  - mgsm_chat_es
  - mgsm_chat_fr
  - mgsm_chat_ja
  - mgsm_chat_ru
  - mgsm_chat_sw
  - mgsm_chat_te
  - mgsm_chat_th
  - mgsm_chat_zh
# Group-level score: exact_match aggregated across the subtasks above.
# NOTE(review): weight_by_size presumably weights each subtask by its number
# of documents — confirm against the harness's group aggregation docs.
aggregate_metric_list:
  - metric: exact_match
    weight_by_size: true
metadata:
  version: 0
# Bengali (bn) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `bn` is a valid config name for the template's dataset_path.
dataset_name: bn
process_docs: !function utils.process_docs_bn
task: mgsm_chat_bn
# German (de) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `de` is a valid config name for the template's dataset_path.
dataset_name: de
process_docs: !function utils.process_docs_de
task: mgsm_chat_de
# English (en) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `en` is a valid config name for the template's dataset_path.
dataset_name: en
process_docs: !function utils.process_docs_en
task: mgsm_chat_en
# Spanish (es) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `es` is a valid config name for the template's dataset_path.
dataset_name: es
process_docs: !function utils.process_docs_es
task: mgsm_chat_es
# French (fr) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `fr` is a valid config name for the template's dataset_path.
dataset_name: fr
process_docs: !function utils.process_docs_fr
task: mgsm_chat_fr
# Japanese (ja) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `ja` is a valid config name for the template's dataset_path.
dataset_name: ja
process_docs: !function utils.process_docs_ja
task: mgsm_chat_ja
# Russian (ru) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `ru` is a valid config name for the template's dataset_path.
dataset_name: ru
process_docs: !function utils.process_docs_ru
task: mgsm_chat_ru
# Swahili (sw) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `sw` is a valid config name for the template's dataset_path.
dataset_name: sw
process_docs: !function utils.process_docs_sw
task: mgsm_chat_sw
# Telugu (te) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `te` is a valid config name for the template's dataset_path.
dataset_name: te
process_docs: !function utils.process_docs_te
task: mgsm_chat_te
# Thai (th) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `th` is a valid config name for the template's dataset_path.
dataset_name: th
process_docs: !function utils.process_docs_th
task: mgsm_chat_th
# Chinese (zh) variant of the MGSM chat task; shared settings come from the
# included template.
include: mgsm_chat_template
# Language-specific value set by this file (this file is the override).
# NOTE(review): confirm `zh` is a valid config name for the template's dataset_path.
dataset_name: zh
process_docs: !function utils.process_docs_zh
task: mgsm_chat_zh
...@@ -76,7 +76,7 @@ def number_variations(n): ...@@ -76,7 +76,7 @@ def number_variations(n):
def process_docs(lang: str, df: datasets.Dataset) -> datasets.Dataset: def process_docs(lang: str, df: datasets.Dataset) -> datasets.Dataset:
def map_(doc: dict): def map_(doc: dict):
suffix = [x for x in PROMPTS if x["subtask_name"] == lang][0]["rep"] suffix = [x for x in PROMPTS if x["subtask_name"] == lang][0]["rep"]
doc["question"] = suffix + r"\n\n" + doc["question"].split(":", 1)[-1] doc["question"] = suffix + "\n\n" + doc["question"].split(":", 1)[-1]
doc["answers"] = number_variations(doc["answer_number"]) doc["answers"] = number_variations(doc["answer_number"])
return doc return doc
...@@ -97,7 +97,7 @@ process_docs_zh = partial(process_docs, "zh") ...@@ -97,7 +97,7 @@ process_docs_zh = partial(process_docs, "zh")
def process_results(doc, prediction): def process_results(doc, prediction):
gold: List = doc["input_correct_responses"] gold: List = doc["answers"]
return { return {
"exact_match": int( "exact_match": int(
exact_match_fn( exact_match_fn(
......
# Shared base settings for the mgsm_chat_<lang> task configs, which pull this
# file in via `include: mgsm_chat_template` and set their own task name,
# dataset_name and process_docs.
tag: llama3
task: mgsm_chat

# --- Data source ---------------------------------------------------------
dataset_path: meta-llama/Llama-3.2-3B-Instruct-evals
dataset_name: Llama-3.2-3B-Instruct-evals__mgsm__details
test_split: latest

# --- Prompt / target -----------------------------------------------------
output_type: generate_until
# Takes the first entry of `input_final_prompts`, removes the user-header and
# the end-of-turn + assistant-header marker strings, then trims whitespace.
doc_to_text: "{{
  input_final_prompts
  |first
  |replace('<|start_header_id|>user<|end_header_id|>', '')
  |replace('<|eot_id|><|start_header_id|>assistant<|end_header_id|>', '')
  |trim
  }}"
doc_to_target: "input_correct_responses"
# NOTE(review): confirm utils.py really exposes a function named
# `process_results_mgsm`; a mismatch would fail when the config is loaded.
process_results: !function utils.process_results_mgsm

# --- Generation ----------------------------------------------------------
# Sampling disabled, no stop sequences, up to 2048 generated tokens.
generation_kwargs:
  until: []
  do_sample: false
  temperature: 0.0
  max_gen_toks: 2048

# --- Scoring -------------------------------------------------------------
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true

# --- Answer extraction ---------------------------------------------------
filter_list:
  # Captures the number that follows a localized "Answer: " label.
  - name: strict-match
    filter:
      - function: regex
        regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (\\-?[0-9\\.\\,]+)"
      - function: remove_whitespace
      - function: take_first
  # Same labelled pattern, with a bare integer as an alternative match.
  # NOTE(review): `group_select: -1` presumably selects the last match —
  # confirm against the regex filter implementation.
  - name: flexible-extract
    filter:
      - function: regex
        group_select: -1
        regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (-?[$0-9.,]{2,})|(-?[0-9]+)"
      - function: remove_whitespace
      - function: take_first

metadata:
  version: 0.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment