Commit 601be343 authored by Baber
Browse files

Merge branch 'main' into feature/eval_from_config

parents d0884a96 68c3a811
---
# Generated by utils.py
# Language-specific task config: selects the `lin` dataset subset and a
# Lingala-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: lin
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Lingala language. Supply the\
  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
  \ \nStep by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_lin_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `lug` dataset subset and a
# Luganda-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: lug
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Luganda language. Supply the\
  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
  \ \nStep by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_lug_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `orm` dataset subset and an
# Oromo-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: orm
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Oromo language. Supply the accurate\
  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
  \ by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_orm_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `sna` dataset subset and a
# chiShona-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: sna
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in chiShona language. Supply the\
  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
  \ \nStep by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_sna_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `sot` dataset subset and a
# Sesotho-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: sot
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Sesotho language. Supply the\
  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
  \ \nStep by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_sot_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `swa` dataset subset and a
# Swahili-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: swa
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Swahili language. Supply the\
  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
  \ \nStep by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_swa_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `twi` dataset subset and a
# Twi-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: twi
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Twi language. Supply the accurate\
  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
  \ by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_twi_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `vai` dataset subset and a
# Vai-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: vai
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Vai language. Supply the accurate\
  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
  \ by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_vai_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `wol` dataset subset and a
# Wolof-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: wol
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Wolof language. Supply the accurate\
  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
  \ by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_wol_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `xho` dataset subset and an
# isiXhosa-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: xho
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in isiXhosa language. Supply the\
  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
  \ \nStep by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_xho_prompt_5
---
# Shared base config for the tasks tagged `afrimgsm_tt_cot_tasks`.
# The per-language files reference it with
# `include: afrimgsm_cot_translate_yaml` and supply their own
# `dataset_name`, `doc_to_text`, and `task`.
tag: afrimgsm_tt_cot_tasks
dataset_path: masakhane/afrimgsm-translate-test
dataset_name: null  # Overridden by language-specific config.
output_type: generate_until
test_split: test
# Jinja template: when `answer` is not none, emit `answer` from character
# index 21 onward; otherwise emit `answer_number` converted to a string.
# NOTE(review): the offset 21 presumably skips a fixed-length prefix in the
# dataset's `answer` field -- confirm against the data.
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
generation_kwargs:
  do_sample: false
  # Strings listed under `until`; with `output_type: generate_until` these
  # are presumably the stop sequences -- confirm against the harness docs.
  until:
    - 'Question:'
    - '</s>'
    - '<|im_end|>'
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
filter_list:
  # Pipeline 1: capture the number following the literal "The answer is ",
  # then apply `take_first`.
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
      - function: "take_first"
  # Pipeline 2: looser numeric pattern, then `take_first`.
  # NOTE(review): `group_select: -1` presumably picks the last match --
  # confirm against the harness filter documentation.
  - filter:
      - function: regex
        group_select: -1
        # Single-quoted: the pattern is full of YAML indicator characters
        # and contains no escapes, so the value is unchanged.
        regex_pattern: '(-?[$0-9.,]{2,})|(-?[0-9]+)'
      - function: take_first
    name: flexible-extract
metadata:
  version: 2.0
---
# Generated by utils.py
# Language-specific task config: selects the `yor` dataset subset and a
# Yoruba-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: yor
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Yoruba language. Supply the accurate\
  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
  \ by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_yor_prompt_5
---
# Generated by utils.py
# Language-specific task config: selects the `zul` dataset subset and a
# Zulu-worded prompt. Remaining settings are pulled in through `include`.
dataset_name: zul
# Double-quoted so that `\n` is a real newline; a trailing `\` joins the
# physical lines and `\ ` keeps the leading space of the next fragment.
doc_to_text: "For mathematical questions provided in Zulu language. Supply the accurate\
  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
  \ by step answer: "
include: afrimgsm_cot_translate_yaml
task: afrimgsm_cot_translate_zul_prompt_5
---
# Group definition `afrimmlu-irokobench`: bundles the five
# `afrimmlu_tasks_prompt_N` entries and declares one aggregated metric.
group: afrimmlu-irokobench
task:
  - afrimmlu_tasks_prompt_1
  - afrimmlu_tasks_prompt_2
  - afrimmlu_tasks_prompt_3
  - afrimmlu_tasks_prompt_4
  - afrimmlu_tasks_prompt_5
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    # NOTE(review): presumably weights each member by its sample count
    # when averaging -- confirm against the harness group documentation.
    weight_by_size: true
metadata:
  version: 2
---
# Shared base config for the tasks tagged `afrimmlu` / `afrimmlu_direct`.
# The per-language files reference it with `include: afrimmlu_common_yaml`
# and supply their own `dataset_name` and `task`.
tag:
  - afrimmlu
  - afrimmlu_direct
task: null
dataset_path: masakhane/afrimmlu
dataset_name: null
output_type: multiple_choice
validation_split: validation
test_split: test
fewshot_split: validation
# `!function` values name callables in a `utils` module that is not part of
# this file.
doc_to_text: !function utils.doc_to_text
# Jinja expression: position of `answer` within ['A', 'B', 'C', 'D'].
doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
metric_list:
  - metric: f1
    aggregation: !function utils.weighted_f1_score
    # aggregation: mean
    average: weighted
    hf_evaluate: true
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - ","
      - "\\$"
  - metric: acc
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - ","
      - "\\$"
metadata:
  version: 1.0
---
# Language-specific task config: selects the `amh` dataset subset; all
# other settings are pulled in from `afrimmlu_common_yaml`.
dataset_name: amh
include: afrimmlu_common_yaml
task: afrimmlu_direct_amh
---
# Language-specific task config: selects the `eng` dataset subset; all
# other settings are pulled in from `afrimmlu_common_yaml`.
dataset_name: eng
include: afrimmlu_common_yaml
task: afrimmlu_direct_eng
---
# Language-specific task config: selects the `ewe` dataset subset; all
# other settings are pulled in from `afrimmlu_common_yaml`.
dataset_name: ewe
include: afrimmlu_common_yaml
task: afrimmlu_direct_ewe
---
# Language-specific task config: selects the `fra` dataset subset; all
# other settings are pulled in from `afrimmlu_common_yaml`.
dataset_name: fra
include: afrimmlu_common_yaml
task: afrimmlu_direct_fra
---
# Language-specific task config: selects the `hau` dataset subset; all
# other settings are pulled in from `afrimmlu_common_yaml`.
dataset_name: hau
include: afrimmlu_common_yaml
task: afrimmlu_direct_hau
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment