Commit 741a6a69 authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into mela

parents 494a4515 b536f067
# This file will be included in the generated language-specific task configs.
# It doesn't have a yaml file extension as it is not meant to be imported directly by the harness.
group:
- afrimgsm
- afrimgsm_en_cot
dataset_path: masakhane/afrimgsm
dataset_name: null # Overridden by language-specific config.
output_type: generate_until
training_split: train
test_split: test
generation_kwargs:
until:
- "\n\n"
- "\n"
do_sample: false
temperature: 0.0
target_delimiter: " "
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
filter_list:
- name: "strict-match"
filter:
- function: "regex"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
- filter:
- function: regex
group_select: -1
regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metadata:
version: 2.0
#!/bin/bash
# python utils.py --overwrite --output-dir direct --mode direct
# python utils.py --overwrite --output-dir direct_native --mode direct-native
# python utils.py --overwrite --output-dir en_cot --mode en-cot
# python utils.py --overwrite --output-dir native_cot --mode native-cot
python utils.py --overwrite --output-dir translate_direct --mode translate-direct
lm_eval --model hf \
--model_args pretrained="google/gemma-7b" --tasks afrimgsm_en_cot_eng,mgsm_en_cot_en,afrimgsm_native_cot_eng,mgsm_native_cot_en,afrimgsm_direct_eng,mgsm_direct_en,afrimgsm_direct_native_eng \
--device cuda:0 \
--batch_size 1 \
--verbosity DEBUG \
--limit 5
# Generated by utils.py
dataset_name: amh
doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_amh
# Generated by utils.py
dataset_name: eng
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_eng
# Generated by utils.py
dataset_name: ewe
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_ewe
# Generated by utils.py
dataset_name: fra
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_fra
# Generated by utils.py
dataset_name: hau
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_hau
# Generated by utils.py
dataset_name: ibo
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_ibo
# Generated by utils.py
dataset_name: kin
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_kin
# Generated by utils.py
dataset_name: lin
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_lin
# Generated by utils.py
dataset_name: lug
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_lug
# Generated by utils.py
dataset_name: orm
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_orm
# Generated by utils.py
dataset_name: sna
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_sna
# Generated by utils.py
dataset_name: sot
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_sot
# Generated by utils.py
dataset_name: swa
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_swa
# Generated by utils.py
dataset_name: twi
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_twi
# Generated by utils.py
dataset_name: wol
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_wol
# Generated by utils.py
dataset_name: xho
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_xho
# Generated by utils.py
dataset_name: yor
doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: translate_direct_yaml
task: afrimgsm_translate_direct_yor
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment