Unverified Commit f41e4425 authored by Jess's avatar Jess Committed by GitHub
Browse files

Merge pull request #5 from JessicaOjo/africanli

update afrixnli tasks
parents 3a8f1e44 b5067932
# Generated by utils.py
dataset_name: ewe
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_ewe
# Generated by utils.py
dataset_name: fra
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_fra
# Generated by utils.py
dataset_name: hau
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_hau
# Generated by utils.py
dataset_name: ibo
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_ibo
# Generated by utils.py
dataset_name: kin
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_kin
# Generated by utils.py
dataset_name: lin
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_lin
# Generated by utils.py
dataset_name: lug
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_lug
# Generated by utils.py
dataset_name: orm
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_orm
# Generated by utils.py
dataset_name: sna
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_sna
# Generated by utils.py
dataset_name: sot
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_sot
# Generated by utils.py
dataset_name: swa
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_swa
# Generated by utils.py
dataset_name: twi
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_twi
# Generated by utils.py
dataset_name: wol
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_wol
# Generated by utils.py
dataset_name: xho
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_xho
# Shared base configuration for the afrixnli_en_direct_<lang> tasks.
# The per-language files include this file and set `dataset_name` and `task`.
group:
  - xnli
  - afrixnli
dataset_path: masakhane/afrixnli
# Placeholder; each per-language config supplies its own dataset_name.
dataset_name: null
output_type: multiple_choice
validation_split: validation
test_split: test
fewshot_split: validation
doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
# True = entailment
# False = contradiction
# Neither = neutral
doc_to_target: !function utils.doc_to_target
# Order must stay in step with the label mapping in utils.doc_to_target.
doc_to_choice:
  - "True"
  - "Neither"
  - "False"
should_decontaminate: true
doc_to_decontamination_query: premise
metric_list:
  - metric: f1
    aggregation: !function utils.weighted_f1_score
    average: weighted
    hf_evaluate: true
    higher_is_better: True
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - ","
      - "\\$"
  - metric: acc
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - ","
      - "\\$"
metadata:
  version: 1.0
# Generated by utils.py
dataset_name: yor
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_yor
# Generated by utils.py
dataset_name: zul
include: afrixnli_en_direct_yaml
task: afrixnli_en_direct_zul
from sklearn.metrics import f1_score
def doc_to_target(doc):
    """Return the answer string for a document's integer ``label``.

    The mapping follows the order of ``doc_to_choice`` in the task YAML:
    0 -> "True", 1 -> "Neither", 2 -> "False".

    Args:
        doc: Mapping that holds a ``"label"`` entry equal to 0, 1 or 2.

    Returns:
        One of ``"True"``, ``"Neither"`` or ``"False"``.

    Raises:
        KeyError: If ``doc`` has no ``"label"`` key, or the label is not
            one of the three values above.
    """
    replacements = {
        0: 'True',
        1: 'Neither',
        2: 'False'
    }
    return replacements[doc["label"]]
def weighted_f1_score(items):
    """Aggregate (gold, prediction) pairs into a single weighted F1 score.

    Args:
        items: Non-empty iterable of sequences whose first element is the
            gold label and whose second element is the predicted label.

    Returns:
        The result of ``f1_score(golds, preds, average="weighted")``.

    Raises:
        IndexError: If ``items`` is empty, or its entries have fewer than
            two elements.
    """
    # Transpose [(gold, pred), ...] into [(gold, ...), (pred, ...)].
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    return f1_score(golds, preds, average="weighted")
#!/bin/bash
# Runs lm_eval on every afrixnli_en_direct_<lang> task for each model listed
# below, once per few-shot setting. Output for each run is written to
# results/<model name without the org prefix>/<num_fewshot>.

models=(
  "masakhane/African-ultrachat-alpaca"
  "masakhane/zephyr-7b-gemma-sft-african-alpaca"
  "masakhane/zephyr-7b-gemma-sft-african-ultrachat-5k"
  "google/flan-t5-xxl"
  "bigscience/mt0-xxl-mt"
  "CohereForAI/aya-101"
  "bigscience/bloomz-7b1-mt"
  "meta-llama/Llama-2-7b-chat-hf"
  "meta-llama/Meta-Llama-3-8B-Instruct"
  "meta-llama/Meta-Llama-3-70B-Instruct"
  "google/gemma-1.1-7b-it"
  "RWKV/v5-EagleX-v2-7B-HF"
  "RWKV/rwkv-6-world-7b"
)

# Language codes; each one becomes the task name afrixnli_en_direct_<code>.
langs=(
  amh eng ewe fra hau ibo kin lin lug
  orm sna sot swa twi wol xho yor zul
)

# Build the comma-separated task list handed to --tasks. printf repeats the
# format for every element, leaving one trailing comma that is stripped after.
printf -v task 'afrixnli_en_direct_%s,' "${langs[@]}"
task=${task%,}

for model in "${models[@]}"; do
  echo "Evaluating model: $model"
  for fewshot in 0 2 4 6 8; do
    # ${model##*/} drops everything up to the last "/", i.e. the org prefix.
    export OUTPUT_DIR="results/${model##*/}/${fewshot}"
    mkdir -p "$OUTPUT_DIR"
    # NOTE(review): --limit 1 and --verbosity DEBUG look like smoke-test
    # settings (one example per task) - confirm before reporting results.
    lm_eval --model hf \
      --model_args "pretrained=${model}" \
      --tasks "$task" \
      --device cuda:0 \
      --batch_size 16 \
      --output_path "$OUTPUT_DIR" \
      --num_fewshot "$fewshot" \
      --limit 1 \
      --verbosity DEBUG
  done
done
\ No newline at end of file
# Generated by utils.py
dataset_name: amh
doc_to_choice: '{{[premise+", ትክክል? አዎ, "+hypothesis,premise+", ትክክል? እንዲሁም, "+hypothesis,premise+",
ትክክል? አይ, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_amh
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment