"src/tl_templates/vscode:/vscode.git/clone" did not exist on "5ccac4fa53c2c0ab7cdd0e0bb8f0965d8b670682"
Unverified commit 348e304a authored by Jess, committed by GitHub

Merge pull request #17 from JessicaOjo/afrixnli_translate_test

Afrixnli translate test
parents e57e1aef c1ed7068
# Generated by utils.py
dataset_name: amh
include: afrixnli_translate_yaml
task: afrixnli_translate_amh
# Generated by utils.py
dataset_name: ewe
include: afrixnli_translate_yaml
task: afrixnli_translate_ewe
# Generated by utils.py
dataset_name: fra
include: afrixnli_translate_yaml
task: afrixnli_translate_fra
# Generated by utils.py
dataset_name: hau
include: afrixnli_translate_yaml
task: afrixnli_translate_hau
# Generated by utils.py
dataset_name: ibo
include: afrixnli_translate_yaml
task: afrixnli_translate_ibo
# Generated by utils.py
dataset_name: kin
include: afrixnli_translate_yaml
task: afrixnli_translate_kin
# Generated by utils.py
dataset_name: lin
include: afrixnli_translate_yaml
task: afrixnli_translate_lin
# Generated by utils.py
dataset_name: lug
include: afrixnli_translate_yaml
task: afrixnli_translate_lug
# Generated by utils.py
dataset_name: orm
include: afrixnli_translate_yaml
task: afrixnli_translate_orm
# Generated by utils.py
dataset_name: sna
include: afrixnli_translate_yaml
task: afrixnli_translate_sna
# Generated by utils.py
dataset_name: sot
include: afrixnli_translate_yaml
task: afrixnli_translate_sot
# Generated by utils.py
dataset_name: swa
include: afrixnli_translate_yaml
task: afrixnli_translate_swa
# Generated by utils.py
dataset_name: twi
include: afrixnli_translate_yaml
task: afrixnli_translate_twi
# Generated by utils.py
dataset_name: wol
include: afrixnli_translate_yaml
task: afrixnli_translate_wol
# Generated by utils.py
dataset_name: xho
include: afrixnli_translate_yaml
task: afrixnli_translate_xho
group:
  - xnli
  - afrixnli
  - afrixnli-translate-test
dataset_path: masakhane/afrixnli-translate-test
dataset_name: null
output_type: multiple_choice
test_split: test
doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
# True = entailment
# False = contradiction
# Neither = neutral
doc_to_target: !function utils.doc_to_target
doc_to_choice:
  - "True"
  - "Neither"
  - "False"
should_decontaminate: true
doc_to_decontamination_query: premise
metric_list:
  - metric: f1
    aggregation: !function utils.weighted_f1_score
    average: weighted
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
  - metric: acc
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  version: 1.0
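For illustration, the doc_to_text template above renders a single example as follows. The premise/hypothesis pair below is an invented placeholder, not taken from the dataset; only the field names (premise, hypothesis) come from the config.

# Invented example document; field names follow the config above.
doc = {
    "premise": "The farmers planted maize at the start of the rainy season.",
    "hypothesis": "Maize was planted this season.",
}
prompt = f"{doc['premise']}\nQuestion: {doc['hypothesis']} True, False, or Neither?\nAnswer:"
print(prompt)
# The farmers planted maize at the start of the rainy season.
# Question: Maize was planted this season. True, False, or Neither?
# Answer: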
# Generated by utils.py
dataset_name: yor
include: afrixnli_translate_yaml
task: afrixnli_translate_yor
# Generated by utils.py
dataset_name: zul
include: afrixnli_translate_yaml
task: afrixnli_translate_zul
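Each per-language stub above carries a "# Generated by utils.py" header; the generation code itself is not shown in this diff. A hypothetical sketch of such a generator, where the output file names and the use of pyyaml are assumptions (only the language list and stub fields are taken from the files above), could look like this:

# Hypothetical generator sketch -- not the actual script from this PR.
# Writes one YAML stub per language, matching the stubs shown above.
import yaml  # requires pyyaml

LANGUAGES = [
    "amh", "ewe", "fra", "hau", "ibo", "kin", "lin", "lug", "orm",
    "sna", "sot", "swa", "twi", "wol", "xho", "yor", "zul",
]

for lang in LANGUAGES:
    stub = {
        "dataset_name": lang,
        "include": "afrixnli_translate_yaml",
        "task": f"afrixnli_translate_{lang}",
    }
    # Assumed file naming convention; the diff does not show the file names.
    with open(f"afrixnli_translate_{lang}.yaml", "w") as f:
        f.write("# Generated by utils.py\n")
        yaml.dump(stub, f, sort_keys=False)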
from sklearn.metrics import f1_score


def doc_to_target(doc):
    # Map the integer NLI label to the answer string used in doc_to_choice.
    replacements = {
        0: 'True',     # entailment
        1: 'Neither',  # neutral
        2: 'False'     # contradiction
    }
    return replacements[doc["label"]]


def weighted_f1_score(items):
    # items is a list of (gold, prediction) pairs; unzip and score with weighted F1.
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
    return fscore
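As a quick sanity check, the two helpers above can be exercised on toy inputs. The document and the (gold, prediction) pairs below are made up for illustration:

# Toy usage of the helpers above; inputs are invented.
doc = {"label": 0}
print(doc_to_target(doc))  # -> "True"

# weighted_f1_score receives (gold, prediction) pairs, unzips them,
# and delegates to sklearn's weighted F1.
items = [("True", "True"), ("False", "Neither"), ("Neither", "Neither")]
print(weighted_f1_score(items))  # weighted F1 over the three toy pairs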
#!/bin/bash
models=(
    "masakhane/African-ultrachat-alpaca"
    "masakhane/zephyr-7b-gemma-sft-african-alpaca"
    "masakhane/zephyr-7b-gemma-sft-african-ultrachat-5k"
    "google/flan-t5-xxl"
    "bigscience/mt0-xxl-mt"
    "CohereForAI/aya-101"
    "bigscience/bloomz-7b1-mt"
    "meta-llama/Llama-2-7b-chat-hf"
    "meta-llama/Meta-Llama-3-8B-Instruct"
    "meta-llama/Meta-Llama-3-70B-Instruct"
    "google/gemma-1.1-7b-it"
    "RWKV/v5-EagleX-v2-7B-HF"
    "RWKV/rwkv-6-world-7b"
)
# Override: only this model is evaluated; remove to run the full list above.
models=("masakhane/zephyr-7b-gemma-sft-african-ultrachat-5k")
task=afrixnli_translate_amh,afrixnli_translate_ewe,afrixnli_translate_fra,afrixnli_translate_hau,afrixnli_translate_ibo,afrixnli_translate_kin,afrixnli_translate_lin,afrixnli_translate_lug,afrixnli_translate_orm,afrixnli_translate_sna,afrixnli_translate_sot,afrixnli_translate_swa,afrixnli_translate_twi,afrixnli_translate_wol,afrixnli_translate_xho,afrixnli_translate_yor,afrixnli_translate_zul
for model in "${models[@]}"
do
    echo "Evaluating model: $model"
    for fewshot in 0
    do
        export OUTPUT_DIR=results/${model##*/}/$fewshot
        mkdir -p "$OUTPUT_DIR"
        lm_eval --model hf \
            --model_args "pretrained=${model}" \
            --tasks "$task" \
            --device cuda:0 \
            --batch_size 16 \
            --output_path "$OUTPUT_DIR" \
            --num_fewshot "$fewshot" \
            --verbosity DEBUG
    done
done
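After a run, lm_eval writes its results under $OUTPUT_DIR. A small sketch for collecting the per-task accuracy numbers from those files follows; the exact file layout and metric key names depend on the lm-eval version, so treat both as assumptions.

# Hypothetical results collector; file layout and key names are assumptions
# and may differ between lm-eval versions.
import glob
import json

for path in glob.glob("results/**/*.json", recursive=True):
    with open(path) as f:
        data = json.load(f)
    for task_name, metrics in data.get("results", {}).items():
        acc = metrics.get("acc") or metrics.get("acc,none")
        print(f"{path}: {task_name}: acc={acc}")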