Commit c1e63555 authored by Yu Shi Jie
Browse files

Merge branch 'upstream' into 'mmlu-pro'

add tokenizer logs info (#1731)

See merge request shijie.yu/lm-evaluation-harness!4
parents e361687c 42dc2448
# Per-language task configs for the AfriMMLU "direct" tasks.
# Each three-line fragment below sets a dataset subset (`dataset_name`),
# references `afrimmlu_common_yaml` via `include`, and names the task
# `afrimmlu_direct_<dataset_name>`.
# NOTE(review): these fragments look like separate files shown back to back;
# parsed as one YAML document the repeated top-level keys would collide.
# TODO confirm each fragment lives in its own file.

# --- fragment: amh ---
dataset_name: amh
include: afrimmlu_common_yaml
task: afrimmlu_direct_amh
# --- fragment: eng ---
dataset_name: eng
include: afrimmlu_common_yaml
task: afrimmlu_direct_eng
# --- fragment: ewe ---
dataset_name: ewe
include: afrimmlu_common_yaml
task: afrimmlu_direct_ewe
# --- fragment: fra ---
dataset_name: fra
include: afrimmlu_common_yaml
task: afrimmlu_direct_fra
# --- fragment: hau ---
dataset_name: hau
include: afrimmlu_common_yaml
task: afrimmlu_direct_hau
# --- fragment: ibo ---
dataset_name: ibo
include: afrimmlu_common_yaml
task: afrimmlu_direct_ibo
# --- fragment: kin ---
dataset_name: kin
include: afrimmlu_common_yaml
task: afrimmlu_direct_kin
# --- fragment: lin ---
dataset_name: lin
include: afrimmlu_common_yaml
task: afrimmlu_direct_lin
# --- fragment: lug ---
dataset_name: lug
include: afrimmlu_common_yaml
task: afrimmlu_direct_lug
# --- fragment: orm ---
dataset_name: orm
include: afrimmlu_common_yaml
task: afrimmlu_direct_orm
# --- fragment: sna ---
dataset_name: sna
include: afrimmlu_common_yaml
task: afrimmlu_direct_sna
# --- fragment: sot ---
dataset_name: sot
include: afrimmlu_common_yaml
task: afrimmlu_direct_sot
# --- fragment: swa ---
dataset_name: swa
include: afrimmlu_common_yaml
task: afrimmlu_direct_swa
# --- fragment: twi ---
dataset_name: twi
include: afrimmlu_common_yaml
task: afrimmlu_direct_twi
# --- fragment: wol ---
dataset_name: wol
include: afrimmlu_common_yaml
task: afrimmlu_direct_wol
# --- fragment: xho ---
dataset_name: xho
include: afrimmlu_common_yaml
task: afrimmlu_direct_xho
# --- fragment: yor ---
dataset_name: yor
include: afrimmlu_common_yaml
task: afrimmlu_direct_yor
# --- fragment: zul ---
dataset_name: zul
include: afrimmlu_common_yaml
task: afrimmlu_direct_zul
import ast

from sklearn.metrics import f1_score
def doc_to_choice(doc):
    """Return the answer options of a document as a Python object.

    Args:
        doc: Mapping whose "choices" entry is the string form of a Python
            literal, e.g. "['a', 'b', 'c', 'd']".

    Returns:
        The parsed value of ``doc["choices"]``.

    Raises:
        ValueError, SyntaxError: If the field is not a plain Python literal.
    """
    # SECURITY: this used to be eval(), which would execute arbitrary code
    # embedded in dataset text. literal_eval accepts literals only and gives
    # the same result for well-formed list strings.
    return ast.literal_eval(doc["choices"])
def doc_to_text(doc):
    """Render the multiple-choice prompt for a single document.

    Args:
        doc: Mapping with "subject" and "question" entries plus a "choices"
            entry holding the string form of a Python list literal with at
            least four options.

    Returns:
        The prompt text, ending in "Answer: " ready for the model to continue.

    Raises:
        ValueError, SyntaxError: If "choices" is not a plain Python literal.
        IndexError: If fewer than four choices are present.
    """
    # The wording is part of the benchmark prompt and is reproduced verbatim;
    # do not "fix" its grammar without re-baselining results.
    template = (
        "You are a highly knowledgeable and intelligent artificial intelligence\n"
        "model answers multiple-choice questions about {subject}\n"
        "Question: {question}\n"
        "Choices:\n"
        "A: {choice1}\n"
        "B: {choice2}\n"
        "C: {choice3}\n"
        "D: {choice4}\n"
        "Answer: "
    )
    # SECURITY: this used to be eval(); literal_eval parses the same list
    # strings without executing code that may be embedded in dataset text.
    options = ast.literal_eval(doc["choices"])
    return template.format(
        subject=doc["subject"],
        question=doc["question"],
        choice1=options[0],
        choice2=options[1],
        choice3=options[2],
        choice4=options[3],
    )
def weighted_f1_score(items):
    """Compute the weighted-average F1 over (gold, prediction) pairs.

    Args:
        items: Iterable of sequences whose first element is the gold label
            and whose second element is the predicted label.

    Returns:
        The value returned by ``f1_score(..., average="weighted")``.
    """
    # Transpose the pairs into one column of golds and one of predictions.
    columns = list(zip(*items))
    gold_labels, predicted_labels = columns[0], columns[1]
    return f1_score(gold_labels, predicted_labels, average="weighted")
# Zero-shot evaluation of masakhane/African-ultrachat-alpaca on the
# afrimmlu_direct_* tasks, logged to the `afrimmlu` Weights & Biases project.
# The task list now includes afrimmlu_direct_swa, which was previously missing
# even though a matching `swa` config is defined.
lm_eval --model hf \
  --model_args pretrained=masakhane/African-ultrachat-alpaca \
  --tasks afrimmlu_direct_amh,afrimmlu_direct_eng,afrimmlu_direct_ewe,afrimmlu_direct_fra,afrimmlu_direct_hau,afrimmlu_direct_ibo,afrimmlu_direct_kin,afrimmlu_direct_lin,afrimmlu_direct_lug,afrimmlu_direct_orm,afrimmlu_direct_sna,afrimmlu_direct_sot,afrimmlu_direct_swa,afrimmlu_direct_twi,afrimmlu_direct_wol,afrimmlu_direct_xho,afrimmlu_direct_yor,afrimmlu_direct_zul \
  --device cuda:0 \
  --batch_size 1 \
  --num_fewshot 0 \
  --verbosity DEBUG \
  --wandb_args project=afrimmlu
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment