Commit 741a6a69 authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into mela

parents 494a4515 b536f067
include: _inverse_scaling_mc_yaml
task: inverse_scaling_sig_figs
dataset_path: Albertmade/sig-figs
group:
- inverse_scaling_mc
task: inverse_scaling_winobias_antistereotype
dataset_path: mathemakitten/winobias_antistereotype_test_v5
output_type: multiple_choice
test_split: test
doc_to_text: text
doc_to_choice: classes
doc_to_target: target
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
dataset_kwargs:
trust_remote_code: true
metadata:
version: 0
group: tag:
- kmmlu - kmmlu
- kmmlu_hard_cot - kmmlu_hard_cot
dataset_path: HAERAE-HUB/KMMLU-HARD dataset_path: HAERAE-HUB/KMMLU-HARD
......
group: tag:
- kmmlu - kmmlu
- kmmlu_direct - kmmlu_direct
dataset_path: HAERAE-HUB/KMMLU dataset_path: HAERAE-HUB/KMMLU
......
group: tag:
- kmmlu - kmmlu
- kmmlu_hard_direct - kmmlu_hard_direct
dataset_path: HAERAE-HUB/KMMLU-HARD dataset_path: HAERAE-HUB/KMMLU-HARD
......
group: tag:
- kmmlu - kmmlu
- kmmlu_hard - kmmlu_hard
dataset_path: HAERAE-HUB/KMMLU-HARD dataset_path: HAERAE-HUB/KMMLU-HARD
......
group: tag:
- kobest - kobest
task: kobest_boolq task: kobest_boolq
dataset_path: skt/kobest_v1 dataset_path: skt/kobest_v1
......
group: tag:
- kobest - kobest
task: kobest_copa task: kobest_copa
dataset_path: skt/kobest_v1 dataset_path: skt/kobest_v1
......
group: tag:
- kobest - kobest
task: kobest_hellaswag task: kobest_hellaswag
dataset_path: skt/kobest_v1 dataset_path: skt/kobest_v1
......
group: tag:
- kobest - kobest
task: kobest_sentineg task: kobest_sentineg
dataset_path: skt/kobest_v1 dataset_path: skt/kobest_v1
......
group: tag:
- kobest - kobest
task: kobest_wic task: kobest_wic
dataset_path: skt/kobest_v1 dataset_path: skt/kobest_v1
......
from datasets import Dataset from datasets import Dataset
from sklearn.metrics import f1_score
def copa_doc_to_text(doc: dict) -> str: def copa_doc_to_text(doc: dict) -> str:
...@@ -41,6 +40,8 @@ def hellaswag_process_doc(doc: Dataset) -> Dataset: ...@@ -41,6 +40,8 @@ def hellaswag_process_doc(doc: Dataset) -> Dataset:
def macro_f1_score(items): def macro_f1_score(items):
from sklearn.metrics import f1_score
unzipped_list = list(zip(*items)) unzipped_list = list(zip(*items))
golds = unzipped_list[0] golds = unzipped_list[0]
preds = unzipped_list[1] preds = unzipped_list[1]
......
group: kormedmcqa
task:
- kormedmcqa_doctor
- kormedmcqa_nurse
- kormedmcqa_pharm
aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
metadata:
version: 0.0
group: kormedmcqa
task : kormedmcqa_doctor task : kormedmcqa_doctor
dataset_path : sean0042/KorMedMCQA dataset_path : sean0042/KorMedMCQA
dataset_name : doctor dataset_name : doctor
......
group: kormedmcqa
task : kormedmcqa_nurse task : kormedmcqa_nurse
dataset_path : sean0042/KorMedMCQA dataset_path : sean0042/KorMedMCQA
dataset_name : nurse dataset_name : nurse
......
group: kormedmcqa
task : kormedmcqa_pharm task : kormedmcqa_pharm
dataset_path : sean0042/KorMedMCQA dataset_path : sean0042/KorMedMCQA
dataset_name : pharm dataset_name : pharm
......
group: tag:
- lambada - lambada
task: lambada_openai task: lambada_openai
dataset_path: EleutherAI/lambada_openai dataset_path: EleutherAI/lambada_openai
......
group: tag:
- lambada - lambada
task: lambada_standard task: lambada_standard
dataset_path: lambada dataset_path: lambada
......
group: tag:
- lambada_cloze - lambada_cloze
task: lambada_openai_cloze_yaml task: lambada_openai_cloze_yaml
dataset_path: EleutherAI/lambada_openai dataset_path: EleutherAI/lambada_openai
......
group: tag:
- lambada_cloze - lambada_cloze
task: lambada_standard_cloze_yaml task: lambada_standard_cloze_yaml
dataset_path: lambada dataset_path: lambada
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment