Commit 741a6a69 authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into mela

parents 494a4515 b536f067
include: _inverse_scaling_mc_yaml
task: inverse_scaling_sig_figs
dataset_path: Albertmade/sig-figs
group:
- inverse_scaling_mc
task: inverse_scaling_winobias_antistereotype
dataset_path: mathemakitten/winobias_antistereotype_test_v5
output_type: multiple_choice
test_split: test
doc_to_text: text
doc_to_choice: classes
doc_to_target: target
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
dataset_kwargs:
trust_remote_code: true
metadata:
version: 0
group:
tag:
- kmmlu
- kmmlu_hard_cot
dataset_path: HAERAE-HUB/KMMLU-HARD
......
group:
tag:
- kmmlu
- kmmlu_direct
dataset_path: HAERAE-HUB/KMMLU
......
group:
tag:
- kmmlu
- kmmlu_hard_direct
dataset_path: HAERAE-HUB/KMMLU-HARD
......
group:
tag:
- kmmlu
- kmmlu_hard
dataset_path: HAERAE-HUB/KMMLU-HARD
......
group:
tag:
- kobest
task: kobest_boolq
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_copa
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_hellaswag
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_sentineg
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_wic
dataset_path: skt/kobest_v1
......
from datasets import Dataset
from sklearn.metrics import f1_score
def copa_doc_to_text(doc: dict) -> str:
......@@ -41,6 +40,8 @@ def hellaswag_process_doc(doc: Dataset) -> Dataset:
def macro_f1_score(items):
from sklearn.metrics import f1_score
unzipped_list = list(zip(*items))
golds = unzipped_list[0]
preds = unzipped_list[1]
......
group: kormedmcqa
task:
- kormedmcqa_doctor
- kormedmcqa_nurse
- kormedmcqa_pharm
aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
metadata:
version: 0.0
group: kormedmcqa
task : kormedmcqa_doctor
dataset_path : sean0042/KorMedMCQA
dataset_name : doctor
......
group: kormedmcqa
task : kormedmcqa_nurse
dataset_path : sean0042/KorMedMCQA
dataset_name : nurse
......
group: kormedmcqa
task : kormedmcqa_pharm
dataset_path : sean0042/KorMedMCQA
dataset_name : pharm
......
group:
tag:
- lambada
task: lambada_openai
dataset_path: EleutherAI/lambada_openai
......
group:
tag:
- lambada
task: lambada_standard
dataset_path: lambada
......
group:
tag:
- lambada_cloze
task: lambada_openai_cloze_yaml
dataset_path: EleutherAI/lambada_openai
......
group:
tag:
- lambada_cloze
task: lambada_standard_cloze_yaml
dataset_path: lambada
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment