Commit e4db76cb authored by haileyschoelkopf
Browse files

Merge branch 'main' into multimodal-prototyping

parents 6cc6e9cd ad80f555
---
# Each document below is one independent task config. They are kept as
# separate YAML documents so that the repeated `include`, `task`, and
# `dataset_path` keys do not collide inside a single mapping (where most
# parsers would silently keep only the last value).
#
# Every stub references the shared base file `_inverse_scaling_mc_yaml`
# through `include` and sets only its own task name and dataset path.

# Task: NeQA
include: _inverse_scaling_mc_yaml
task: inverse_scaling_neqa
dataset_path: inverse-scaling/NeQA
---
# Task: pattern-matching suppression
include: _inverse_scaling_mc_yaml
task: inverse_scaling_pattern_matching_suppression
dataset_path: Albertmade/pattern-matching-suppression
---
# Task: quote repetition
include: _inverse_scaling_mc_yaml
task: inverse_scaling_quote_repetition
dataset_path: inverse-scaling/quote-repetition
---
# Task: redefine math
include: _inverse_scaling_mc_yaml
task: inverse_scaling_redefine_math
dataset_path: inverse-scaling/redefine-math
---
# Task: repetitive algebra
include: _inverse_scaling_mc_yaml
task: inverse_scaling_repetitive_algebra
dataset_path: Albertmade/repetitive-algebra
---
# Task: sig figs
include: _inverse_scaling_mc_yaml
task: inverse_scaling_sig_figs
dataset_path: Albertmade/sig-figs
---
# Task config: inverse_scaling_winobias_antistereotype.
# Self-contained multiple-choice task (no `include`); listed under the
# `inverse_scaling_mc` group.
group:
  - inverse_scaling_mc
task: inverse_scaling_winobias_antistereotype
dataset_path: mathemakitten/winobias_antistereotype_test_v5
output_type: multiple_choice
test_split: test
# Names of the dataset fields used for the prompt, the answer options,
# and the gold label.
doc_to_text: text
doc_to_choice: classes
doc_to_target: target
# Two metrics are reported; both are averaged and both are better when higher.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
# NOTE(review): presumably forwarded to the dataset loader; `true` here looks
# like it permits running loader code shipped with the dataset — confirm.
dataset_kwargs:
  trust_remote_code: true
metadata:
  version: 0
group:
tag:
- kmmlu
- kmmlu_hard_cot
dataset_path: HAERAE-HUB/KMMLU-HARD
......
group:
tag:
- kmmlu
- kmmlu_direct
dataset_path: HAERAE-HUB/KMMLU
......
group:
tag:
- kmmlu
- kmmlu_hard_direct
dataset_path: HAERAE-HUB/KMMLU-HARD
......
group:
tag:
- kmmlu
- kmmlu_hard
dataset_path: HAERAE-HUB/KMMLU-HARD
......
group:
tag:
- kobest
task: kobest_boolq
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_copa
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_hellaswag
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_sentineg
dataset_path: skt/kobest_v1
......
group:
tag:
- kobest
task: kobest_wic
dataset_path: skt/kobest_v1
......
---
# Group config: kormedmcqa.
# Bundles the three KorMedMCQA subtasks and defines how their scores are
# combined into a single group-level metric.
group: kormedmcqa
task:
  - kormedmcqa_doctor
  - kormedmcqa_nurse
  - kormedmcqa_pharm
# Group score: mean of exact_match across the subtasks, with
# `weight_by_size` enabled.
aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    weight_by_size: true
metadata:
  version: 0.0
group: kormedmcqa
task : kormedmcqa_doctor
dataset_path : sean0042/KorMedMCQA
dataset_name : doctor
......
group: kormedmcqa
task : kormedmcqa_nurse
dataset_path : sean0042/KorMedMCQA
dataset_name : nurse
......
group: kormedmcqa
task : kormedmcqa_pharm
dataset_path : sean0042/KorMedMCQA
dataset_name : pharm
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment