Commit e4db76cb authored by haileyschoelkopf
Browse files

Merge branch 'main' into multimodal-prototyping

parents 6cc6e9cd ad80f555
---
# Group definition for `agieval_cn`: lists the member tasks and reports
# their `acc` metric aggregated as a mean with `weight_by_size` enabled.
group: agieval_cn
task:
  - agieval_gaokao_biology
  - agieval_gaokao_chemistry
  - agieval_gaokao_chinese
  - agieval_gaokao_geography
  - agieval_gaokao_history
  - agieval_gaokao_mathcloze
  - agieval_gaokao_mathqa
  - agieval_gaokao_physics
  - agieval_jec_qa_ca
  - agieval_jec_qa_kd
  - agieval_logiqa_zh
aggregate_metric_list:
  # All three keys belong to the single list entry for `acc`.
  - metric: acc
    aggregation: mean
    weight_by_size: true
metadata:
  version: 0.0
---
# Group definition for `agieval_en`: lists the member tasks and reports
# their `acc` metric aggregated as a mean with `weight_by_size` enabled.
group: agieval_en
task:
  - agieval_aqua_rat
  # categorizing as EN because the AGIEval codebase lists this as in
  # `english_qa_tasks`
  - agieval_gaokao_english
  - agieval_logiqa_en
  - agieval_lsat_ar
  - agieval_lsat_lr
  - agieval_lsat_rc
  - agieval_math
  - agieval_sat_en_without_passage
  - agieval_sat_en
  - agieval_sat_math
aggregate_metric_list:
  # All three keys belong to the single list entry for `acc`.
  - metric: acc
    aggregation: mean
    weight_by_size: true
metadata:
  version: 0.0
---
# Group definition for `agieval_nous`: lists the member tasks and reports
# their `acc_norm` metric aggregated as a mean with `weight_by_size` enabled.
group: agieval_nous
task:
  - agieval_aqua_rat
  - agieval_logiqa_en
  - agieval_lsat_ar
  - agieval_lsat_lr
  - agieval_lsat_rc
  - agieval_sat_en_without_passage
  - agieval_sat_en
  - agieval_sat_math
aggregate_metric_list:
  # All three keys belong to the single list entry for `acc_norm`.
  - metric: acc_norm
    aggregation: mean
    weight_by_size: true
metadata:
  version: 0.0
group:
- agieval
- agieval_en
- agieval_nous
task: agieval_aqua_rat task: agieval_aqua_rat
dataset_path: hails/agieval-aqua-rat dataset_path: hails/agieval-aqua-rat
dataset_name: null dataset_name: null
......
---
# Task config for `agieval_gaokao_biology`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_biology
dataset_path: hails/agieval-gaokao-biology
---
# Task config for `agieval_gaokao_chemistry`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_chemistry
dataset_path: hails/agieval-gaokao-chemistry
---
# Task config for `agieval_gaokao_chinese`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_chinese
dataset_path: hails/agieval-gaokao-chinese
---
# Task config for `agieval_gaokao_english`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  # categorizing as EN because the AGIEval codebase lists this as in
  # `english_qa_tasks`
  - agieval_en
task: agieval_gaokao_english
dataset_path: hails/agieval-gaokao-english
---
# Task config for `agieval_gaokao_geography`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_geography
dataset_path: hails/agieval-gaokao-geography
---
# Task config for `agieval_gaokao_history`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_history
dataset_path: hails/agieval-gaokao-history
group:
- agieval
- agieval_cn
task: agieval_gaokao_mathcloze task: agieval_gaokao_mathcloze
dataset_path: hails/agieval-gaokao-mathcloze dataset_path: hails/agieval-gaokao-mathcloze
dataset_name: null dataset_name: null
......
---
# Task config for `agieval_gaokao_mathqa`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_mathqa
dataset_path: hails/agieval-gaokao-mathqa
---
# Task config for `agieval_gaokao_physics`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_physics
dataset_path: hails/agieval-gaokao-physics
---
# Task config for `agieval_jec_qa_ca`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_jec_qa_ca
dataset_path: hails/agieval-jec-qa-ca
---
# Task config for `agieval_jec_qa_kd`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_jec_qa_kd
dataset_path: hails/agieval-jec-qa-kd
---
# Task config for `agieval_logiqa_en`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_nous
  - agieval_en
task: agieval_logiqa_en
dataset_path: hails/agieval-logiqa-en
---
# Task config for `agieval_logiqa_zh`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_logiqa_zh
dataset_path: hails/agieval-logiqa-zh
---
# Task config for `agieval_lsat_ar`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_nous
  - agieval_en
task: agieval_lsat_ar
dataset_path: hails/agieval-lsat-ar
---
# Task config for `agieval_lsat_lr`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_nous
  - agieval_en
task: agieval_lsat_lr
dataset_path: hails/agieval-lsat-lr
---
# Task config for `agieval_lsat_rc`; references aqua-rat.yaml via
# `include` and sets only the group membership, task name and dataset path.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_nous
  - agieval_en
task: agieval_lsat_rc
dataset_path: hails/agieval-lsat-rc
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment