Unverified commit a2af2101, authored by Yen-Ting Lin and committed by GitHub
Browse files

Merge branch 'EleutherAI:main' into main

parents 82cb25c1 d5f39bf8
......@@ -75,7 +75,7 @@ Please make sure to cite all the individual datasets in your paper when you use
}
```
### Groups and Tasks
### Groups, Tags, and Tasks
#### Groups
......@@ -89,6 +89,10 @@ Please make sure to cite all the individual datasets in your paper when you use
- `agieval_nous`: Evaluates a specific subset of AGIEval tasks (multiple-choice and english-only), namely those in https://github.com/teknium1/LLM-Benchmark-Logs/blob/main/benchmark-logs/Mistral-7B-Base.md
#### Tags
None.
#### Tasks
- `agieval_aqua_rat`
......
---
# Group definition for `agieval`: names the member tasks and declares how
# their per-task scores are combined into a single group-level metric.
group: agieval
task:
  # The first eleven entries are the same tasks listed by the `agieval_cn`
  # group.
  - agieval_gaokao_biology
  - agieval_gaokao_chemistry
  - agieval_gaokao_chinese
  - agieval_gaokao_geography
  - agieval_gaokao_history
  - agieval_gaokao_mathcloze
  - agieval_gaokao_mathqa
  - agieval_gaokao_physics
  - agieval_jec_qa_ca
  - agieval_jec_qa_kd
  - agieval_logiqa_zh
  # The remaining ten entries are the same tasks listed by the `agieval_en`
  # group.
  - agieval_aqua_rat
  - agieval_gaokao_english
  - agieval_logiqa_en
  - agieval_lsat_ar
  - agieval_lsat_lr
  - agieval_lsat_rc
  - agieval_math
  - agieval_sat_en_without_passage
  - agieval_sat_en
  - agieval_sat_math
aggregate_metric_list:
  # One list item = one mapping. `aggregation` and `weight_by_size` belong
  # to the same item as `metric`; at column 0 they would become unrelated
  # top-level keys.
  - metric: acc
    aggregation: mean
    # NOTE(review): presumably weights each task's score by its sample
    # count when averaging; confirm against the consuming harness.
    weight_by_size: true
metadata:
  # Kept as an unquoted number, exactly as in the original value.
  version: 0.0
---
# Group definition for `agieval_cn`: names the member tasks and declares how
# their per-task scores are combined into a single group-level metric.
group: agieval_cn
task:
  - agieval_gaokao_biology
  - agieval_gaokao_chemistry
  - agieval_gaokao_chinese
  - agieval_gaokao_geography
  - agieval_gaokao_history
  - agieval_gaokao_mathcloze
  - agieval_gaokao_mathqa
  - agieval_gaokao_physics
  - agieval_jec_qa_ca
  - agieval_jec_qa_kd
  - agieval_logiqa_zh
aggregate_metric_list:
  # One list item = one mapping. `aggregation` and `weight_by_size` belong
  # to the same item as `metric`; at column 0 they would become unrelated
  # top-level keys.
  - metric: acc
    aggregation: mean
    # NOTE(review): presumably weights each task's score by its sample
    # count when averaging; confirm against the consuming harness.
    weight_by_size: true
metadata:
  # Kept as an unquoted number, exactly as in the original value.
  version: 0.0
---
# Group definition for `agieval_en`: names the member tasks and declares how
# their per-task scores are combined into a single group-level metric.
group: agieval_en
task:
  - agieval_aqua_rat
  - agieval_gaokao_english  # categorizing as EN because the AGIEval codebase lists this as in `english_qa_tasks`
  - agieval_logiqa_en
  - agieval_lsat_ar
  - agieval_lsat_lr
  - agieval_lsat_rc
  - agieval_math
  - agieval_sat_en_without_passage
  - agieval_sat_en
  - agieval_sat_math
aggregate_metric_list:
  # One list item = one mapping. `aggregation` and `weight_by_size` belong
  # to the same item as `metric`; at column 0 they would become unrelated
  # top-level keys.
  - metric: acc
    aggregation: mean
    # NOTE(review): presumably weights each task's score by its sample
    # count when averaging; confirm against the consuming harness.
    weight_by_size: true
metadata:
  # Kept as an unquoted number, exactly as in the original value.
  version: 0.0
---
# Group definition for `agieval_nous`: names the member tasks and declares
# how their per-task scores are combined into a single group-level metric.
# Unlike the other agieval groups in this listing, this one aggregates
# `acc_norm` rather than `acc`.
group: agieval_nous
task:
  - agieval_aqua_rat
  - agieval_logiqa_en
  - agieval_lsat_ar
  - agieval_lsat_lr
  - agieval_lsat_rc
  - agieval_sat_en_without_passage
  - agieval_sat_en
  - agieval_sat_math
aggregate_metric_list:
  # One list item = one mapping. `aggregation` and `weight_by_size` belong
  # to the same item as `metric`; at column 0 they would become unrelated
  # top-level keys.
  - metric: acc_norm
    aggregation: mean
    # NOTE(review): presumably weights each task's score by its sample
    # count when averaging; confirm against the consuming harness.
    weight_by_size: true
metadata:
  # Kept as an unquoted number, exactly as in the original value.
  version: 0.0
group:
- agieval
- agieval_en
- agieval_nous
task: agieval_aqua_rat
dataset_path: hails/agieval-aqua-rat
dataset_name: null
......
---
# Task stanza for `agieval_gaokao_biology`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_biology
dataset_path: hails/agieval-gaokao-biology
---
# Task stanza for `agieval_gaokao_chemistry`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_chemistry
dataset_path: hails/agieval-gaokao-chemistry
---
# Task stanza for `agieval_gaokao_chinese`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_chinese
dataset_path: hails/agieval-gaokao-chinese
---
# Task stanza for `agieval_gaokao_english`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_en  # categorizing as EN because the AGIEval codebase lists this as in `english_qa_tasks`
task: agieval_gaokao_english
dataset_path: hails/agieval-gaokao-english
---
# Task stanza for `agieval_gaokao_geography`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_geography
dataset_path: hails/agieval-gaokao-geography
---
# Task stanza for `agieval_gaokao_history`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_history
dataset_path: hails/agieval-gaokao-history
group:
- agieval
- agieval_cn
task: agieval_gaokao_mathcloze
dataset_path: hails/agieval-gaokao-mathcloze
dataset_name: null
......
---
# Task stanza for `agieval_gaokao_mathqa`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_mathqa
dataset_path: hails/agieval-gaokao-mathqa
---
# Task stanza for `agieval_gaokao_physics`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_gaokao_physics
dataset_path: hails/agieval-gaokao-physics
---
# Task stanza for `agieval_jec_qa_ca`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_jec_qa_ca
dataset_path: hails/agieval-jec-qa-ca
---
# Task stanza for `agieval_jec_qa_kd`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_jec_qa_kd
dataset_path: hails/agieval-jec-qa-kd
---
# Task stanza for `agieval_logiqa_en`; it is a member of three groups.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_nous
  - agieval_en
task: agieval_logiqa_en
dataset_path: hails/agieval-logiqa-en
---
# Task stanza for `agieval_logiqa_zh`.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_cn
task: agieval_logiqa_zh
dataset_path: hails/agieval-logiqa-zh
---
# Task stanza for `agieval_lsat_ar`; it is a member of three groups.
# NOTE(review): presumably every setting not given here is inherited from
# aqua-rat.yaml via `include`; confirm against the config loader.
include: aqua-rat.yaml
group:
  - agieval
  - agieval_nous
  - agieval_en
task: agieval_lsat_ar
dataset_path: hails/agieval-lsat-ar
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment