leaderboard.yaml 737 Bytes
Newer Older
1
2
3
4
5
6
7
8
# Group definition: declares a group named `leaderboard` and lists the six
# task names that belong to it.
# NOTE(review): the task names are references to definitions that are not
# visible in this file — confirm each one resolves in the consuming tool.
group: leaderboard
task:
  - leaderboard_mmlu_pro
  - leaderboard_bbh
  - leaderboard_gpqa
  - leaderboard_math_hard
  - leaderboard_ifeval
  - leaderboard_musr
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Group-level metric settings: one entry per metric name. Every entry uses
# the same configuration (`aggregation: mean`, `weight_by_size: true`).
# NOTE(review): presumably `weight_by_size: true` weights each member task's
# score by its number of samples when taking the mean — confirm against the
# consuming tool's documentation.
# NOTE(review): which member task reports which metric is not visible here;
# verify that each metric name below matches what the tasks actually emit.
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    weight_by_size: true
  - metric: acc_norm
    aggregation: mean
    weight_by_size: true
  - metric: exact_match
    aggregation: mean
    weight_by_size: true
  # Instruction-level and prompt-level accuracy, each in a loose and a strict
  # variant (four entries).
  - metric: inst_level_loose_acc
    aggregation: mean
    weight_by_size: true
  - metric: inst_level_strict_acc
    aggregation: mean
    weight_by_size: true
  - metric: prompt_level_loose_acc
    aggregation: mean
    weight_by_size: true
  - metric: prompt_level_strict_acc
    aggregation: mean
    weight_by_size: true
# Free-form metadata attached to this group configuration.
# NOTE(review): the unquoted `1.0` is loaded as a float, not a string, so a
# later value such as 1.10 would collapse to 1.1. Confirm whether the consumer
# expects a number here before quoting it.
metadata:
  version: 1.0