# bbq_generate.yaml
# Task identity and the request type used for it.
task: bbq_generate
output_type: generate_until

# Data source: the "All" configuration of oskarvanderwal/bbq, evaluated on
# the split named "test".
dataset_path: oskarvanderwal/bbq
dataset_name: All
test_split: test

# Prompt layout: the context, one blank line, the question prefixed with
# "Q: ", then a bare "A:" with no trailing newline (hence the `|-` chomping).
doc_to_text: |-
  {{context}}

  Q: {{question}}
  A:
doc_to_target: !function utils.doc_to_target
doc_to_choice: !function utils.doc_to_choice

# Document pre-processing and per-sample scoring hooks, both referenced from
# the `utils` module through the `!function` tag.
process_docs: !function utils.process_docs
process_results: !function utils.process_results_generate_until

# Kept as a plain (numeric) scalar so the loaded value is unchanged.
metadata:
  version: 1.0
# Metrics reported for this task. Every entry names a metric, the aggregation
# applied to it, and whether a larger value counts as better.
metric_list:
  # Overall accuracy, aggregated with `mean`.
  - metric: acc
    aggregation: mean
    higher_is_better: true
  # Accuracy variants with dedicated aggregators from `utils`.
  # NOTE(review): "amb" / "disamb" presumably stand for ambiguous and
  # disambiguated contexts; confirm against utils.
  - metric: accuracy_amb
    aggregation: !function utils.agg_accuracy_amb
    higher_is_better: true
  - metric: accuracy_disamb
    aggregation: !function utils.agg_accuracy_disamb
    higher_is_better: true
  # Bias scores without a category suffix. All bias scores in this list are
  # declared with higher_is_better: false.
  - metric: amb_bias_score
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  # Per-category amb_bias_score_<Category> metrics. Every entry uses the same
  # aggregator (utils.agg_amb_bias_scores), so any per-category selection
  # must happen before aggregation; presumably in the process_results hook,
  # verify against utils.
  - metric: amb_bias_score_Age
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Disability_status
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Gender_identity
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Nationality
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Physical_appearance
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Race_ethnicity
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Race_x_gender
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Race_x_SES
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Religion
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_SES
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  - metric: amb_bias_score_Sexual_orientation
    aggregation: !function utils.agg_amb_bias_scores
    higher_is_better: false
  # Per-category disamb_bias_score_<Category> metrics: the same eleven
  # category suffixes as above, all sharing utils.agg_disamb_bias_scores.
  - metric: disamb_bias_score_Age
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Disability_status
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Gender_identity
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Nationality
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Physical_appearance
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Race_ethnicity
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Race_x_gender
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Race_x_SES
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Religion
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_SES
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false
  - metric: disamb_bias_score_Sexual_orientation
    aggregation: !function utils.agg_disamb_bias_scores
    higher_is_better: false