# _bbh.yaml
# Group definition: collects the 27 `bbh_cot_fewshot_*` subtasks listed under
# `task` into a single group named `bbh`. Entries are kept in alphabetical
# order; each one must match the name of a task defined elsewhere.
group: bbh
task:
  - bbh_cot_fewshot_boolean_expressions
  - bbh_cot_fewshot_causal_judgement
  - bbh_cot_fewshot_date_understanding
  - bbh_cot_fewshot_disambiguation_qa
  - bbh_cot_fewshot_dyck_languages
  # The BIG-Bench Hard subtask is "formal_fallacies"; there is no
  # "formal_languages" subtask in the suite.
  - bbh_cot_fewshot_formal_fallacies
  - bbh_cot_fewshot_geometric_shapes
  - bbh_cot_fewshot_hyperbaton
  - bbh_cot_fewshot_logical_deduction_five_objects
  - bbh_cot_fewshot_logical_deduction_seven_objects
  - bbh_cot_fewshot_logical_deduction_three_objects
  - bbh_cot_fewshot_movie_recommendation
  - bbh_cot_fewshot_multistep_arithmetic_two
  - bbh_cot_fewshot_navigate
  - bbh_cot_fewshot_object_counting
  - bbh_cot_fewshot_penguins_in_a_table
  - bbh_cot_fewshot_reasoning_about_colored_objects
  - bbh_cot_fewshot_ruin_names
  - bbh_cot_fewshot_salient_translation_error_detection
  - bbh_cot_fewshot_snarks
  - bbh_cot_fewshot_sports_understanding
  - bbh_cot_fewshot_temporal_sequences
  - bbh_cot_fewshot_tracking_shuffled_objects_five_objects
  - bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
  - bbh_cot_fewshot_tracking_shuffled_objects_three_objects
  - bbh_cot_fewshot_web_of_lies
  - bbh_cot_fewshot_word_sorting
# Group-level score: one entry per metric to aggregate across the tasks in
# this group. Here a single entry aggregates `exact_match` with a mean.
# NOTE(review): some versions of the group schema spell this key
# `aggregate_metric_list` — confirm which spelling the consuming harness reads.
aggregate_metric:
  - metric: exact_match
    aggregation: mean
    # Presumably weights each subtask by its number of examples rather than
    # averaging subtask scores equally — TODO confirm against the harness.
    weight_by_size: true
# Free-form metadata attached to this group config.
metadata:
  # Unquoted, so YAML loads this as the float 2.0 (not the string "2.0").
  # Presumably the config's version tag reported by the harness — confirm
  # before changing its type or value.
  version: 2.0