Commit 6348b947 authored by haileyschoelkopf
Browse files

fix bbh aggregation filter usage

parent 94673d40
......@@ -31,5 +31,6 @@ aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
filter_list: get-answer
metadata:
version: 2.0
......@@ -5,7 +5,7 @@ task:
- bbh_cot_fewshot_date_understanding
- bbh_cot_fewshot_disambiguation_qa
- bbh_cot_fewshot_dyck_languages
- bbh_cot_fewshot_formal_languages
- bbh_cot_fewshot_formal_fallacies
- bbh_cot_fewshot_geometric_shapes
- bbh_cot_fewshot_hyperbaton
- bbh_cot_fewshot_logical_deduction_five_objects
......@@ -31,5 +31,6 @@ aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
filter_list: get-answer
metadata:
version: 2.0
......@@ -5,7 +5,7 @@ task:
- bbh_cot_zeroshot_date_understanding
- bbh_cot_zeroshot_disambiguation_qa
- bbh_cot_zeroshot_dyck_languages
- bbh_cot_zeroshot_formal_languages
- bbh_cot_zeroshot_formal_fallacies
- bbh_cot_zeroshot_geometric_shapes
- bbh_cot_zeroshot_hyperbaton
- bbh_cot_zeroshot_logical_deduction_five_objects
......@@ -31,5 +31,6 @@ aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
filter_list: flexible-extract
metadata:
version: 2.0
......@@ -5,7 +5,7 @@ task:
- bbh_fewshot_date_understanding
- bbh_fewshot_disambiguation_qa
- bbh_fewshot_dyck_languages
- bbh_fewshot_formal_languages
- bbh_fewshot_formal_fallacies
- bbh_fewshot_geometric_shapes
- bbh_fewshot_hyperbaton
- bbh_fewshot_logical_deduction_five_objects
......
......@@ -5,7 +5,7 @@ task:
- bbh_zeroshot_date_understanding
- bbh_zeroshot_disambiguation_qa
- bbh_zeroshot_dyck_languages
- bbh_zeroshot_formal_languages
- bbh_zeroshot_formal_fallacies
- bbh_zeroshot_geometric_shapes
- bbh_zeroshot_hyperbaton
- bbh_zeroshot_logical_deduction_five_objects
......@@ -31,5 +31,6 @@ aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
filter_list: flexible-extract
metadata:
version: 2.0
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment