Commit 176a4b1d authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into num_fewshot_fix
parents c3e3643d dc5b3d5d
...@@ -18,3 +18,5 @@ metric_list: ...@@ -18,3 +18,5 @@ metric_list:
- metric: !function t5_utils.f1 - metric: !function t5_utils.f1
aggregation: !function t5_utils.squad_f1_agg aggregation: !function t5_utils.squad_f1_agg
higher_is_better: true higher_is_better: true
metadata:
- version: 0.0
...@@ -11,3 +11,5 @@ doc_to_target: label ...@@ -11,3 +11,5 @@ doc_to_target: label
doc_to_choice: ['True', 'False'] doc_to_choice: ['True', 'False']
metric_list: metric_list:
- metric: acc - metric: acc
metadata:
- version: 0.0
...@@ -18,3 +18,5 @@ metric_list: ...@@ -18,3 +18,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata:
- version: 0.0
...@@ -11,3 +11,5 @@ doc_to_target: label ...@@ -11,3 +11,5 @@ doc_to_target: label
doc_to_choice: ['no', 'yes'] doc_to_choice: ['no', 'yes']
metric_list: metric_list:
- metric: acc - metric: acc
metadata:
- version: 1.0
...@@ -18,3 +18,5 @@ metric_list: ...@@ -18,3 +18,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata:
- version: 0.0
...@@ -11,3 +11,5 @@ doc_to_target: label ...@@ -11,3 +11,5 @@ doc_to_target: label
doc_to_choice: ['no', 'yes'] doc_to_choice: ['no', 'yes']
metric_list: metric_list:
- metric: acc - metric: acc
metadata:
- version: 1.0
...@@ -19,3 +19,5 @@ filter_list: ...@@ -19,3 +19,5 @@ filter_list:
- name: "wsc_postprocessor" - name: "wsc_postprocessor"
filter: filter:
- function: !function t5_utils.WSCPostprocess - function: !function t5_utils.WSCPostprocess
metadata:
- version: 0.0
...@@ -15,3 +15,5 @@ metric_list: ...@@ -15,3 +15,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -14,3 +14,5 @@ metric_list: ...@@ -14,3 +14,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -13,3 +13,5 @@ generation_kwargs: ...@@ -13,3 +13,5 @@ generation_kwargs:
do_sample: false do_sample: false
temperature: 0.0 temperature: 0.0
repeats: 1 repeats: 1
metadata:
- version: 0.0
...@@ -27,3 +27,5 @@ metric_list: ...@@ -27,3 +27,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata:
- version: 2.0
...@@ -75,3 +75,5 @@ metric_list: ...@@ -75,3 +75,5 @@ metric_list:
- metric: rougeL_diff - metric: rougeL_diff
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 2.0
...@@ -32,3 +32,5 @@ metric_list: ...@@ -32,3 +32,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 2.0
...@@ -9,3 +9,5 @@ metric_list: ...@@ -9,3 +9,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 2.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment