Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into patch-scrolls

Commit eb834c98 · lintangsutawika · parents: 12f260cf, e5dfd030
Commit eb834c98 authored Dec 07, 2023 by lintangsutawika
8 changed files
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10256836.svg)](https://doi.org/10.5281/zenodo.10256836)

 ## Announcement
-**A new v0.4.0 release of lm-evaluation-harness is available** ! 
+**A new v0.4.0 release of lm-evaluation-harness is available** !

 New updates and features include:


--- a/lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml
+++ b/lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml
@@ -24,5 +24,6 @@ filter_list:
      - function: "regex"
        regex_pattern: "(?<=the answer is )(.*)(?=.)"
      - function: "take_first"
+num_fewshot: 0
 metadata:
  - version: 0.0
--- a/lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
+++ b/lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
@@ -22,5 +22,6 @@ filter_list:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
+num_fewshot: 0
 metadata:
  - version: 0
--- a/lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
+++ b/lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
@@ -16,5 +16,6 @@ generation_kwargs:
    - "\n\n"
  do_sample: false
  temperature: 0.0
+num_fewshot: 0
 metadata:
  - version: 0
--- a/lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
+++ b/lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
@@ -16,5 +16,6 @@ generation_kwargs:
    - "\n\n"
  do_sample: false
  temperature: 0.0
+num_fewshot: 0
 metadata:
  - version: 0
--- a/lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
+++ b/lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
@@ -19,5 +19,6 @@ metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
+num_fewshot: 0
 metadata:
  - version: 0.0
--- a/lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu_flan_cot_fewshot_template_yaml
+++ b/lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu_flan_cot_fewshot_template_yaml
@@ -15,6 +15,7 @@ generation_kwargs:
    - "</s>"
  do_sample: false
  temperature: 0.0
+num_fewshot: 0
 metric_list:
  - metric: exact_match
    aggregation: mean

--- a/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml
+++ b/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml
@@ -15,6 +15,7 @@ generation_kwargs:
    - "</s>"
  do_sample: false
  temperature: 0.0
+num_fewshot: 0
 metric_list:
  - metric: exact_match
    aggregation: mean