Commit f77a3a27 authored by lintangsutawika


Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into mmlu_subgroups
parents 109ed1c7 f8342178
# Generated by utils.py
dataset_name: unit_conversion_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_unit_conversion_greedy_until
+task: bigbench_unit_conversion_generate_until

# Generated by utils.py
dataset_name: unit_interpretation_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_unit_interpretation_greedy_until
+task: bigbench_unit_interpretation_generate_until

# Generated by utils.py
dataset_name: unnatural_in_context_learning_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_unnatural_in_context_learning_greedy_until
+task: bigbench_unnatural_in_context_learning_generate_until

# Generated by utils.py
dataset_name: vitaminc_fact_verification_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_vitaminc_fact_verification_greedy_until
+task: bigbench_vitaminc_fact_verification_generate_until

# Generated by utils.py
dataset_name: what_is_the_tao_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_what_is_the_tao_greedy_until
+task: bigbench_what_is_the_tao_generate_until

# Generated by utils.py
dataset_name: which_wiki_edit_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_which_wiki_edit_greedy_until
+task: bigbench_which_wiki_edit_generate_until

# Generated by utils.py
dataset_name: winowhy_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_winowhy_greedy_until
+task: bigbench_winowhy_generate_until

# Generated by utils.py
dataset_name: word_sorting_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_word_sorting_greedy_until
+task: bigbench_word_sorting_generate_until

# Generated by utils.py
dataset_name: word_unscrambling_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_word_unscrambling_greedy_until
+task: bigbench_word_unscrambling_generate_until

group: bigbench
dataset_path: bigbench # will switch to `hails/bigbench` when all tasks are pushed
-output_type: greedy_until
+output_type: generate_until
dataset_kwargs:
  # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods
  # subtask_name: null
...
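Each of the BIG-bench subtask configs above carries a "# Generated by utils.py" header, so the per-subtask YAML files are stamped out from a template rather than written by hand. Below is a minimal, hypothetical sketch of such a generator; the subtask list, output directory, and file naming are illustrative assumptions, not the actual utils.py shipped with the harness.

# Hypothetical generator sketch in the spirit of the "# Generated by utils.py"
# headers above; subtask names and output path are placeholders.
from pathlib import Path

SUBTASKS = ["unit_conversion", "word_sorting", "word_unscrambling"]  # illustrative subset

TEMPLATE = """# Generated by utils.py
dataset_name: {name}_zero_shot
include: ../generate_until_template_yaml
task: bigbench_{name}_generate_until
"""

def write_configs(out_dir: str = "generate_until") -> None:
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    for name in SUBTASKS:
        # One YAML file per subtask, mirroring the diff entries above.
        (out / f"{name}.yaml").write_text(TEMPLATE.format(name=name))

if __name__ == "__main__":
    write_configs()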
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_go
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_java
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_javascript
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_php
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_python
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_ruby
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
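The CodeXGLUE code-to-text configs above keep their generation_kwargs (num_beams: 10, max_length: 128) unchanged; only output_type is renamed to generate_until. Those keys correspond to standard Hugging Face generation arguments, and a hedged sketch of how such kwargs would typically be applied with the transformers API is shown below; the model checkpoint and prompt are placeholders, and this is not the harness's own generation code.

# Sketch of forwarding generation_kwargs like the ones above to the Hugging Face
# transformers generate() API; checkpoint and prompt are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

generation_kwargs = {"num_beams": 10, "max_length": 128}  # as in the YAML above

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "def add(a, b):\n    return a + b\n# Summary:"
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(**inputs, **generation_kwargs)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))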
task: coqa
dataset_path: EleutherAI/coqa
-output_type: greedy_until
+output_type: generate_until
training_split: train
validation_split: validation
doc_to_text: !function utils.doc_to_text
...
task: drop
dataset_path: EleutherAI/drop
-output_type: greedy_until
+output_type: generate_until
training_split: train
validation_split: validation
process_docs: !function utils.process_docs
...
@@ -3,7 +3,7 @@ group:
task: gsm8k_cot
dataset_path: gsm8k
dataset_name: main
-output_type: greedy_until
+output_type: generate_until
test_split: test
doc_to_text: "Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\n\nA: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6. The answer is 6.\n\n\
Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\n\nA: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. The answer is 5.\n\n\
...
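The gsm8k_cot prompt above ends every worked example with "The answer is N.", so the final numeric answer in a model completion can be recovered with a small regex before scoring. The pattern below is an illustrative assumption about that extraction step, not necessarily the exact filter the harness configures for this task.

# Illustrative extraction of the final "The answer is N." value from a
# chain-of-thought completion; the regex is an assumption, not the harness's filter.
import re

def extract_answer(completion: str) -> str | None:
    matches = re.findall(r"The answer is \$?(-?\d[\d,]*(?:\.\d+)?)", completion)
    # Use the last occurrence so earlier reasoning steps do not interfere.
    return matches[-1].replace(",", "") if matches else None

print(extract_answer("3 + 2 = 5. The answer is 5."))  # -> 5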
@@ -3,7 +3,7 @@ group:
task: gsm8k_yaml
dataset_path: gsm8k
dataset_name: main
-output_type: greedy_until
+output_type: generate_until
training_split: train
fewshot_split: train
test_split: test
...
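Several of the configs above (coqa, drop) point at Python callables with a tag of the form doc_to_text: !function utils.doc_to_text, which plain PyYAML does not understand. A minimal sketch of how such a tag could be resolved by importing the named module and attribute is shown below; this is an assumption about the mechanism for illustration, not the harness's actual config loader.

# Minimal sketch of resolving "!function module.attr" tags when loading task
# YAML like the configs above; an assumption, not the harness's own loader.
import importlib
import yaml

def function_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode):
    # "utils.doc_to_text" -> import module "utils", return attribute "doc_to_text"
    target = loader.construct_scalar(node)
    module_name, _, attr_name = target.rpartition(".")
    return getattr(importlib.import_module(module_name), attr_name)

yaml.SafeLoader.add_constructor("!function", function_constructor)

# Demo with a module/attribute that exists everywhere; real configs would
# reference helpers such as utils.doc_to_text next to the YAML file.
config = yaml.safe_load("doc_to_text: !function os.path.join")
print(config["doc_to_text"])  # the resolved callable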