Commit f77a3a27 authored by lintangsutawika


Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into mmlu_subgroups
parents 109ed1c7 f8342178
# Generated by utils.py
dataset_name: unit_conversion_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_unit_conversion_greedy_until
+task: bigbench_unit_conversion_generate_until

# Generated by utils.py
dataset_name: unit_interpretation_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_unit_interpretation_greedy_until
+task: bigbench_unit_interpretation_generate_until

# Generated by utils.py
dataset_name: unnatural_in_context_learning_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_unnatural_in_context_learning_greedy_until
+task: bigbench_unnatural_in_context_learning_generate_until

# Generated by utils.py
dataset_name: vitaminc_fact_verification_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_vitaminc_fact_verification_greedy_until
+task: bigbench_vitaminc_fact_verification_generate_until

# Generated by utils.py
dataset_name: what_is_the_tao_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_what_is_the_tao_greedy_until
+task: bigbench_what_is_the_tao_generate_until

# Generated by utils.py
dataset_name: which_wiki_edit_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_which_wiki_edit_greedy_until
+task: bigbench_which_wiki_edit_generate_until

# Generated by utils.py
dataset_name: winowhy_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_winowhy_greedy_until
+task: bigbench_winowhy_generate_until

# Generated by utils.py
dataset_name: word_sorting_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_word_sorting_greedy_until
+task: bigbench_word_sorting_generate_until

# Generated by utils.py
dataset_name: word_unscrambling_zero_shot
-include: ../greedy_until_template_yaml
+include: ../generate_until_template_yaml
-task: bigbench_word_unscrambling_greedy_until
+task: bigbench_word_unscrambling_generate_until

group: bigbench
dataset_path: bigbench # will switch to `hails/bigbench` when all tasks are pushed
-output_type: greedy_until
+output_type: generate_until
dataset_kwargs:
  # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods
  # subtask_name: null
...
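Each of the BIG-bench subtask configs above carries a "# Generated by utils.py" header, so the per-subtask YAML files are stamped out from a template rather than written by hand. Below is a minimal, hypothetical sketch of such a generator; the subtask list, output directory, and file naming are illustrative assumptions, not the actual utils.py shipped with the harness.

# Hypothetical generator sketch in the spirit of the "# Generated by utils.py"
# headers above; subtask names and output path are placeholders.
from pathlib import Path

SUBTASKS = ["unit_conversion", "word_sorting", "word_unscrambling"]  # illustrative subset

TEMPLATE = """# Generated by utils.py
dataset_name: {name}_zero_shot
include: ../generate_until_template_yaml
task: bigbench_{name}_generate_until
"""

def write_configs(out_dir: str = "generate_until") -> None:
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    for name in SUBTASKS:
        # One YAML file per subtask, mirroring the diff entries above.
        (out / f"{name}.yaml").write_text(TEMPLATE.format(name=name))

if __name__ == "__main__":
    write_configs()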
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_go
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_java
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_javascript
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_php
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_python
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
@@ -5,7 +5,7 @@ dataset_path: CM/codexglue_code2text_ruby
training_split: train
validation_split: validation
test_split: test
-output_type: greedy_until
+output_type: generate_until
generation_kwargs:
  num_beams: 10
  max_length: 128
...
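The CodeXGLUE code-to-text configs above keep their generation_kwargs (num_beams: 10, max_length: 128) unchanged; only output_type is renamed to generate_until. Those keys correspond to standard Hugging Face generation arguments, and a hedged sketch of how such kwargs would typically be applied with the transformers API is shown below; the model checkpoint and prompt are placeholders, and this is not the harness's own generation code.

# Sketch of forwarding generation_kwargs like the ones above to the Hugging Face
# transformers generate() API; checkpoint and prompt are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

generation_kwargs = {"num_beams": 10, "max_length": 128}  # as in the YAML above

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "def add(a, b):\n    return a + b\n# Summary:"
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(**inputs, **generation_kwargs)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))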
task: coqa
dataset_path: EleutherAI/coqa
-output_type: greedy_until
+output_type: generate_until
training_split: train
validation_split: validation
doc_to_text: !function utils.doc_to_text
...
task: drop
dataset_path: EleutherAI/drop
-output_type: greedy_until
+output_type: generate_until
training_split: train
validation_split: validation
process_docs: !function utils.process_docs
...
@@ -3,7 +3,7 @@ group:
task: gsm8k_cot
dataset_path: gsm8k
dataset_name: main
-output_type: greedy_until
+output_type: generate_until
test_split: test
doc_to_text: "Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\n\nA: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6. The answer is 6.\n\n\
Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\n\nA: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. The answer is 5.\n\n\
...
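The gsm8k_cot prompt above ends every worked example with "The answer is N.", so the final numeric answer in a model completion can be recovered with a small regex before scoring. The pattern below is an illustrative assumption about that extraction step, not necessarily the exact filter the harness configures for this task.

# Illustrative extraction of the final "The answer is N." value from a
# chain-of-thought completion; the regex is an assumption, not the harness's filter.
import re

def extract_answer(completion: str) -> str | None:
    matches = re.findall(r"The answer is \$?(-?\d[\d,]*(?:\.\d+)?)", completion)
    # Use the last occurrence so earlier reasoning steps do not interfere.
    return matches[-1].replace(",", "") if matches else None

print(extract_answer("3 + 2 = 5. The answer is 5."))  # -> 5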
@@ -3,7 +3,7 @@ group:
task: gsm8k_yaml
dataset_path: gsm8k
dataset_name: main
-output_type: greedy_until
+output_type: generate_until
training_split: train
fewshot_split: train
test_split: test
...
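Several of the configs above (coqa, drop) point at Python callables with a tag of the form doc_to_text: !function utils.doc_to_text, which plain PyYAML does not understand. A minimal sketch of how such a tag could be resolved by importing the named module and attribute is shown below; this is an assumption about the mechanism for illustration, not the harness's actual config loader.

# Minimal sketch of resolving "!function module.attr" tags when loading task
# YAML like the configs above; an assumption, not the harness's own loader.
import importlib
import yaml

def function_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode):
    # "utils.doc_to_text" -> import module "utils", return attribute "doc_to_text"
    target = loader.construct_scalar(node)
    module_name, _, attr_name = target.rpartition(".")
    return getattr(importlib.import_module(module_name), attr_name)

yaml.SafeLoader.add_constructor("!function", function_constructor)

# Demo with a module/attribute that exists everywhere; real configs would
# reference helpers such as utils.doc_to_text next to the YAML file.
config = yaml.safe_load("doc_to_text: !function os.path.join")
print(config["doc_to_text"])  # the resolved callable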