Commit f77a3a27 authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into mmlu_subgroups
parents 109ed1c7 f8342178
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: multirc dataset_name: multirc
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}" doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}"
doc_to_target: label doc_to_target: label
doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}" doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}"
......
...@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt ...@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt
dataset_path: super_glue dataset_path: super_glue
dataset_name: record dataset_name: record
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
process_docs: !function t5_utils.process_docs process_docs: !function t5_utils.process_docs
doc_to_text: !function t5_utils.doc_to_text doc_to_text: !function t5_utils.doc_to_text
doc_to_target: "{{idx.passage|string}}+{{idx.query}}_{{answers}}" doc_to_target: "{{idx.passage|string}}+{{idx.query}}_{{answers}}"
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: rte dataset_name: rte
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "rte hypothesis: {{hypothesis}} premise: {{premise}}" doc_to_text: "rte hypothesis: {{hypothesis}} premise: {{premise}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['entailment', 'not_entailment'] doc_to_choice: ['entailment', 'not_entailment']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wic dataset_name: wic
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}" doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['False', 'True'] doc_to_choice: ['False', 'True']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wsc.fixed dataset_name: wsc.fixed
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: !function "t5_utils.doc_to_text" doc_to_text: !function "t5_utils.doc_to_text"
doc_to_target: label doc_to_target: label
generation_kwargs: generation_kwargs:
......
...@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}}
English phrase:' English phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- iwslt2017 - iwslt2017
include: wmt_common_yaml include: wmt_common_yaml
......
...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
Arabic phrase:' Arabic phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- iwslt2017 - iwslt2017
include: wmt_common_yaml include: wmt_common_yaml
......
...@@ -58,7 +58,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: ...@@ -58,7 +58,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
try: try:
source, target = code_to_language(src), code_to_language(tgt) source, target = code_to_language(src), code_to_language(tgt)
groups = ["greedy_until", "translation", lang] groups = ["generate_until", "translation", lang]
if lang in gpt3_translation_benchmarks.keys(): if lang in gpt3_translation_benchmarks.keys():
groups += ["gpt3_translation_benchmarks"] groups += ["gpt3_translation_benchmarks"]
......
...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
French phrase:' French phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- wmt14 - wmt14
- gpt3_translation_benchmarks - gpt3_translation_benchmarks
......
...@@ -6,7 +6,7 @@ doc_to_text: 'French phrase: {{translation["fr"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'French phrase: {{translation["fr"]}}
English phrase:' English phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- wmt14 - wmt14
- gpt3_translation_benchmarks - gpt3_translation_benchmarks
......
...@@ -6,7 +6,7 @@ doc_to_text: 'German phrase: {{translation["de"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'German phrase: {{translation["de"]}}
English phrase:' English phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- wmt16 - wmt16
- gpt3_translation_benchmarks - gpt3_translation_benchmarks
......
...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
German phrase:' German phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- wmt16 - wmt16
- gpt3_translation_benchmarks - gpt3_translation_benchmarks
......
...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
Romanian phrase:' Romanian phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- wmt16 - wmt16
- gpt3_translation_benchmarks - gpt3_translation_benchmarks
......
...@@ -6,7 +6,7 @@ doc_to_text: 'Romanian phrase: {{translation["ro"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'Romanian phrase: {{translation["ro"]}}
English phrase:' English phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- wmt16 - wmt16
- gpt3_translation_benchmarks - gpt3_translation_benchmarks
......
output_type: greedy_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
fewshot_split: validation fewshot_split: validation
......
task: triviaqa task: triviaqa
dataset_path: trivia_qa dataset_path: trivia_qa
dataset_name: rc.nocontext dataset_name: rc.nocontext
output_type: greedy_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
doc_to_text: "Question: {{question}}?\nAnswer:" doc_to_text: "Question: {{question}}?\nAnswer:"
......
...@@ -3,7 +3,7 @@ group: ...@@ -3,7 +3,7 @@ group:
task: truthfulqa_gen task: truthfulqa_gen
dataset_path: truthful_qa dataset_path: truthful_qa
dataset_name: generation dataset_name: generation
output_type: greedy_until output_type: generate_until
training_split: null training_split: null
validation_split: validation validation_split: validation
test_split: null test_split: null
......
...@@ -3,7 +3,7 @@ group: ...@@ -3,7 +3,7 @@ group:
task: anagrams1 task: anagrams1
dataset_path: EleutherAI/unscramble dataset_path: EleutherAI/unscramble
dataset_name: mid_word_1_anagrams dataset_name: mid_word_1_anagrams
output_type: greedy_until output_type: generate_until
test_split: validation test_split: validation
doc_to_text: "{{context}}" doc_to_text: "{{context}}"
doc_to_target: "{{completion}}" doc_to_target: "{{completion}}"
......
...@@ -3,7 +3,7 @@ group: ...@@ -3,7 +3,7 @@ group:
task: anagrams2 task: anagrams2
dataset_path: EleutherAI/unscramble dataset_path: EleutherAI/unscramble
dataset_name: mid_word_2_anagrams dataset_name: mid_word_2_anagrams
output_type: greedy_until output_type: generate_until
test_split: validation test_split: validation
doc_to_text: "{{context}}" doc_to_text: "{{context}}"
doc_to_target: "{{completion}}" doc_to_target: "{{completion}}"
......
...@@ -3,7 +3,7 @@ group: ...@@ -3,7 +3,7 @@ group:
task: cycle_letters task: cycle_letters
dataset_path: EleutherAI/unscramble dataset_path: EleutherAI/unscramble
dataset_name: cycle_letters_in_word dataset_name: cycle_letters_in_word
output_type: greedy_until output_type: generate_until
test_split: validation test_split: validation
doc_to_text: "{{context}}" doc_to_text: "{{context}}"
doc_to_target: "{{completion}}" doc_to_target: "{{completion}}"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment