Commit f77a3a27 authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into mmlu_subgroups
parents 109ed1c7 f8342178
......@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: multirc
training_split: train
validation_split: validation
-output_type: greedy_until
+output_type: generate_until
doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}"
doc_to_target: label
doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}"
......
......@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt
dataset_path: super_glue
dataset_name: record
validation_split: validation
-output_type: greedy_until
+output_type: generate_until
process_docs: !function t5_utils.process_docs
doc_to_text: !function t5_utils.doc_to_text
doc_to_target: "{{idx.passage|string}}+{{idx.query}}_{{answers}}"
......
......@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: rte
training_split: train
validation_split: validation
-output_type: greedy_until
+output_type: generate_until
doc_to_text: "rte hypothesis: {{hypothesis}} premise: {{premise}}"
doc_to_target: label
doc_to_choice: ['entailment', 'not_entailment']
......
......@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wic
training_split: train
validation_split: validation
-output_type: greedy_until
+output_type: generate_until
doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}"
doc_to_target: label
doc_to_choice: ['False', 'True']
......
......@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wsc.fixed
training_split: train
validation_split: validation
-output_type: greedy_until
+output_type: generate_until
doc_to_text: !function "t5_utils.doc_to_text"
doc_to_target: label
generation_kwargs:
......
......@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}}
English phrase:'
group:
-- greedy_until
+- generate_until
- translation
- iwslt2017
include: wmt_common_yaml
......
......@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
Arabic phrase:'
group:
-- greedy_until
+- generate_until
- translation
- iwslt2017
include: wmt_common_yaml
......
......@@ -58,7 +58,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
try:
source, target = code_to_language(src), code_to_language(tgt)
-groups = ["greedy_until", "translation", lang]
+groups = ["generate_until", "translation", lang]
if lang in gpt3_translation_benchmarks.keys():
groups += ["gpt3_translation_benchmarks"]
......
......@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
French phrase:'
group:
-- greedy_until
+- generate_until
- translation
- wmt14
- gpt3_translation_benchmarks
......
......@@ -6,7 +6,7 @@ doc_to_text: 'French phrase: {{translation["fr"]}}
English phrase:'
group:
-- greedy_until
+- generate_until
- translation
- wmt14
- gpt3_translation_benchmarks
......
......@@ -6,7 +6,7 @@ doc_to_text: 'German phrase: {{translation["de"]}}
English phrase:'
group:
-- greedy_until
+- generate_until
- translation
- wmt16
- gpt3_translation_benchmarks
......
......@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
German phrase:'
group:
-- greedy_until
+- generate_until
- translation
- wmt16
- gpt3_translation_benchmarks
......
......@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
Romanian phrase:'
group:
-- greedy_until
+- generate_until
- translation
- wmt16
- gpt3_translation_benchmarks
......
......@@ -6,7 +6,7 @@ doc_to_text: 'Romanian phrase: {{translation["ro"]}}
English phrase:'
group:
-- greedy_until
+- generate_until
- translation
- wmt16
- gpt3_translation_benchmarks
......
-output_type: greedy_until
+output_type: generate_until
training_split: train
validation_split: validation
fewshot_split: validation
......
task: triviaqa
dataset_path: trivia_qa
dataset_name: rc.nocontext
-output_type: greedy_until
+output_type: generate_until
training_split: train
validation_split: validation
doc_to_text: "Question: {{question}}?\nAnswer:"
......
......@@ -3,7 +3,7 @@ group:
task: truthfulqa_gen
dataset_path: truthful_qa
dataset_name: generation
-output_type: greedy_until
+output_type: generate_until
training_split: null
validation_split: validation
test_split: null
......
......@@ -3,7 +3,7 @@ group:
task: anagrams1
dataset_path: EleutherAI/unscramble
dataset_name: mid_word_1_anagrams
-output_type: greedy_until
+output_type: generate_until
test_split: validation
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
......
......@@ -3,7 +3,7 @@ group:
task: anagrams2
dataset_path: EleutherAI/unscramble
dataset_name: mid_word_2_anagrams
-output_type: greedy_until
+output_type: generate_until
test_split: validation
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
......
......@@ -3,7 +3,7 @@ group:
task: cycle_letters
dataset_path: EleutherAI/unscramble
dataset_name: cycle_letters_in_word
-output_type: greedy_until
+output_type: generate_until
test_split: validation
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment