Unverified Commit de71ad92 authored by Lintang Sutawika, committed by GitHub

Merge branch 'big-refactor' into fix-unittests

parents 09d20bfa 73c80915
+include: _template_yaml
 task: squadv2
-dataset_path: squad_v2
-output_type: greedy_until
-training_split: train
-validation_split: validation
-doc_to_text: "Title: {{title}}\n\nBackground: {{context}}\n\nQuestion: {{question}}\n\n Answer:"
-doc_to_target: "{% if answers.text| length > 0 %}{{answers.text}}{% else %}{{['']}}{% endif %}"
-target_delimiter: ""
-should_decontaminate: true
-doc_to_decontamination_query: context
+output_type: generate_until
 generation_kwargs:
   until:
     - "\n"
-# filter_list:
-#   - name: remove_whitespace
-#     filter:
-#       - function: remove_whitespace
-#       - function: take_first
 metric_list:
   - metric: !function utils.exact
     aggregation: mean
...
-include: default.yaml
+include: _template_yaml
 task: squadv2_noans_loglikelihood
-dataset_path: squad_v2
 output_type: loglikelihood
-training_split: train
-validation_split: validation
 doc_to_target: " unanswerable"
 metric_list:
   - metric: perplexity
@@ -3,7 +3,7 @@ group:
 task: "boolq-seq2seq"
 dataset_path: super_glue
 dataset_name: boolq
-output_type: greedy_until
+output_type: generate_until
 training_split: train
 validation_split: validation
 doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:"
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: boolq
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: "boolq passage: {{passage}} question: {{question}}"
 doc_to_target: label
 doc_to_choice: ['False', 'True']
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: cb
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: "cb hypothesis: {{hypothesis}} premise: {{premise}}"
 doc_to_target: label
 doc_to_choice: ['entailment', 'contradiction', 'neutral']
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: copa
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: "copa choice1: {{choice1}} choice2: {{choice2}} premise: {{premise}} question: {{question}}"
 doc_to_target: label
 doc_to_choice: ['choice1', 'choice2']
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: multirc
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}"
 doc_to_target: label
 doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}"
...
@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt
 dataset_path: super_glue
 dataset_name: record
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 process_docs: !function t5_utils.process_docs
 doc_to_text: !function t5_utils.doc_to_text
 doc_to_target: "{{idx.passage|string}}+{{idx.query}}_{{answers}}"
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: rte
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: "rte hypothesis: {{hypothesis}} premise: {{premise}}"
 doc_to_target: label
 doc_to_choice: ['entailment', 'not_entailment']
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: wic
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}"
 doc_to_target: label
 doc_to_choice: ['False', 'True']
...
@@ -5,7 +5,7 @@ dataset_path: super_glue
 dataset_name: wsc.fixed
 training_split: train
 validation_split: validation
-output_type: greedy_until
+output_type: generate_until
 doc_to_text: !function "t5_utils.doc_to_text"
 doc_to_target: label
 generation_kwargs:
...
@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}}
 English phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - iwslt2017
 include: wmt_common_yaml
...
@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
 Arabic phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - iwslt2017
 include: wmt_common_yaml
...
@@ -58,7 +58,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
     try:
         source, target = code_to_language(src), code_to_language(tgt)
-        groups = ["greedy_until", "translation", lang]
+        groups = ["generate_until", "translation", lang]
         if lang in gpt3_translation_benchmarks.keys():
             groups += ["gpt3_translation_benchmarks"]
...
@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
 French phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - wmt14
   - gpt3_translation_benchmarks
...
@@ -6,7 +6,7 @@ doc_to_text: 'French phrase: {{translation["fr"]}}
 English phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - wmt14
   - gpt3_translation_benchmarks
...
@@ -6,7 +6,7 @@ doc_to_text: 'German phrase: {{translation["de"]}}
 English phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - wmt16
   - gpt3_translation_benchmarks
...
@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
 German phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - wmt16
   - gpt3_translation_benchmarks
...
@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
 Romanian phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - wmt16
   - gpt3_translation_benchmarks
...
@@ -6,7 +6,7 @@ doc_to_text: 'Romanian phrase: {{translation["ro"]}}
 English phrase:'
 group:
-  - greedy_until
+  - generate_until
   - translation
   - wmt16
   - gpt3_translation_benchmarks
...