"docs/vscode:/vscode.git/clone" did not exist on "ad4125d1a9c4796cdbc6c6a5cdb69b09e60e5509"
Unverified commit 26bc3eab, authored by Lintang Sutawika, committed by GitHub
Browse files

Merge branch 'big-refactor' into model-written-eval

parents 0d701496 cf617ab1
...@@ -2,7 +2,7 @@ group: mmlu_flan_cot_zeroshot ...@@ -2,7 +2,7 @@ group: mmlu_flan_cot_zeroshot
dataset_path: cais/mmlu dataset_path: cais/mmlu
validation_split: validation validation_split: validation
fewshot_split: dev fewshot_split: dev
output_type: greedy_until output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step." doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
filter_list: filter_list:
......
...@@ -2,7 +2,7 @@ group: mmlu_flan_n_shot_generative ...@@ -2,7 +2,7 @@ group: mmlu_flan_n_shot_generative
dataset_path: cais/mmlu dataset_path: cais/mmlu
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
output_type: greedy_until output_type: generate_until
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: " doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: "
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
generation_kwargs: generation_kwargs:
......
task: nq_open task: nq_open
dataset_path: nq_open dataset_path: nq_open
output_type: greedy_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
description: "Answer these questions:\n" description: "Answer these questions:\n"
......
...@@ -3,7 +3,7 @@ group: ...@@ -3,7 +3,7 @@ group:
task: polemo2_in task: polemo2_in
dataset_path: allegro/klej-polemo2-in dataset_path: allegro/klej-polemo2-in
dataset_name: klej-polemo2-in dataset_name: klej-polemo2-in
output_type: greedy_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
test_split: test test_split: test
......
group: qasper group: qasper
task: qasper_freeform task: qasper_freeform
dataset_path: qasper dataset_path: qasper
output_type: greedy_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
process_docs: !function utils.process_docs_freeform process_docs: !function utils.process_docs_freeform
......
...@@ -2,25 +2,44 @@ ...@@ -2,25 +2,44 @@
### Paper ### Paper
Title: `paper title goes here` Title: `Know What You Don’t Know: Unanswerable Questions for SQuAD`
Abstract: `link to paper PDF or arXiv abstract goes here` Abstract: https://arxiv.org/abs/1806.03822
`Short description of paper / benchmark goes here:` Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset,
consisting of questions posed by crowdworkers on a set of Wikipedia articles,
where the answer to every question is a segment of text, or span, from the
corresponding reading passage, or the question might be unanswerable.
SQuAD2.0 combines the 100,000 questions in SQuAD1.1 with over 50,000 unanswerable
questions written adversarially by crowdworkers to look similar to answerable ones.
To do well on SQuAD2.0, systems must not only answer questions when possible, but
also determine when no answer is supported by the paragraph and abstain from answering.
Homepage: `homepage to the benchmark's website goes here, if applicable` Homepage: https://rajpurkar.github.io/SQuAD-explorer/
### Citation ### Citation
``` ```
BibTeX-formatted citation goes here @misc{rajpurkar2018know,
title={Know What You Don't Know: Unanswerable Questions for SQuAD},
author={Pranav Rajpurkar and Robin Jia and Percy Liang},
year={2018},
eprint={1806.03822},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
``` ```
### Subtasks ### Groups and Tasks
List or describe tasks defined in this folder, and their names here: #### Groups
* `task_name`: `1-sentence description of what this particular task does`
* `task_name2`: ..... * `squadv2_complete`: Runs both `squadv2` and `squadv2_noans_loglikelihood`
#### Tasks
* `squadv2`: `Default squadv2 task`
* `squadv2_noans_loglikelihood`: `Additional task to obtain the probability of the model predicting that there is no answer`
### Checklist ### Checklist
......
dataset_path: squad_v2
training_split: train
validation_split: validation
doc_to_text: "Title: {{title}}\n\nBackground: {{context}}\n\nQuestion: {{question}}\n\n Answer:"
doc_to_target: "{% if answers.text| length > 0 %}{{answers.text}}{% else %}{{['']}}{% endif %}"
target_delimiter: ""
should_decontaminate: true
doc_to_decontamination_query: context
include: _template_yaml
task: squadv2 task: squadv2
dataset_path: squad_v2 output_type: generate_until
output_type: greedy_until
training_split: train
validation_split: validation
doc_to_text: "Title: {{title}}\n\nBackground: {{context}}\n\nQuestion: {{question}}\n\n Answer:"
doc_to_target: "{% if answers.text| length > 0 %}{{answers.text}}{% else %}{{['']}}{% endif %}"
target_delimiter: ""
should_decontaminate: true
doc_to_decontamination_query: context
generation_kwargs: generation_kwargs:
until: until:
- "\n" - "\n"
# filter_list:
# - name: remove_whitespace
# filter:
# - function: remove_whitespace
# - function: take_first
metric_list: metric_list:
- metric: !function utils.exact - metric: !function utils.exact
aggregation: mean aggregation: mean
......
include: default.yaml include: _template_yaml
task: squadv2_noans_loglikelihood task: squadv2_noans_loglikelihood
dataset_path: squad_v2
output_type: loglikelihood output_type: loglikelihood
training_split: train
validation_split: validation
doc_to_target: " unanswerable" doc_to_target: " unanswerable"
metric_list: metric_list:
- metric: perplexity - metric: perplexity
...@@ -3,7 +3,7 @@ group: ...@@ -3,7 +3,7 @@ group:
task: "boolq-seq2seq" task: "boolq-seq2seq"
dataset_path: super_glue dataset_path: super_glue
dataset_name: boolq dataset_name: boolq
output_type: greedy_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:"
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: boolq dataset_name: boolq
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "boolq passage: {{passage}} question: {{question}}" doc_to_text: "boolq passage: {{passage}} question: {{question}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['False', 'True'] doc_to_choice: ['False', 'True']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: cb dataset_name: cb
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "cb hypothesis: {{hypothesis}} premise: {{premise}}" doc_to_text: "cb hypothesis: {{hypothesis}} premise: {{premise}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['entailment', 'contradiction', 'neutral'] doc_to_choice: ['entailment', 'contradiction', 'neutral']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: copa dataset_name: copa
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "copa choice1: {{choice1}} choice2: {{choice2}} premise: {{premise}} question: {{question}}" doc_to_text: "copa choice1: {{choice1}} choice2: {{choice2}} premise: {{premise}} question: {{question}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['choice1', 'choice2'] doc_to_choice: ['choice1', 'choice2']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: multirc dataset_name: multirc
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}" doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}"
doc_to_target: label doc_to_target: label
doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}" doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}"
......
...@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt ...@@ -4,7 +4,7 @@ task: super_glue-record-t5-prompt
dataset_path: super_glue dataset_path: super_glue
dataset_name: record dataset_name: record
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
process_docs: !function t5_utils.process_docs process_docs: !function t5_utils.process_docs
doc_to_text: !function t5_utils.doc_to_text doc_to_text: !function t5_utils.doc_to_text
doc_to_target: "{{idx.passage|string}}+{{idx.query}}_{{answers}}" doc_to_target: "{{idx.passage|string}}+{{idx.query}}_{{answers}}"
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: rte dataset_name: rte
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "rte hypothesis: {{hypothesis}} premise: {{premise}}" doc_to_text: "rte hypothesis: {{hypothesis}} premise: {{premise}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['entailment', 'not_entailment'] doc_to_choice: ['entailment', 'not_entailment']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wic dataset_name: wic
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}" doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['False', 'True'] doc_to_choice: ['False', 'True']
......
...@@ -5,7 +5,7 @@ dataset_path: super_glue ...@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wsc.fixed dataset_name: wsc.fixed
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: generate_until
doc_to_text: !function "t5_utils.doc_to_text" doc_to_text: !function "t5_utils.doc_to_text"
doc_to_target: label doc_to_target: label
generation_kwargs: generation_kwargs:
......
...@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'Arabic phrase: {{translation["ar"]}}
English phrase:' English phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- iwslt2017 - iwslt2017
include: wmt_common_yaml include: wmt_common_yaml
......
...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}} ...@@ -6,7 +6,7 @@ doc_to_text: 'English phrase: {{translation["en"]}}
Arabic phrase:' Arabic phrase:'
group: group:
- greedy_until - generate_until
- translation - translation
- iwslt2017 - iwslt2017
include: wmt_common_yaml include: wmt_common_yaml
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment