Unverified commit 2c20cd1f authored by Lintang Sutawika, committed by GitHub

Merge pull request #671 from EleutherAI/revamp-process

Revamp process
parents 6862fa7d 0dadc92a
@@ -5,10 +5,10 @@ dataset_path: qa4mre
 dataset_name: 2011.main.EN
 output_type: multiple_choice
 test_split: train
-template_aliases: "{% set answer_choices = answer_options['answer_str'] %}"
-doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nChoices:\n- {{answer_choices|join('\n- ')}}\nAnswer:"
-doc_to_target: !function preprocess_qa4mre.doc_to_target
-gold_alias: !function preprocess_qa4mre.qa4mre_process
+# doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nChoices:\n- {{answer_choices|join('\n- ')}}\nAnswer:"
+doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nAnswer:"
+doc_to_target: "{{correct_answer_id|int - 1}}"
+doc_to_choice: "{{answer_options.answer_str}}"
 should_decontaminate: true
 doc_to_decontamination_query: "{{document_str.strip()}} + ' ' + {{question_str}}"
 metric_list:
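
Worth noting for the qa4mre change above: doc_to_choice now yields the list of answer strings straight from the document, and doc_to_target is the 0-based index of the gold choice, derived from the dataset's 1-based correct_answer_id. A minimal sketch of that rendering on a made-up qa4mre-style record, using plain Jinja2 rather than the harness's own template machinery:

# Sketch only (not the harness implementation): render the new-style
# doc_to_target template for a hypothetical qa4mre-like document.
from jinja2 import Template

doc = {
    "correct_answer_id": "2",  # qa4mre stores a 1-based answer id
    "answer_options": {"answer_str": ["Paris", "Rome", "Berlin"]},
}

# doc_to_target: "{{correct_answer_id|int - 1}}" -> 0-based gold index
gold_idx = int(Template("{{correct_answer_id|int - 1}}").render(**doc))

# doc_to_choice: "{{answer_options.answer_str}}" -> list of answer strings
choices = doc["answer_options"]["answer_str"]

assert gold_idx == 1 and choices[gold_idx] == "Rome"
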
 group:
   - multiple_choice
+include: qa4mre_2011.yaml
 task: qa4mre_2012
 dataset_path: qa4mre
 dataset_name: 2012.main.EN
-output_type: multiple_choice
-test_split: train
-template_aliases: "{% set answer_choices = answer_options['answer_str'] %}"
-doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nChoices:\n- {{answer_choices|join('\n- ')}}\nAnswer:"
-doc_to_target: !function preprocess_qa4mre.doc_to_target
-gold_alias: !function preprocess_qa4mre.qa4mre_process
-should_decontaminate: true
-doc_to_decontamination_query: "{{document_str.strip()}} + ' ' + {{question_str}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
-  - metric: acc_norm
-    aggregation: mean
-    higher_is_better: true
 group:
   - multiple_choice
+include: qa4mre_2011.yaml
 task: qa4mre_2013
 dataset_path: qa4mre
 dataset_name: 2013.main.EN
-output_type: multiple_choice
-test_split: train
-template_aliases: "{% set answer_choices = answer_options['answer_str'] %}"
-doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nChoices:\n- {{answer_choices|join('\n- ')}}\nAnswer:"
-doc_to_target: !function preprocess_qa4mre.doc_to_target
-gold_alias: !function preprocess_qa4mre.qa4mre_process
-should_decontaminate: true
-doc_to_decontamination_query: "{{document_str.strip()}} + ' ' + {{question_str}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
-  - metric: acc_norm
-    aggregation: mean
-    higher_is_better: true
@@ -15,7 +15,7 @@ def get_answer_option(problem):
     return problem["options"][answer]


-def create_choices(doc):
+def doc_to_choice(doc):
     problem = last_problem(doc)
     choices = [problem["options"][i] for i in range(4)]
     return choices
@@ -5,9 +5,9 @@ dataset_path: EleutherAI/race
 dataset_name: high
 output_type: multiple_choice
 test_split: test
-create_choices: !function preprocess_race.create_choices
 doc_to_text: !function preprocess_race.doc_to_text
 doc_to_target: !function preprocess_race.doc_to_target
+doc_to_choice: !function preprocess_race.doc_to_choice
 metric_list:
   - metric: acc
     aggregation: mean
@@ -7,10 +7,11 @@ output_type: multiple_choice
 training_split: train
 validation_split: validation
 test_split: test
-template_aliases: "{% set answer_choices = [distractor1, distractor2, distractor3, correct_answer] %}{% set gold = 3 %}" # set the list of possible answer choices, and set what this doc's gold label idx is
 doc_to_text: "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:"
-doc_to_target: "{{correct_answer}}"
-gold_alias: "{{gold}}" # this will be cast to an int.
+doc_to_target: 3
+doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}"
 should_decontaminate: true
 doc_to_decontamination_query: "{{support}} {{question}}"
 metric_list:
   - metric: acc
     aggregation: mean
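
A note on the sciq config above: doc_to_target is now a literal index (3) into the list produced by doc_to_choice, whose last entry is the correct answer. A tiny sketch of that convention on a hypothetical sciq-style record (illustration only, not harness code):

# Sketch: a fixed doc_to_target index selects the gold entry from doc_to_choice.
doc = {
    "distractor1": "oxygen",
    "distractor2": "nitrogen",
    "distractor3": "helium",
    "correct_answer": "carbon dioxide",
}

# doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}"
choices = [doc["distractor1"], doc["distractor2"], doc["distractor3"], doc["correct_answer"]]

# doc_to_target: 3 -> the gold answer is always the last of the four choices
assert choices[3] == doc["correct_answer"]
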
@@ -6,13 +6,10 @@ dataset_name: boolq
 output_type: multiple_choice
 training_split: train
 validation_split: validation
-doc_to_text: "{{passage}}\nQuestion: {{question}}\nAnswer:"
-doc_to_target: "{{answer_choices[label]}}"
-gold_alias: "{{label}}" # this will be cast to an int.
-template_aliases: "{% set answer_choices = ['no', 'yes'] %}"
+doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:"
+doc_to_target: label
+doc_to_choice: ["no", "yes"]
 should_decontaminate: true
 doc_to_decontamination_query: passage
 metric_list:
-  - metric: exact_match
-    aggregation: mean
-    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
   - metric: acc
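
In the boolq config above, doc_to_target names a dataset column rather than giving a literal: the integer label value is used directly as the index into doc_to_choice. A minimal illustration with a hypothetical record (not harness code):

# Sketch: doc_to_target refers to the dataset's label column, which indexes doc_to_choice.
doc = {"passage": "The sky is blue.", "question": "is the sky blue", "label": 1}

choices = ["no", "yes"]       # doc_to_choice
gold = choices[doc["label"]]  # doc_to_target: label -> choices[1] -> "yes"
assert gold == "yes"
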
@@ -6,16 +6,15 @@ dataset_name: boolq
 output_type: greedy_until
 training_split: train
 validation_split: validation
-doc_to_text: "{{passage}}\nQuestion: {{question}}\nAnswer:"
-doc_to_target: "{{answer_choices[label]}}"
-gold_alias: " {{answer_choices[label]}}" # this will be cast to an int.
+doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:"
+doc_to_target: "{{[' no', ' yes'][label]}}"
+target_delimiter: ""
 generation_kwargs:
   until:
     - "\n\n"
     - "\n"
   do_sample: false
   temperature: 0.0
-template_aliases: "{% set answer_choices = ['no', 'yes'] %}"
 metric_list:
   - metric: exact_match
     aggregation: mean
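
For the greedy_until variant above, the reference is a full string (' no' or ' yes') picked by label, and target_delimiter: "" means nothing extra is placed between prompt and target, since the target already carries its own leading space. A rough sketch of assembling that reference with plain Jinja2 (hypothetical doc; not the harness's own template code):

# Sketch: render the greedy_until prompt and target for a boolq-style doc.
from jinja2 import Template

doc = {"passage": "The sky is blue.", "question": "is the sky blue", "label": 1}

prompt = Template("{{passage}}\nQuestion: {{question}}?\nAnswer:").render(**doc)
target = Template("{{[' no', ' yes'][label]}}").render(**doc)

# target_delimiter: "" -> prompt and target are concatenated with no extra space.
reference = prompt + "" + target
assert target == " yes" and reference.endswith("Answer: yes")
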
 group:
   - super-glue-lm-eval-v1
-task: "cb"
+task: cb
 dataset_path: super_glue
 dataset_name: cb
 output_type: multiple_choice
 training_split: train
 validation_split: validation
 doc_to_text: "{{premise}}\nQuestion: {{hypothesis}}. True, False, or Neither?\nAnswer:"
-doc_to_target: "{{answer_choices[label]}}"
-gold_alias: "{{label}}" # this will be cast to an int.
-template_aliases: "{% set answer_choices = ['True', 'False', 'Neither'] %}"
+doc_to_target: label
+doc_to_choice: ['True', 'False', 'Neither']
 metric_list:
   - metric: acc
   - metric: f1
 group:
   - super-glue-lm-eval-v1
-task: "copa"
+task: copa
 dataset_path: super_glue
 dataset_name: copa
 output_type: multiple_choice
@@ -8,7 +8,6 @@ training_split: train
 validation_split: validation
 doc_to_text: !function utils.doc_to_text
 doc_to_target: !function utils.doc_to_target
-gold_alias: "{{label}}" # this will be cast to an int.
-template_aliases: "{% set answer_choices = [{{doc.choice1}}, 'b'] %} {{answer_choices}}"
+doc_to_choice: !function utils.doc_to_choice
 metric_list:
   - metric: acc
@@ -15,3 +15,7 @@ def doc_to_target(doc):
     correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
     # Connect the sentences
     return " " + convert_choice(correct_choice)
+
+
+def doc_to_choice(doc):
+    return [" " + convert_choice(doc["choice1"]), " " + convert_choice(doc["choice2"])]
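
The new doc_to_choice for copa mirrors doc_to_target: both return continuations with a leading space, run through the same convert_choice helper. A small self-contained sketch of how the pair behaves on a toy document; convert_choice is a stand-in here, assumed to lowercase the leading character (its real definition lives elsewhere in utils.py and is not shown in this diff):

# Self-contained sketch; convert_choice is a stand-in for the helper defined
# elsewhere in utils.py (assumed to lowercase the first character).
def convert_choice(choice):
    return choice[0].lower() + choice[1:]

def doc_to_choice(doc):
    return [" " + convert_choice(doc["choice1"]), " " + convert_choice(doc["choice2"])]

def doc_to_target(doc):
    correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
    return " " + convert_choice(correct_choice)

doc = {"choice1": "The sun came out.", "choice2": "It started to rain.", "label": 1}
assert doc_to_target(doc) == doc_to_choice(doc)[doc["label"]] == " it started to rain."
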
group:
- super-glue-lm-eval-v1
task: multirc
dataset_path: super_glue
dataset_name: multirc
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: "{{paragraph}}\nQuestion: {{question}}\nAnswer:"
doc_to_target: label
doc_to_choice: "['''{{answer}}\\nIs the answer correct? yes''', '''{{answer}}\\nIs the answer correct? no''']"
metric_list:
- metric: acc
# group:
# - super-glue-lm-eval-v1
task: record
dataset_path: super_glue
dataset_name: record
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: !function util.doc_to_text
doc_to_target: "{{answers}}"
doc_to_choice: "{{entities}}"
metric_list:
- metric: f1
- metric: em
include: promptsource-00.yaml
group:
- super-glue-promptsource
task: "Add sentence after after (continuation choices)"
use_prompt: "promptsource:Add sentence after after (continuation choices)"
include: promptsource-00.yaml
group:
- super-glue-promptsource
task: "Can you figure out…"
use_prompt: "promptsource:Can you figure out…"
def doc_to_text(doc):
    initial_text, *highlights = doc["passage"].strip().split("\n@highlight\n")
    text = initial_text + "\n\n"
    for highlight in highlights:
        text += f" - {highlight}.\n"
    return text


def format_answer(query, entity):
    return f" - {query}".replace("@placeholder", entity)


def doc_to_target(doc):
    # We only output the first correct entity in a doc
    return format_answer(query=doc["query"], entity=doc["answers"][0])
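
A quick illustration of the @placeholder substitution done by format_answer and doc_to_target above, on a toy ReCoRD-style document (hypothetical values, illustration only):

# Toy example of the @placeholder substitution performed by format_answer.
def format_answer(query, entity):
    return f" - {query}".replace("@placeholder", entity)

doc = {
    "query": "@placeholder signed the treaty in 1903.",
    "answers": ["Panama", "the Republic of Panama"],
}

# doc_to_target only uses the first listed correct entity.
assert format_answer(query=doc["query"], entity=doc["answers"][0]) == " - Panama signed the treaty in 1903."
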
@@ -6,9 +6,8 @@ dataset_name: wic
 output_type: multiple_choice
 training_split: train
 validation_split: validation
-doc_to_text: !function utils.doc_to_text
-doc_to_target: !function utils.doc_to_target
-gold_alias: "{{label}}" # this will be cast to an int.
-template_aliases: "{% set answer_choices = ['no', 'yes'] %}"
+doc_to_text: "Sentence 1: {{sentence1}}\nSentence 2: {{sentence2}}\nQuestion: Is the word '{{sentence1[start1:end1]}}' used in the same way in the two sentences above?\nAnswer:"
+doc_to_target: label
+doc_to_choice: ['no', 'yes']
 metric_list:
   - metric: acc
def doc_to_text(doc):
    return (
        "Sentence 1: {}\nSentence 2: {}\nQuestion: Is the word '{}' used in the same way in the"
        " two sentences above?\nAnswer:".format(
            doc["sentence1"],
            doc["sentence2"],
            doc["sentence1"][doc["start1"] : doc["end1"]],
        )
    )


def doc_to_target(doc):
    return " {}".format({0: "no", 1: "yes"}[doc["label"]])
group:
- super-glue-lm-eval-v1
task: wsc
dataset_path: super_glue
dataset_name: wsc
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: !function preprocess_wsc.default_doc_to_text
doc_to_target: label
doc_to_choice: ['no', 'yes']
metric_list:
- metric: acc
 import re
+from lm_eval.utils import general_detokenize


-def doc_to_text(x):
+def t5_prompt_doc_to_text(x):
     def _mark_span(text, span_str, span_idx, mark):
         pattern_tmpl = r"^((?:\S+\s){N})(W)"
         pattern = re.sub("N", str(span_idx), pattern_tmpl)
@@ -15,3 +16,19 @@ def doc_to_text(x):
     text = _mark_span(text, x["span2_text"], span2_index, "#")
     return text
+
+
+def default_doc_to_text(doc):
+    raw_passage = doc["text"]
+    # NOTE: HuggingFace span indices are word-based not character-based.
+    pre = " ".join(raw_passage.split()[: doc["span2_index"]])
+    post = raw_passage[len(pre) + len(doc["span2_text"]) + 1 :]
+    passage = general_detokenize(pre + " *{}*".format(doc["span2_text"]) + post)
+    noun = doc["span1_text"]
+    pronoun = doc["span2_text"]
+    text = (
+        f"Passage: {passage}\n"
+        + f'Question: In the passage above, does the pronoun "*{pronoun}*" refer to "*{noun}*"?\n'
+        + "Answer:"
+    )
+    return text
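
To make the span marking in default_doc_to_text concrete, here is a small self-contained run on a toy WSC-style document; general_detokenize is stubbed as a no-op for the sketch (the real helper in lm_eval.utils re-attaches punctuation):

# Toy run of the word-index span marking used in default_doc_to_text.
def general_detokenize(s):  # stub; the real helper lives in lm_eval.utils
    return s

doc = {
    "text": "The trophy doesn't fit into the suitcase because it is too large.",
    "span1_text": "trophy",
    "span2_index": 8,
    "span2_text": "it",
}

pre = " ".join(doc["text"].split()[: doc["span2_index"]])
post = doc["text"][len(pre) + len(doc["span2_text"]) + 1 :]
passage = general_detokenize(pre + " *{}*".format(doc["span2_text"]) + post)

assert passage == "The trophy doesn't fit into the suitcase because *it* is too large."
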