Unverified Commit 2c20cd1f authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

Merge pull request #671 from EleutherAI/revamp-process

Revamp process
parents 6862fa7d 0dadc92a
......@@ -5,8 +5,9 @@ dataset_path: super_glue
dataset_name: wsc
training_split: train
validation_split: validation
doc_to_text: !function "preprocess_wsc.doc_to_text"
doc_to_target: "{% set answer_choices = ['False', 'True'] %}{{answer_choices[label]}}"
doc_to_text: !function "preprocess_wsc.t5_prompt_doc_to_text"
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
- metric: exact_match
aggregation: mean
......
......@@ -7,10 +7,9 @@ output_type: multiple_choice
training_split: train
validation_split: validation
test_split: null
template_aliases: "{% set answer_choices = [ending0, ending1, ending2, ending3] %}{% set gold = label %}"
doc_to_text: "{{startphrase}}"
doc_to_target: "{{answer_choices[gold]}}"
gold_alias: "{{gold}}"
doc_to_text: startphrase
doc_to_target: label
doc_to_choice: "{{[ending0, ending1, ending2, ending3]}}"
metric_list:
- metric: acc
aggregation: mean
......
......@@ -6,10 +6,9 @@ dataset_name: annotated
output_type: multiple_choice
training_split: train
test_split: test
template_aliases: "{% set answer_choices = ['No', 'Yes'] %}"
doc_to_text: "Is the following statement hateful? Respond with either Yes or No. Statement: '{{text}}'"
doc_to_target: !function utils.doc_to_target
gold_alias: !function utils.gold_idx # this will be cast to an int.
doc_to_choice: ['No', 'Yes']
metric_list:
- metric: acc
aggregation: mean
......
import numpy as np
def gold_idx(doc):
def doc_to_target(doc):
    """Return the binary label for a document as a NumPy int32.

    The label is 1 when the sum of ``doc["toxicity_ai"]`` and
    ``doc["toxicity_human"]`` is strictly greater than 5.5, otherwise 0.
    """
    combined_score = doc["toxicity_ai"] + doc["toxicity_human"]
    above_threshold = combined_score > 5.5
    # Round the boolean and cast it so the result is an integer-typed value.
    return np.round(above_threshold, 0).astype(np.int32)
def doc_to_target(doc):
    """Return "No" or "Yes", selected by the index ``gold_idx(doc)``."""
    answer_index = gold_idx(doc)
    labels = ["No", "Yes"]
    return labels[answer_index]
def partial_context(doc, option):
    """Return the sentence up to its first "_" with ``option`` appended.

    Everything from the "_" placeholder onward is dropped. Raises
    ``ValueError`` if ``doc["sentence"]`` contains no "_".
    """
    sentence = doc["sentence"]
    blank_at = sentence.index("_")
    prefix = sentence[:blank_at]
    return prefix + option
def partial_target(doc):
    """Return the text after the first "_" in the sentence, stripped.

    Raises ``ValueError`` if ``doc["sentence"]`` contains no "_".
    """
    sentence = doc["sentence"]
    # Skip past the placeholder character itself.
    start = sentence.index("_") + 1
    remainder = sentence[start:]
    return remainder.strip()
def doc_to_text(doc):
    """Map ``doc["answer"]`` ("1" or "2") to the zero-based index 0 or 1.

    Despite its name, this returns an integer index rather than text.
    Raises ``KeyError`` for any other answer value.
    """
    index_by_answer = {"1": 0, "2": 1}
    answer = doc["answer"]
    return index_by_answer[answer]
def create_choices(doc):
    """Return the two partial contexts, one per option, in option order.

    Each entry is ``partial_context(doc, option)`` for ``doc["option1"]``
    and then ``doc["option2"]``.
    """
    candidates = [doc["option1"], doc["option2"]]
    return [partial_context(doc, candidate) for candidate in candidates]
def doc_to_target(doc):
    """Return the stripped continuation that follows the first "_".

    Raises ``ValueError`` if ``doc["sentence"]`` contains no "_".
    """
    text = doc["sentence"]
    after_blank = text.index("_") + 1  # first character past the placeholder
    continuation = text[after_blank:]
    return continuation.strip()
def gold_alias(doc):
    """Return the zero-based gold index for ``doc["answer"]``.

    "1" maps to 0 and "2" maps to 1; any other value raises ``KeyError``.
    """
    answer_to_index = {"1": 0, "2": 1}
    gold_label = doc["answer"]
    return answer_to_index[gold_label]
def doc_to_choice(doc):
    """Return two candidate contexts: the sentence prefix plus each option.

    The prefix is everything before the first "_" in ``doc["sentence"]``;
    the options are ``doc["option1"]`` and ``doc["option2"]``, in that order.
    Raises ``ValueError`` if the sentence contains no "_".
    """
    sentence = doc["sentence"]
    prefix = sentence[: sentence.index("_")]
    candidates = (doc["option1"], doc["option2"])
    choices = []
    for candidate in candidates:
        choices.append(prefix + candidate)
    return choices
task: winogrande
dataset_path: winogrande
dataset_name: winogrande_xl
output_type: winograd_schema
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_target: !function preprocess_winogrande.partial_target
doc_to_text: "{{sentence}}"
create_choices: !function preprocess_winogrande.create_choices
gold_alias: !function preprocess_winogrande.gold_alias
doc_to_text: !function preprocess_winogrande.doc_to_text
doc_to_target: !function preprocess_winogrande.doc_to_target
doc_to_choice: !function preprocess_winogrande.doc_to_choice
metric_list:
- metric: acc
aggregation: mean
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment