"...gtest-1.11.0/googlemock/scripts/generator/cpp/tokenize.py" did not exist on "5e24f35816ed2e256efc15cd5d28c147119cf394"
Commit 0dbf0255 authored by Baber
Browse files

add winogrande_gen

parent 799359ad
task: winogrande_generation
dataset_path: winogrande
dataset_name: winogrande_xl
output_type: generate_until
training_split: train
validation_split: validation
doc_to_text: !function preprocess_winogrande.doc_to_text_gen
doc_to_target: "{{'A' if answer|int == 1 else 'B'}}"
should_decontaminate: true
doc_to_decontamination_query: sentence
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
dataset_kwargs:
  trust_remote_code: true
...@@ -12,3 +12,21 @@ def doc_to_choice(doc): ...@@ -12,3 +12,21 @@ def doc_to_choice(doc):
idx = doc["sentence"].index("_") idx = doc["sentence"].index("_")
options = [doc["option1"], doc["option2"]] options = [doc["option1"], doc["option2"]]
return [doc["sentence"][:idx] + opt for opt in options] return [doc["sentence"][:idx] + opt for opt in options]
def doc_to_text_gen(doc):
    """Build the generation-style prompt for one Winogrande document.

    The prompt is made of six newline-separated parts: a fixed instruction,
    the original sentence (blank still in place), a "Who"/"What" question
    built from the text that follows the blank, the two options labelled
    ``A.`` and ``B.``, and a closing instruction that fixes the expected
    answer format.

    Args:
        doc: Mapping with string values under ``"sentence"``, ``"option1"``
            and ``"option2"``.

    Returns:
        The prompt as a single string.
    """
    sentence = doc["sentence"]
    option_a = doc["option1"]
    option_b = doc["option2"]

    # An option that starts with an uppercase letter yields "Who", anything
    # else "What". Slicing (instead of indexing) keeps an empty option from
    # raising IndexError; it simply falls through to "What".
    question_word = "Who" if option_a[:1].isupper() else "What"

    # Everything after the last "_" completes the question; that tail keeps
    # its own leading space, so no separator is inserted here.
    question = f"Question: {question_word}" + sentence.split("_")[-1]

    # Every part goes on its own line. Previously the instruction ran straight
    # into the sentence and option B ran straight into the closing
    # instruction, producing text such as "...best answerThe trophy...".
    return "\n".join(
        [
            "Given the following question and two candidate answers (A and B), choose the best answer",
            sentence,
            question,
            f"A. {option_a}",
            f"B. {option_b}",
            'Your response should end with "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of A or B.',
        ]
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment