Unverified commit ccfa4ad1, authored by Janna and committed by GitHub
Browse files

Add BabiLong (#3287)

* create babilong tasks

* lint

* add clarification

* fix typo

* add babilong description
parent fec9dde7
# BabiLong qa5 task configuration.
# Shared settings are pulled in from _babilong_common_yaml; this file only sets
# the task name, the split, the prompt text and the few-shot examples.
# The data comes from common_utils.load_dataset, which returns a dict keyed by
# `qa_split`, so `qa_split` below has to match `test_split`.
include: _babilong_common_yaml
task: babilong_qa5
test_split: qa5
custom_dataset: !function common_utils.load_dataset
dataset_kwargs:
qa_split: qa5
# Instruction text for the task: asks for a single-word answer with no explanation.
description: "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nYour answer should contain only one word. Do not write anything else after that. Do not explain your answer.\n\n"
# Prompt body: the whitespace-stripped context, a newline, then the stripped question.
doc_to_text: "{{input.strip()}}\n{{question.strip()}}"
# Hand-written few-shot examples (sampler: first_n); each has input, question and target.
fewshot_config:
sampler: first_n
samples:
- input: "Mary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there."
question: "Who did Mary give the apple to?"
target: "Fred"
- input: "Jeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom."
question: "Who gave the football?"
target: "Jeff"
- input: "Fred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden."
question: "What did Fred give to Bill?"
target: "apple"
# BabiLong qa6 task configuration (yes/no questions about a person's location).
# Shared settings are pulled in from _babilong_common_yaml.
# The data comes from common_utils.load_dataset, which returns a dict keyed by
# `qa_split`, so `qa_split` below has to match `test_split`.
include: _babilong_common_yaml
task: babilong_qa6
test_split: qa6
custom_dataset: !function common_utils.load_dataset
dataset_kwargs:
qa_split: qa6
# Instruction text: the latest location wins; the answer must be "yes" or "no".
description: "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.\nYour answer should contain only one word - $yes$ or $no$. Do not write anything else after that.\nDo not explain your answer.\n\n"
# Prompt body: the whitespace-stripped context, a newline, then the stripped question.
doc_to_text: "{{input.strip()}}\n{{question.strip()}}"
# Hand-written few-shot examples (sampler: first_n): one "yes" case and one "no" case.
fewshot_config:
sampler: first_n
samples:
- input: "John travelled to the hallway. John travelled to the garden."
question: "Is John in the garden?"
target: "yes"
- input: "Mary went to the office. Daniel journeyed to the hallway. Mary went to the bedroom. Sandra went to the garden."
question: "Is Mary in the office?"
target: "no"
# BabiLong qa7 task configuration (counting the objects a person carries).
# Shared settings are pulled in from _babilong_common_yaml.
# The data comes from common_utils.load_dataset, which returns a dict keyed by
# `qa_split`, so `qa_split` below has to match `test_split`.
include: _babilong_common_yaml
task: babilong_qa7
test_split: qa7
custom_dataset: !function common_utils.load_dataset
dataset_kwargs:
qa_split: qa7
# Instruction text: the answer is "none" or a count; the few-shot targets below
# spell the count as a word ("one", "two").
description: "I will give you context with the facts about people and objects they carry, hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nYour answer should contain only one word - $none$ or $number_of_objects$.\nDo not write anything else after that. Do not explain your answer.\n\n"
# Prompt body: the whitespace-stripped context, a newline, then the stripped question.
doc_to_text: "{{input.strip()}}\n{{question.strip()}}"
# Hand-written few-shot examples (sampler: first_n) covering one, none and two objects.
fewshot_config:
sampler: first_n
samples:
- input: "Daniel went to the bedroom. Daniel got the apple there."
question: "How many objects is Daniel carrying?"
target: "one"
- input: "Mary grabbed the apple there. Mary gave the apple to John."
question: "How many objects is Mary carrying?"
target: "none"
- input: "Sandra travelled to the hallway. Sandra picked up the milk there. Sandra took the apple there. Mary travelled to the garden."
question: "How many objects is Sandra carrying?"
target: "two"
# BabiLong qa8 task configuration (naming the objects a person carries).
# Shared settings are pulled in from _babilong_common_yaml.
# The data comes from common_utils.load_dataset, which returns a dict keyed by
# `qa_split`, so `qa_split` below has to match `test_split`.
include: _babilong_common_yaml
task: babilong_qa8
test_split: qa8
custom_dataset: !function common_utils.load_dataset
dataset_kwargs:
qa_split: qa8
# Instruction text: the answer is "nothing", one object, or two objects.
# NOTE(review): the two-object format is shown here as "$object_1$, $object_2$"
# (comma + space) while the few-shot target below is "apple,milk" (no space);
# confirm which form the dataset targets use, since scoring is a substring match.
description: "I will give you context with the facts about people and objects they carry, hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nYour answer should contain only one or two words: $nothing$ or $object$ or $object_1$, $object_2$. Do not write anything else. Do not explain your answer.\n\n"
# Prompt body: the whitespace-stripped context, a newline, then the stripped question.
doc_to_text: "{{input.strip()}}\n{{question.strip()}}"
# Hand-written few-shot examples (sampler: first_n): one object, nothing, two objects.
fewshot_config:
sampler: first_n
samples:
- input: "Sandra travelled to the garden. Mary grabbed the milk there."
question: "What is Mary carrying?"
target: "milk"
- input: "Mary travelled to the kitchen. Sandra travelled to the office. John travelled to the office. Sandra discarded the milk there."
question: "What is Sandra carrying?"
target: "nothing"
- input: "Daniel grabbed the apple there. Mary went to the office. Daniel moved to the garden. Daniel grabbed the milk there. Mary went to the kitchen."
question: "What is Daniel carrying?"
target: "apple,milk"
# BabiLong qa9 task configuration (yes/no location questions; the few-shot
# contexts include negated facts such as "is not in the bathroom").
# Shared settings are pulled in from _babilong_common_yaml.
# The data comes from common_utils.load_dataset, which returns a dict keyed by
# `qa_split`, so `qa_split` below has to match `test_split`.
include: _babilong_common_yaml
task: babilong_qa9
test_split: qa9
custom_dataset: !function common_utils.load_dataset
dataset_kwargs:
qa_split: qa9
# Instruction text: the latest location wins; the answer must be "yes" or "no".
description: "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person was in different locations, use the latest location the person was in to answer the question.\nYour answer should contain only one word - $yes$ or $no$. Do not write anything else. Do not explain your answer.\n\n"
# Prompt body: the whitespace-stripped context, a newline, then the stripped question.
doc_to_text: "{{input.strip()}}\n{{question.strip()}}"
# Hand-written few-shot examples (sampler: first_n): one "no" case and one "yes" case.
fewshot_config:
sampler: first_n
samples:
- input: "John is not in the bathroom. Sandra is not in the bedroom."
question: "Is John in the bathroom?"
target: "no"
- input: "Mary journeyed to the kitchen. John is in the bedroom. Sandra is not in the garden."
question: "Is Mary in the kitchen?"
target: "yes"
import logging
import re
from functools import cache
from typing import TYPE_CHECKING, Union
import datasets
from transformers import AutoTokenizer
if TYPE_CHECKING:
import transformers
eval_logger = logging.getLogger(__name__)
@cache
def get_tokenizer(
    tokenizer=None, pretrained=None, **kwargs
) -> Union["transformers.PreTrainedTokenizer", "transformers.PreTrainedTokenizerFast"]:
    """Load the tokenizer used by the babilong tasks, memoized per argument set.

    Args:
        tokenizer: Tokenizer name or path; takes precedence over ``pretrained``.
        pretrained: Name or path used when ``tokenizer`` is falsy.
        **kwargs: Accepted but not used for loading. Because the function is
            wrapped in ``functools.cache`` they are still part of the cache
            key, so every value passed must be hashable.

    Returns:
        The object returned by ``AutoTokenizer.from_pretrained``.

    Raises:
        AssertionError: If neither ``tokenizer`` nor ``pretrained`` is given.
    """
    pretrained = tokenizer or pretrained
    # Raise explicitly rather than via ``assert``: assert statements are
    # removed under ``python -O``, which would let ``None`` reach
    # ``from_pretrained``. The exception type and message are unchanged.
    if not pretrained:
        raise AssertionError("No tokenizer or pretrained provided.")
    eval_logger.info(f"Using tokenizer {pretrained} for babilong tasks.")
    # NOTE(review): trust_remote_code=True allows code shipped with the
    # tokenizer repository to run locally; only use with trusted sources.
    return AutoTokenizer.from_pretrained(pretrained, trust_remote_code=True)
def postprocess_pred(prediction: list[str]) -> list[str]:
    """Normalize raw model outputs before scoring.

    Each string is stripped of surrounding whitespace, every ASCII control
    character (``\\x00``-``\\x1f``) is replaced by a newline, and the result
    is stripped again. Control characters inside the text therefore become
    newlines rather than being deleted.

    Args:
        prediction: Raw prediction strings.

    Returns:
        A new list with one cleaned string per input, in the same order.
    """
    # Compile once per call instead of once per prediction.
    control_chars = re.compile(r"[\x00-\x1f]")
    return [control_chars.sub("\n", text.strip()).strip() for text in prediction]
def load_dataset(**kwargs):
    """Load one babilong QA split from ``RMT-team/babilong-1k-samples``.

    Args:
        **kwargs: Recognized keys:
            ``qa_split`` (required): split name to load, e.g. ``"qa5"``.
            ``max_seq_lengths``: dataset config name passed as ``name=``;
            defaults to ``"0k"`` (presumably the context-length variant —
            confirm against the dataset card).
            Any other keys are ignored.

    Returns:
        A dict mapping ``qa_split`` to the loaded dataset, so a task's
        ``test_split`` can look the data up by the same name.

    Raises:
        ValueError: If ``qa_split`` is missing or empty.
    """
    config_name = kwargs.get("max_seq_lengths", "0k")
    # Get specific qa split
    qa_split = kwargs.get("qa_split")
    # Fail early: without a split name, ``split=None`` would be passed on and
    # the result would be keyed by ``None``, which no task split can match.
    if not qa_split:
        raise ValueError(
            "load_dataset requires a non-empty 'qa_split' keyword argument."
        )
    eval_logger.info(
        f"Loading babilong dataset: max_seq_lengths={config_name}, split={qa_split}"
    )
    dataset = datasets.load_dataset(
        "RMT-team/babilong-1k-samples", name=config_name, split=qa_split
    )
    return {qa_split: dataset}
def process_results(doc: dict, results: list[str]) -> dict[str, float]:
    """Score one document by case-insensitive substring match.

    The first entry of ``results`` is cleaned with ``postprocess_pred`` and
    counted as correct when the document's stripped ``target`` occurs
    anywhere inside it, ignoring case.

    Args:
        doc: Dataset row; its ``"target"`` value is the expected answer.
        results: Model outputs for this document; only the first is scored.

    Returns:
        ``{"acc": 1.0}`` on a match, otherwise ``{"acc": 0.0}``. A document
        with no usable target, or an empty ``results`` list, scores 0.0.
    """
    target = (doc.get("target") or "").strip()
    # Guard the degenerate cases up front: an empty target would match every
    # prediction (``"" in s`` is always true) and an empty results list would
    # raise IndexError below.
    if not results or not target:
        return {"acc": 0.0}
    pred = postprocess_pred(results)
    # String match
    score = 1.0 if target.lower() in pred[0].lower() else 0.0
    return {"acc": score}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment