Inspired by https://github.com/stanford-crfm/helm/blob/0eaaa62a2263ddb94e9850ee629423b010f57e4a/src/helm/benchmark/scenarios/babi_qa_scenario.py
"""
from collections import defaultdict

import numpy as np

from lm_eval.base import rf, Task
from lm_eval.metrics import mean
_CITATION="""
@article{weston2015towards,
title={Towards ai-complete question answering: A set of prerequisite toy tasks},
author={Weston, Jason and Bordes, Antoine and Chopra, Sumit and Rush, Alexander M and Van Merri{\"e}nboer, Bart and Joulin, Armand and Mikolov, Tomas},