{"es":{"description":"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the\nSpanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio\nde Sanidad, Consumo y Bienestar Social.\nThe dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.\n","citation":"@inproceedings{vilares-gomez-rodriguez-2019-head,\n title = \"{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning\",\n author = \"Vilares, David and\n G{'o}mez-Rodr{'i}guez, Carlos\",\n booktitle = \"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics\",\n month = jul,\n year = \"2019\",\n address = \"Florence, Italy\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/P19-1092\",\n doi = \"10.18653/v1/P19-1092\",\n pages = \"960--966\",\n abstract = \"We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.\",\n}\n","homepage":"https://aghie.github.io/head-qa/","license":"MIT License","features":{"name":{"dtype":"string","id":null,"_type":"Value"},"year":{"dtype":"string","id":null,"_type":"Value"},"category":{"dtype":"string","id":null,"_type":"Value"},"qid":{"dtype":"int32","id":null,"_type":"Value"},"qtext":{"dtype":"string","id":null,"_type":"Value"},"ra":{"dtype":"int32","id":null,"_type":"Value"},"answers":[{"aid":{"dtype":"int32","id":null,"_type":"Value"},"atext":{"dtype":"string","id":null,"_type":"Value"}}]},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"head_qa","config_name":"es","version":{"version_str":"1.1.0","description":null,"major":1,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":1196021,"num_examples":2657,"dataset_name":"head_qa"},"test":{"name":"test","num_bytes":1169819,"num_examples":2742,"dataset_name":"head_qa"},"validation":{"name":"validation","num_bytes":556924,"num_examples":1366,"dataset_name":"head_qa"}},"download_checksums":{"https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t":{"num_bytes":79365502,"checksum":"6ec29a3f55153d167f0bdf05395558919ba0b1df9c63e79ffceda2a09884ad8b"}},"download_size":79365502,"post_processing_size":null,"dataset_size":2922764,"size_in_bytes":82288266},"en":{"description":"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the\nSpanish healthcare system, and are challenging even for highly specialized humans. 
They are designed by the Ministerio\nde Sanidad, Consumo y Bienestar Social.\nThe dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.\n","citation":"@inproceedings{vilares-gomez-rodriguez-2019-head,\n title = \"{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning\",\n author = \"Vilares, David and\n G{'o}mez-Rodr{'i}guez, Carlos\",\n booktitle = \"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics\",\n month = jul,\n year = \"2019\",\n address = \"Florence, Italy\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/P19-1092\",\n doi = \"10.18653/v1/P19-1092\",\n pages = \"960--966\",\n abstract = \"We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.\",\n}\n","homepage":"https://aghie.github.io/head-qa/","license":"MIT License","features":{"name":{"dtype":"string","id":null,"_type":"Value"},"year":{"dtype":"string","id":null,"_type":"Value"},"category":{"dtype":"string","id":null,"_type":"Value"},"qid":{"dtype":"int32","id":null,"_type":"Value"},"qtext":{"dtype":"string","id":null,"_type":"Value"},"ra":{"dtype":"int32","id":null,"_type":"Value"},"answers":[{"aid":{"dtype":"int32","id":null,"_type":"Value"},"atext":{"dtype":"string","id":null,"_type":"Value"}}]},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"head_qa","config_name":"en","version":{"version_str":"1.1.0","description":null,"major":1,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":1123151,"num_examples":2657,"dataset_name":"head_qa"},"test":{"name":"test","num_bytes":1097349,"num_examples":2742,"dataset_name":"head_qa"},"validation":{"name":"validation","num_bytes":523462,"num_examples":1366,"dataset_name":"head_qa"}},"download_checksums":{"https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t":{"num_bytes":79365502,"checksum":"6ec29a3f55153d167f0bdf05395558919ba0b1df9c63e79ffceda2a09884ad8b"}},"download_size":79365502,"post_processing_size":null,"dataset_size":2743962,"size_in_bytes":82109464}}
{"es":{"description":"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the\nSpanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio\nde Sanidad, Consumo y Bienestar Social.\nThe dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.\n","citation":"@inproceedings{vilares-gomez-rodriguez-2019-head,\n title = \"{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning\",\n author = \"Vilares, David and\n G{'o}mez-Rodr{'i}guez, Carlos\",\n booktitle = \"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics\",\n month = jul,\n year = \"2019\",\n address = \"Florence, Italy\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/P19-1092\",\n doi = \"10.18653/v1/P19-1092\",\n pages = \"960--966\",\n abstract = \"We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.\",\n}\n","homepage":"https://aghie.github.io/head-qa/","license":"MIT License","features":{"name":{"dtype":"string","id":null,"_type":"Value"},"year":{"dtype":"string","id":null,"_type":"Value"},"category":{"dtype":"string","id":null,"_type":"Value"},"qid":{"dtype":"int32","id":null,"_type":"Value"},"qtext":{"dtype":"string","id":null,"_type":"Value"},"ra":{"dtype":"int32","id":null,"_type":"Value"},"answers":[{"aid":{"dtype":"int32","id":null,"_type":"Value"},"atext":{"dtype":"string","id":null,"_type":"Value"}}]},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"head_qa","config_name":"es","version":{"version_str":"1.1.0","description":null,"major":1,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":1196021,"num_examples":2657,"dataset_name":"head_qa"},"test":{"name":"test","num_bytes":1169819,"num_examples":2742,"dataset_name":"head_qa"},"validation":{"name":"validation","num_bytes":556924,"num_examples":1366,"dataset_name":"head_qa"}},"download_checksums":{"https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t":{"num_bytes":79365502,"checksum":"6ec29a3f55153d167f0bdf05395558919ba0b1df9c63e79ffceda2a09884ad8b"}},"download_size":79365502,"post_processing_size":null,"dataset_size":2922764,"size_in_bytes":82288266},"en":{"description":"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the\nSpanish healthcare system, and are challenging even for highly specialized humans. 
They are designed by the Ministerio\nde Sanidad, Consumo y Bienestar Social.\nThe dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.\n","citation":"@inproceedings{vilares-gomez-rodriguez-2019-head,\n title = \"{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning\",\n author = \"Vilares, David and\n G{'o}mez-Rodr{'i}guez, Carlos\",\n booktitle = \"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics\",\n month = jul,\n year = \"2019\",\n address = \"Florence, Italy\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/P19-1092\",\n doi = \"10.18653/v1/P19-1092\",\n pages = \"960--966\",\n abstract = \"We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.\",\n}\n","homepage":"https://aghie.github.io/head-qa/","license":"MIT License","features":{"name":{"dtype":"string","id":null,"_type":"Value"},"year":{"dtype":"string","id":null,"_type":"Value"},"category":{"dtype":"string","id":null,"_type":"Value"},"qid":{"dtype":"int32","id":null,"_type":"Value"},"qtext":{"dtype":"string","id":null,"_type":"Value"},"ra":{"dtype":"int32","id":null,"_type":"Value"},"answers":[{"aid":{"dtype":"int32","id":null,"_type":"Value"},"atext":{"dtype":"string","id":null,"_type":"Value"}}]},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"head_qa","config_name":"en","version":{"version_str":"1.1.0","description":null,"major":1,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":1123151,"num_examples":2657,"dataset_name":"head_qa"},"test":{"name":"test","num_bytes":1097349,"num_examples":2742,"dataset_name":"head_qa"},"validation":{"name":"validation","num_bytes":523462,"num_examples":1366,"dataset_name":"head_qa"}},"download_checksums":{"https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t":{"num_bytes":79365502,"checksum":"6ec29a3f55153d167f0bdf05395558919ba0b1df9c63e79ffceda2a09884ad8b"}},"download_size":79365502,"post_processing_size":null,"dataset_size":2743962,"size_in_bytes":82109464}}
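A quick consumption sketch for the head_qa schema above ("ra" holds the id of the correct answer; "answers" is a list of {aid, atext} records). Illustrative only, assuming the Hugging Face datasets library and network access to the recorded Google Drive archive:

from datasets import load_dataset

# Load the Spanish config; the split names match the "splits" recorded above.
ds = load_dataset("head_qa", "es", split="validation")
ex = ds[0]
# Recover the gold answer text by matching each answer id against "ra".
gold = next(a["atext"] for a in ex["answers"] if a["aid"] == ex["ra"])
print(ex["qtext"], "->", gold)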
{"commonsense":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Commonsense subset contains examples focusing on moral standards and principles that most people intuitively accept.","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"label":{"dtype":"int32","id":null,"_type":"Value"},"input":{"dtype":"string","id":null,"_type":"Value"},"is_short":{"dtype":"bool","id":null,"_type":"Value"},"edited":{"dtype":"bool","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"commonsense","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":14435215,"num_examples":13910,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":3150094,"num_examples":3885,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":17585309,"size_in_bytes":53170333},"deontology":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. 
This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Deontology subset contains examples focusing on whether an act is required, permitted, or forbidden according to a set of rules or constraints","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"group_id":{"dtype":"int32","id":null,"_type":"Value"},"label":{"dtype":"int32","id":null,"_type":"Value"},"scenario":{"dtype":"string","id":null,"_type":"Value"},"excuse":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"deontology","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":1931475,"num_examples":18164,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":384602,"num_examples":3596,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":2316077,"size_in_bytes":37901101},"justice":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. 
This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Justice subset contains examples focusing on how a character treats another person","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"group_id":{"dtype":"int32","id":null,"_type":"Value"},"label":{"dtype":"int32","id":null,"_type":"Value"},"scenario":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"justice","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":2516501,"num_examples":21791,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":309427,"num_examples":2704,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":2825928,"size_in_bytes":38410952},"utilitarianism":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. 
This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Utilitarianism subset contains scenarios that should be ranked from most pleasant to least pleasant for the person in the scenario","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"activity":{"dtype":"string","id":null,"_type":"Value"},"baseline":{"dtype":"string","id":null,"_type":"Value"},"rating":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"utilitarianism","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":2241770,"num_examples":13738,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":749768,"num_examples":4808,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":2991538,"size_in_bytes":38576562},"virtue":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Virtue subset contains scenarios focusing on whether virtues or vices are being exemplified","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"group_id":{"dtype":"int32","id":null,"_type":"Value"},"label":{"dtype":"int32","id":null,"_type":"Value"},"scenario":{"dtype":"string","id":null,"_type":"Value"},"trait":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"virtue","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":2640328,"num_examples":28245,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":473473,"num_examples":4975,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":3113801,"size_in_bytes":38698825}}
{"commonsense":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Commonsense subset contains examples focusing on moral standards and principles that most people intuitively accept.","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"label":{"dtype":"int32","id":null,"_type":"Value"},"input":{"dtype":"string","id":null,"_type":"Value"},"is_short":{"dtype":"bool","id":null,"_type":"Value"},"edited":{"dtype":"bool","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"commonsense","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":14435215,"num_examples":13910,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":3150094,"num_examples":3885,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":17585309,"size_in_bytes":53170333},"deontology":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. 
This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Deontology subset contains examples focusing on whether an act is required, permitted, or forbidden according to a set of rules or constraints","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"group_id":{"dtype":"int32","id":null,"_type":"Value"},"label":{"dtype":"int32","id":null,"_type":"Value"},"scenario":{"dtype":"string","id":null,"_type":"Value"},"excuse":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"deontology","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":1931475,"num_examples":18164,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":384602,"num_examples":3596,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":2316077,"size_in_bytes":37901101},"justice":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. 
This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Justice subset contains examples focusing on how a character treats another person","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"group_id":{"dtype":"int32","id":null,"_type":"Value"},"label":{"dtype":"int32","id":null,"_type":"Value"},"scenario":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"justice","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":2516501,"num_examples":21791,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":309427,"num_examples":2704,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":2825928,"size_in_bytes":38410952},"utilitarianism":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. 
This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Utilitarianism subset contains scenarios that should be ranked from most pleasant to least pleasant for the person in the scenario","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"activity":{"dtype":"string","id":null,"_type":"Value"},"baseline":{"dtype":"string","id":null,"_type":"Value"},"rating":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"utilitarianism","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":2241770,"num_examples":13738,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":749768,"num_examples":4808,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":2991538,"size_in_bytes":38576562},"virtue":{"description":"The ETHICS dataset is a benchmark that spans concepts in justice, well-being,\nduties, virtues, and commonsense morality. Models predict widespread moral\njudgments about diverse text scenarios. This requires connecting physical and\nsocial world knowledge to value judgements, a capability that may enable us\nto steer chatbot outputs or eventually regularize open-ended reinforcement\nlearning agents.\n\nThe Virtue subset contains scenarios focusing on whether virtues or vices are being exemplified","citation":"@article{hendrycks2021ethics\n title={Aligning AI With Shared Human Values},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n}\n","homepage":"https://github.com/hendrycks/ethics","license":"","features":{"group_id":{"dtype":"int32","id":null,"_type":"Value"},"label":{"dtype":"int32","id":null,"_type":"Value"},"scenario":{"dtype":"string","id":null,"_type":"Value"},"trait":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"hendrycks_ethics","config_name":"virtue","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":2640328,"num_examples":28245,"dataset_name":"hendrycks_ethics"},"test":{"name":"test","num_bytes":473473,"num_examples":4975,"dataset_name":"hendrycks_ethics"}},"download_checksums":{"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar":{"num_bytes":35585024,"checksum":"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"}},"download_size":35585024,"post_processing_size":null,"dataset_size":3113801,"size_in_bytes":38698825}}
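The download_checksums entries above are sha256 digests, so an archive fetched out of band can be verified with the standard library alone. A minimal sketch; the local filename is hypothetical:

import hashlib

# Digest recorded above for https://people.eecs.berkeley.edu/~hendrycks/ethics.tar
EXPECTED = "40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"

h = hashlib.sha256()
with open("ethics.tar", "rb") as f:  # hypothetical local copy of the archive
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "checksum mismatch"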
@@ -71,54 +71,64 @@ class HendrycksEthics(datasets.GeneratorBasedBuilder):
         EthicsConfig(
             name="commonsense",
             prefix="cm",
-            features=datasets.Features({
-                "label":datasets.Value("int32"),
-                "input":datasets.Value("string"),
-                "is_short":datasets.Value("bool"),
-                "edited":datasets.Value("bool"),
-            }),
-            description="The Commonsense subset contains examples focusing on moral standards and principles that most people intuitively accept."
+            features=datasets.Features(
+                {
+                    "label": datasets.Value("int32"),
+                    "input": datasets.Value("string"),
+                    "is_short": datasets.Value("bool"),
+                    "edited": datasets.Value("bool"),
+                }
+            ),
+            description="The Commonsense subset contains examples focusing on moral standards and principles that most people intuitively accept.",
         ),
         EthicsConfig(
             name="deontology",
             prefix="deontology",
-            features=datasets.Features({
-                "group_id":datasets.Value("int32"),
-                "label":datasets.Value("int32"),
-                "scenario":datasets.Value("string"),
-                "excuse":datasets.Value("string"),
-            }),
+            features=datasets.Features(
+                {
+                    "group_id": datasets.Value("int32"),
+                    "label": datasets.Value("int32"),
+                    "scenario": datasets.Value("string"),
+                    "excuse": datasets.Value("string"),
+                }
+            ),
             description="The Deontology subset contains examples focusing on whether an act is required, permitted, or forbidden according to a set of rules or constraints",
         ),
         EthicsConfig(
             name="justice",
             prefix="justice",
-            features=datasets.Features({
-                "group_id":datasets.Value("int32"),
-                "label":datasets.Value("int32"),
-                "scenario":datasets.Value("string"),
-            }),
+            features=datasets.Features(
+                {
+                    "group_id": datasets.Value("int32"),
+                    "label": datasets.Value("int32"),
+                    "scenario": datasets.Value("string"),
+                }
+            ),
             description="The Justice subset contains examples focusing on how a character treats another person",
         ),
         EthicsConfig(
             name="utilitarianism",
             prefix="util",
-            features=datasets.Features({
-                "activity":datasets.Value("string"),
-                "baseline":datasets.Value("string"),
-                "rating":datasets.Value("string"),# Empty rating.
-            }),
+            features=datasets.Features(
+                {
+                    "activity": datasets.Value("string"),
+                    "baseline": datasets.Value("string"),
+                    "rating": datasets.Value("string"),  # Empty rating.
+                }
+            ),
             description="The Utilitarianism subset contains scenarios that should be ranked from most pleasant to least pleasant for the person in the scenario",
         ),
         EthicsConfig(
             name="virtue",
             prefix="virtue",
-            features=datasets.Features({
-                "group_id":datasets.Value("int32"),
-                "label":datasets.Value("int32"),
-                "scenario":datasets.Value("string"),
-                "trait":datasets.Value("string"),
-            }),
+            features=datasets.Features(
+                {
+                    "group_id": datasets.Value("int32"),
+                    "label": datasets.Value("int32"),
+                    "scenario": datasets.Value("string"),
+                    "trait": datasets.Value("string"),
+                }
+            ),
             description="The Virtue subset contains scenarios focusing on whether virtues or vices are being exemplified",
         ),
     ]
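For context, a minimal sketch (not part of the diff) of how one of the configs above is consumed. The config and field names come from the diff; loading the builder under the name "hendrycks_ethics" is an assumption about how the script is registered:

from datasets import load_dataset

# "deontology" pairs a scenario with an excuse, per the features above.
ds = load_dataset("hendrycks_ethics", "deontology", split="train")
ex = ds[0]
print(ex["label"], ex["scenario"], ex["excuse"])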
...
@@ -140,7 +150,12 @@ class HendrycksEthics(datasets.GeneratorBasedBuilder):
                 name=datasets.Split.TRAIN,
                 # These kwargs will be passed to _generate_examples
...
     author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
     title={The LAMBADA dataset},
     DOI={10.5281/zenodo.2630551},
     publisher={Zenodo},
...
@@ -62,12 +62,34 @@ class Lambada(datasets.GeneratorBasedBuilder):
...
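dataset_infos blobs like the ones in this diff are normally regenerated rather than written by hand (e.g. with `datasets-cli test <script> --save_infos --all_configs`), and the same metadata can be inspected programmatically. A sketch, with a hypothetical script path:

from datasets import load_dataset_builder

# Builds the DatasetInfo (features, splits, sizes) without downloading the data.
builder = load_dataset_builder("lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py", "virtue")
print(builder.info.features)
print(builder.info.splits)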
{"logiqa":{"description":"LogiQA is a dataset for testing human logical reasoning. It consists of 8,678 QA\ninstances, covering multiple types of deductive reasoning. Results show that state-\nof-the-art neural models perform by far worse than human ceiling. The dataset can\nalso serve as a benchmark for reinvestigating logical AI under the deep learning\nNLP setting.\n","citation":"@misc{liu2020logiqa,\n title={LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning}, \n author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},\n year={2020},\n eprint={2007.08124},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n","homepage":"https://github.com/lgw863/LogiQA-dataset","license":"","features":{"label":{"dtype":"string","id":null,"_type":"Value"},"context":{"dtype":"string","id":null,"_type":"Value"},"question":{"dtype":"string","id":null,"_type":"Value"},"options":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"logiqa","config_name":"logiqa","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":6419852,"num_examples":7376,"dataset_name":"logiqa"},"test":{"name":"test","num_bytes":571705,"num_examples":651,"dataset_name":"logiqa"},"validation":{"name":"validation","num_bytes":562437,"num_examples":651,"dataset_name":"logiqa"}},"download_checksums":{"https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Train.txt":{"num_bytes":6281272,"checksum":"7d5bb1f58278e33b395744cd2ad8d7600faa0b3c4d615c659a44ec1181d759fa"},"https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Test.txt":{"num_bytes":559060,"checksum":"359acb78c37802208f7fde9e2f6574b8526527c63d6a336f90a53f1932cb4701"},"https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Eval.txt":{"num_bytes":550021,"checksum":"4c49e6753b7262c001506b9151135abf722247035ab075dad93acdea5789c01f"}},"download_size":7390353,"post_processing_size":null,"dataset_size":7553994,"size_in_bytes":14944347}}
{"logiqa":{"description":"LogiQA is a dataset for testing human logical reasoning. It consists of 8,678 QA\ninstances, covering multiple types of deductive reasoning. Results show that state-\nof-the-art neural models perform by far worse than human ceiling. The dataset can\nalso serve as a benchmark for reinvestigating logical AI under the deep learning\nNLP setting.\n","citation":"@misc{liu2020logiqa,\n title={LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning}, \n author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},\n year={2020},\n eprint={2007.08124},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n","homepage":"https://github.com/lgw863/LogiQA-dataset","license":"","features":{"label":{"dtype":"string","id":null,"_type":"Value"},"context":{"dtype":"string","id":null,"_type":"Value"},"question":{"dtype":"string","id":null,"_type":"Value"},"options":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"logiqa","config_name":"logiqa","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":6419852,"num_examples":7376,"dataset_name":"logiqa"},"test":{"name":"test","num_bytes":571705,"num_examples":651,"dataset_name":"logiqa"},"validation":{"name":"validation","num_bytes":562437,"num_examples":651,"dataset_name":"logiqa"}},"download_checksums":{"https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Train.txt":{"num_bytes":6281272,"checksum":"7d5bb1f58278e33b395744cd2ad8d7600faa0b3c4d615c659a44ec1181d759fa"},"https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Test.txt":{"num_bytes":559060,"checksum":"359acb78c37802208f7fde9e2f6574b8526527c63d6a336f90a53f1932cb4701"},"https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Eval.txt":{"num_bytes":550021,"checksum":"4c49e6753b7262c001506b9151135abf722247035ab075dad93acdea5789c01f"}},"download_size":7390353,"post_processing_size":null,"dataset_size":7553994,"size_in_bytes":14944347}}
{"mutual":{"description":"MuTual is a retrieval-based dataset for multi-turn dialogue reasoning, which is\nmodified from Chinese high school English listening comprehension test data.\n\nThe MuTual dataset.","citation":"@inproceedings{mutual,\n title = \"MuTual: A Dataset for Multi-Turn Dialogue Reasoning\",\n author = \"Cui, Leyang and Wu, Yu and Liu, Shujie and Zhang, Yue and Zhou, Ming\" ,\n booktitle = \"Proceedings of the 58th Conference of the Association for Computational Linguistics\",\n year = \"2020\",\n publisher = \"Association for Computational Linguistics\",\n}\n","homepage":"https://github.com/Nealcly/MuTual","license":"","features":{"answers":{"dtype":"string","id":null,"_type":"Value"},"options":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"},"article":{"dtype":"string","id":null,"_type":"Value"},"id":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"mutual","config_name":"mutual","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":5141602,"num_examples":7088,"dataset_name":"mutual"},"test":{"name":"test","num_bytes":634396,"num_examples":886,"dataset_name":"mutual"},"validation":{"name":"validation","num_bytes":624271,"num_examples":886,"dataset_name":"mutual"}},"download_checksums":{"https://github.com/Nealcly/MuTual/archive/master.zip":{"num_bytes":10997878,"checksum":"bb325cf6c672f0f02699993a37138b0fa0af6fcfc77ec81dfbe46add4d7b29f9"}},"download_size":10997878,"post_processing_size":null,"dataset_size":6400269,"size_in_bytes":17398147},"mutual_plus":{"description":"MuTual is a retrieval-based dataset for multi-turn dialogue reasoning, which is\nmodified from Chinese high school English listening comprehension test data.\n\nMuTualPlus is a more difficult MuTual that replaces positive responses with a safe responses.","citation":"@inproceedings{mutual,\n title = \"MuTual: A Dataset for Multi-Turn Dialogue Reasoning\",\n author = \"Cui, Leyang and Wu, Yu and Liu, Shujie and Zhang, Yue and Zhou, Ming\" ,\n booktitle = \"Proceedings of the 58th Conference of the Association for Computational Linguistics\",\n year = \"2020\",\n publisher = \"Association for Computational Linguistics\",\n}\n","homepage":"https://github.com/Nealcly/MuTual","license":"","features":{"answers":{"dtype":"string","id":null,"_type":"Value"},"options":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"},"article":{"dtype":"string","id":null,"_type":"Value"},"id":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"mutual","config_name":"mutual_plus","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":4921179,"num_examples":7088,"dataset_name":"mutual"},"test":{"name":"test","num_bytes":606620,"num_examples":886,"dataset_name":"mutual"},"validation":{"name":"validation","num_bytes":597340,"num_examples":886,"dataset_name":"mutual"}},"download_checksums":{"https://github.com/Nealcly/MuTual/archive/master.zip":{"num_bytes":10997878,"checksum":"bb325cf6c672f0f02699993a37138b0fa0af6fcfc77ec81dfbe46add4d7b29f9"}},"download_size":10997878,"post_processing_size":null,"dataset_size":6125139,"size_in_bytes":17123017}}
{"mutual":{"description":"MuTual is a retrieval-based dataset for multi-turn dialogue reasoning, which is\nmodified from Chinese high school English listening comprehension test data.\n\nThe MuTual dataset.","citation":"@inproceedings{mutual,\n title = \"MuTual: A Dataset for Multi-Turn Dialogue Reasoning\",\n author = \"Cui, Leyang and Wu, Yu and Liu, Shujie and Zhang, Yue and Zhou, Ming\" ,\n booktitle = \"Proceedings of the 58th Conference of the Association for Computational Linguistics\",\n year = \"2020\",\n publisher = \"Association for Computational Linguistics\",\n}\n","homepage":"https://github.com/Nealcly/MuTual","license":"","features":{"answers":{"dtype":"string","id":null,"_type":"Value"},"options":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"},"article":{"dtype":"string","id":null,"_type":"Value"},"id":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"mutual","config_name":"mutual","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":5141602,"num_examples":7088,"dataset_name":"mutual"},"test":{"name":"test","num_bytes":634396,"num_examples":886,"dataset_name":"mutual"},"validation":{"name":"validation","num_bytes":624271,"num_examples":886,"dataset_name":"mutual"}},"download_checksums":{"https://github.com/Nealcly/MuTual/archive/master.zip":{"num_bytes":10997878,"checksum":"bb325cf6c672f0f02699993a37138b0fa0af6fcfc77ec81dfbe46add4d7b29f9"}},"download_size":10997878,"post_processing_size":null,"dataset_size":6400269,"size_in_bytes":17398147},"mutual_plus":{"description":"MuTual is a retrieval-based dataset for multi-turn dialogue reasoning, which is\nmodified from Chinese high school English listening comprehension test data.\n\nMuTualPlus is a more difficult MuTual that replaces positive responses with a safe responses.","citation":"@inproceedings{mutual,\n title = \"MuTual: A Dataset for Multi-Turn Dialogue Reasoning\",\n author = \"Cui, Leyang and Wu, Yu and Liu, Shujie and Zhang, Yue and Zhou, Ming\" ,\n booktitle = \"Proceedings of the 58th Conference of the Association for Computational Linguistics\",\n year = \"2020\",\n publisher = \"Association for Computational Linguistics\",\n}\n","homepage":"https://github.com/Nealcly/MuTual","license":"","features":{"answers":{"dtype":"string","id":null,"_type":"Value"},"options":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"},"article":{"dtype":"string","id":null,"_type":"Value"},"id":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"mutual","config_name":"mutual_plus","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":4921179,"num_examples":7088,"dataset_name":"mutual"},"test":{"name":"test","num_bytes":606620,"num_examples":886,"dataset_name":"mutual"},"validation":{"name":"validation","num_bytes":597340,"num_examples":886,"dataset_name":"mutual"}},"download_checksums":{"https://github.com/Nealcly/MuTual/archive/master.zip":{"num_bytes":10997878,"checksum":"bb325cf6c672f0f02699993a37138b0fa0af6fcfc77ec81dfbe46add4d7b29f9"}},"download_size":10997878,"post_processing_size":null,"dataset_size":6125139,"size_in_bytes":17123017}}
datasets.BuilderConfig(name="mutual_plus",version=VERSION,description="MuTualPlus is a more difficult MuTual that replaces positive responses with a safe responses."),
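Similarly, the mutual schema above types "answers" as a plain string. Assuming it holds a single uppercase option letter (again an assumption about the raw data, not recorded in the metadata), a sketch:

def gold_response(example):
    # "A".."D" indexes into the parallel options list.
    return example["options"][ord(example["answers"]) - ord("A")]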
{"quac":{"description":"Question Answering in Context (QuAC) is a dataset for modeling, understanding, and \nparticipating in information seeking dialog. Data instances consist of an interactive\ndialog between two crowd workers: (1) a student who poses a sequence of freeform\nquestions to learn as much as possible about a hidden Wikipedia text, and (2)\na teacher who answers the questions by providing short excerpts (spans) from the text.\n","citation":"@article{choi2018quac,\n title={Quac: Question answering in context},\n author={Choi, Eunsol and He, He and Iyyer, Mohit and Yatskar, Mark and Yih, Wen-tau and Choi, Yejin and Liang, Percy and Zettlemoyer, Luke},\n journal={arXiv preprint arXiv:1808.07036},\n year={2018}\n}\n","homepage":"https://quac.ai/","license":"","features":{"title":{"dtype":"string","id":null,"_type":"Value"},"section_title":{"dtype":"string","id":null,"_type":"Value"},"paragraph":{"dtype":"string","id":null,"_type":"Value"},"question":{"dtype":"string","id":null,"_type":"Value"},"answer":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"quac","config_name":"quac","version":{"version_str":"1.1.0","description":null,"major":1,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":212391958,"num_examples":83568,"dataset_name":"quac"},"validation":{"name":"validation","num_bytes":20678483,"num_examples":7354,"dataset_name":"quac"}},"download_checksums":{"https://s3.amazonaws.com/my89public/quac/train_v0.2.json":{"num_bytes":68114819,"checksum":"ff5cca5a2e4b4d1cb5b5ced68b9fce88394ef6d93117426d6d4baafbcc05c56a"},"https://s3.amazonaws.com/my89public/quac/val_v0.2.json":{"num_bytes":8929167,"checksum":"09e622916280ba04c9352acb1bc5bbe80f11a2598f6f34e934c51d9e6570f378"}},"download_size":77043986,"post_processing_size":null,"dataset_size":233070441,"size_in_bytes":310114427}}
{"quac":{"description":"Question Answering in Context (QuAC) is a dataset for modeling, understanding, and \nparticipating in information seeking dialog. Data instances consist of an interactive\ndialog between two crowd workers: (1) a student who poses a sequence of freeform\nquestions to learn as much as possible about a hidden Wikipedia text, and (2)\na teacher who answers the questions by providing short excerpts (spans) from the text.\n","citation":"@article{choi2018quac,\n title={Quac: Question answering in context},\n author={Choi, Eunsol and He, He and Iyyer, Mohit and Yatskar, Mark and Yih, Wen-tau and Choi, Yejin and Liang, Percy and Zettlemoyer, Luke},\n journal={arXiv preprint arXiv:1808.07036},\n year={2018}\n}\n","homepage":"https://quac.ai/","license":"","features":{"title":{"dtype":"string","id":null,"_type":"Value"},"section_title":{"dtype":"string","id":null,"_type":"Value"},"paragraph":{"dtype":"string","id":null,"_type":"Value"},"question":{"dtype":"string","id":null,"_type":"Value"},"answer":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"quac","config_name":"quac","version":{"version_str":"1.1.0","description":null,"major":1,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":212391958,"num_examples":83568,"dataset_name":"quac"},"validation":{"name":"validation","num_bytes":20678483,"num_examples":7354,"dataset_name":"quac"}},"download_checksums":{"https://s3.amazonaws.com/my89public/quac/train_v0.2.json":{"num_bytes":68114819,"checksum":"ff5cca5a2e4b4d1cb5b5ced68b9fce88394ef6d93117426d6d4baafbcc05c56a"},"https://s3.amazonaws.com/my89public/quac/val_v0.2.json":{"num_bytes":8929167,"checksum":"09e622916280ba04c9352acb1bc5bbe80f11a2598f6f34e934c51d9e6570f378"}},"download_size":77043986,"post_processing_size":null,"dataset_size":233070441,"size_in_bytes":310114427}}
{"triviaqa":{"description":"TriviaQA is a reading comprehension dataset containing over 650K question-answer-evidence\ntriples. TriviaQA includes 95K question-answer pairs authored by trivia enthusiasts\nand independently gathered evidence documents, six per question on average, that provide\nhigh quality distant supervision for answering the questions.\n","citation":"@InProceedings{JoshiTriviaQA2017,\n author = {Joshi, Mandar and Choi, Eunsol and Weld, Daniel S. and Zettlemoyer, Luke},\n title = {TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension},\n booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics},\n month = {July},\n year = {2017},\n address = {Vancouver, Canada},\n publisher = {Association for Computational Linguistics},\n}\n","homepage":"https://nlp.cs.washington.edu/triviaqa/","license":"Apache License 2.0","features":{"question_id":{"dtype":"string","id":null,"_type":"Value"},"question_source":{"dtype":"string","id":null,"_type":"Value"},"question":{"dtype":"string","id":null,"_type":"Value"},"answer":{"aliases":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"},"value":{"dtype":"string","id":null,"_type":"Value"}},"search_results":{"feature":{"description":{"dtype":"string","id":null,"_type":"Value"},"filename":{"dtype":"string","id":null,"_type":"Value"},"rank":{"dtype":"int32","id":null,"_type":"Value"},"title":{"dtype":"string","id":null,"_type":"Value"},"url":{"dtype":"string","id":null,"_type":"Value"},"search_context":{"dtype":"string","id":null,"_type":"Value"}},"length":-1,"id":null,"_type":"Sequence"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"triviaqa","config_name":"triviaqa","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":1271393601,"num_examples":87622,"dataset_name":"triviaqa"},"validation":{"name":"validation","num_bytes":163819509,"num_examples":11313,"dataset_name":"triviaqa"}},"download_checksums":{"http://eaidata.bmk.sh/data/triviaqa-unfiltered.tar.gz":{"num_bytes":546481381,"checksum":"adc19b42769062d241a8fbe834c56e58598d9322eb6c614e9f33a68a2cf5523e"}},"download_size":546481381,"post_processing_size":null,"dataset_size":1435213110,"size_in_bytes":1981694491}}
{"triviaqa":{"description":"TriviaQA is a reading comprehension dataset containing over 650K question-answer-evidence\ntriples. TriviaQA includes 95K question-answer pairs authored by trivia enthusiasts\nand independently gathered evidence documents, six per question on average, that provide\nhigh quality distant supervision for answering the questions.\n","citation":"@InProceedings{JoshiTriviaQA2017,\n author = {Joshi, Mandar and Choi, Eunsol and Weld, Daniel S. and Zettlemoyer, Luke},\n title = {TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension},\n booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics},\n month = {July},\n year = {2017},\n address = {Vancouver, Canada},\n publisher = {Association for Computational Linguistics},\n}\n","homepage":"https://nlp.cs.washington.edu/triviaqa/","license":"Apache License 2.0","features":{"question_id":{"dtype":"string","id":null,"_type":"Value"},"question_source":{"dtype":"string","id":null,"_type":"Value"},"question":{"dtype":"string","id":null,"_type":"Value"},"answer":{"aliases":{"feature":{"dtype":"string","id":null,"_type":"Value"},"length":-1,"id":null,"_type":"Sequence"},"value":{"dtype":"string","id":null,"_type":"Value"}},"search_results":{"feature":{"description":{"dtype":"string","id":null,"_type":"Value"},"filename":{"dtype":"string","id":null,"_type":"Value"},"rank":{"dtype":"int32","id":null,"_type":"Value"},"title":{"dtype":"string","id":null,"_type":"Value"},"url":{"dtype":"string","id":null,"_type":"Value"},"search_context":{"dtype":"string","id":null,"_type":"Value"}},"length":-1,"id":null,"_type":"Sequence"}},"post_processed":null,"supervised_keys":null,"task_templates":null,"builder_name":"triviaqa","config_name":"triviaqa","version":{"version_str":"0.0.1","description":null,"major":0,"minor":0,"patch":1},"splits":{"train":{"name":"train","num_bytes":1271393601,"num_examples":87622,"dataset_name":"triviaqa"},"validation":{"name":"validation","num_bytes":163819509,"num_examples":11313,"dataset_name":"triviaqa"}},"download_checksums":{"http://eaidata.bmk.sh/data/triviaqa-unfiltered.tar.gz":{"num_bytes":546481381,"checksum":"adc19b42769062d241a8fbe834c56e58598d9322eb6c614e9f33a68a2cf5523e"}},"download_size":546481381,"post_processing_size":null,"dataset_size":1435213110,"size_in_bytes":1981694491}}