Commit 897ae76d authored by Jon Tow
Browse files

Add custom `image`less `HeadQA` dataset

parent 13317f8c
{"es": {"description": "HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the\nSpanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio\nde Sanidad, Consumo y Bienestar Social.\nThe dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.\n", "citation": "@inproceedings{vilares-gomez-rodriguez-2019-head,\n title = \"{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning\",\n author = \"Vilares, David and\n G{'o}mez-Rodr{'i}guez, Carlos\",\n booktitle = \"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics\",\n month = jul,\n year = \"2019\",\n address = \"Florence, Italy\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/P19-1092\",\n doi = \"10.18653/v1/P19-1092\",\n pages = \"960--966\",\n abstract = \"We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. 
We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.\",\n}\n", "homepage": "https://aghie.github.io/head-qa/", "license": "MIT License", "features": {"name": {"dtype": "string", "id": null, "_type": "Value"}, "year": {"dtype": "string", "id": null, "_type": "Value"}, "category": {"dtype": "string", "id": null, "_type": "Value"}, "qid": {"dtype": "int32", "id": null, "_type": "Value"}, "qtext": {"dtype": "string", "id": null, "_type": "Value"}, "ra": {"dtype": "int32", "id": null, "_type": "Value"}, "answers": [{"aid": {"dtype": "int32", "id": null, "_type": "Value"}, "atext": {"dtype": "string", "id": null, "_type": "Value"}}]}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "head_qa", "config_name": "es", "version": {"version_str": "1.1.0", "description": null, "major": 1, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1196021, "num_examples": 2657, "dataset_name": "head_qa"}, "test": {"name": "test", "num_bytes": 1169819, "num_examples": 2742, "dataset_name": "head_qa"}, "validation": {"name": "validation", "num_bytes": 556924, "num_examples": 1366, "dataset_name": "head_qa"}}, "download_checksums": {"https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t": {"num_bytes": 79365502, "checksum": "6ec29a3f55153d167f0bdf05395558919ba0b1df9c63e79ffceda2a09884ad8b"}}, "download_size": 79365502, "post_processing_size": null, "dataset_size": 2922764, "size_in_bytes": 82288266}, "en": {"description": "HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the\nSpanish healthcare system, and are challenging even for highly specialized humans. 
They are designed by the Ministerio\nde Sanidad, Consumo y Bienestar Social.\nThe dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.\n", "citation": "@inproceedings{vilares-gomez-rodriguez-2019-head,\n title = \"{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning\",\n author = \"Vilares, David and\n G{'o}mez-Rodr{'i}guez, Carlos\",\n booktitle = \"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics\",\n month = jul,\n year = \"2019\",\n address = \"Florence, Italy\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/P19-1092\",\n doi = \"10.18653/v1/P19-1092\",\n pages = \"960--966\",\n abstract = \"We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. 
We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.\",\n}\n", "homepage": "https://aghie.github.io/head-qa/", "license": "MIT License", "features": {"name": {"dtype": "string", "id": null, "_type": "Value"}, "year": {"dtype": "string", "id": null, "_type": "Value"}, "category": {"dtype": "string", "id": null, "_type": "Value"}, "qid": {"dtype": "int32", "id": null, "_type": "Value"}, "qtext": {"dtype": "string", "id": null, "_type": "Value"}, "ra": {"dtype": "int32", "id": null, "_type": "Value"}, "answers": [{"aid": {"dtype": "int32", "id": null, "_type": "Value"}, "atext": {"dtype": "string", "id": null, "_type": "Value"}}]}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "head_qa", "config_name": "en", "version": {"version_str": "1.1.0", "description": null, "major": 1, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1123151, "num_examples": 2657, "dataset_name": "head_qa"}, "test": {"name": "test", "num_bytes": 1097349, "num_examples": 2742, "dataset_name": "head_qa"}, "validation": {"name": "validation", "num_bytes": 523462, "num_examples": 1366, "dataset_name": "head_qa"}}, "download_checksums": {"https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t": {"num_bytes": 79365502, "checksum": "6ec29a3f55153d167f0bdf05395558919ba0b1df9c63e79ffceda2a09884ad8b"}}, "download_size": 79365502, "post_processing_size": null, "dataset_size": 2743962, "size_in_bytes": 82109464}}
\ No newline at end of file
# coding=utf-8
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# NOTE: This is an exact copy of
# https://github.com/huggingface/datasets/blob/3804442bb7cfcb9d52044d92688115cfdc69c2da/datasets/head_qa/head_qa.py
# with the exception of the `image` feature. This is to avoid adding `Pillow`
# as a dependency.
"""HEAD-QA: A Healthcare Dataset for Complex Reasoning"""
import json
import os
import datasets
# BibTeX entry for the HEAD-QA paper; passed as `citation` to `datasets.DatasetInfo` in `_info`.
_CITATION = """\
@inproceedings{vilares-gomez-rodriguez-2019-head,
title = "{HEAD}-{QA}: A Healthcare Dataset for Complex Reasoning",
author = "Vilares, David and
G{\'o}mez-Rodr{\'i}guez, Carlos",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P19-1092",
doi = "10.18653/v1/P19-1092",
pages = "960--966",
abstract = "We present HEAD-QA, a multi-choice question answering testbed to encourage research on complex reasoning. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. We then consider monolingual (Spanish) and cross-lingual (to English) experiments with information retrieval and neural techniques. We show that: (i) HEAD-QA challenges current methods, and (ii) the results lag well behind human performance, demonstrating its usefulness as a benchmark for future work.",
}
"""
# Human-readable dataset summary; passed as `description` to `datasets.DatasetInfo` in `_info`.
_DESCRIPTION = """\
HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the
Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio
de Sanidad, Consumo y Bienestar Social.
The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology.
"""
# Project homepage; passed as `homepage` to `datasets.DatasetInfo` in `_info`.
_HOMEPAGE = "https://aghie.github.io/head-qa/"
# License name; passed as `license` to `datasets.DatasetInfo` in `_info`.
_LICENSE = "MIT License"
# Download location handed to `dl_manager.download_and_extract` in `_split_generators`.
_URL = "https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t"
# Maps a builder config name to the value used both as the sub-directory under the
# extracted download and as the suffix of the split file names (e.g. `train_HEAD.json`).
_DIRS = {"es": "HEAD", "en": "HEAD_EN"}
class HeadQA(datasets.GeneratorBasedBuilder):
    """Dataset builder for HEAD-QA with a Spanish ("es") and an English ("en") config.

    The declared features contain no `image` field.
    """

    VERSION = datasets.Version("1.1.0")

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(name="es", version=VERSION, description="Spanish HEAD dataset"),
        datasets.BuilderConfig(name="en", version=VERSION, description="English HEAD dataset"),
    ]

    DEFAULT_CONFIG_NAME = "es"

    def _info(self):
        """Return the `DatasetInfo` holding the description, feature schema and provenance."""
        # Schema of a single entry of the per-question "answers" list.
        answer_schema = {
            "aid": datasets.Value("int32"),
            "atext": datasets.Value("string"),
        }
        schema = datasets.Features(
            {
                "name": datasets.Value("string"),
                "year": datasets.Value("string"),
                "category": datasets.Value("string"),
                "qid": datasets.Value("int32"),
                "qtext": datasets.Value("string"),
                "ra": datasets.Value("int32"),
                "answers": [answer_schema],
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the data, then return the train, test and validation generators."""
        root = dl_manager.download_and_extract(_URL)
        lang_dir = _DIRS[self.config.name]
        lang_root = os.path.join(root, lang_dir)
        # (split, file-name prefix) pairs, listed in the order the generators are returned.
        split_prefixes = (
            (datasets.Split.TRAIN, "train"),
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "data_dir": root,
                    "filepath": os.path.join(lang_root, f"{prefix}_{lang_dir}.json"),
                },
            )
            for split, prefix in split_prefixes
        ]

    def _generate_examples(self, data_dir, filepath):
        """Yield one `(key, example)` pair per question found in the JSON file at `filepath`.

        `data_dir` is part of the `gen_kwargs` built by `_split_generators`; it is not read here.
        """
        with open(filepath, encoding="utf-8") as handle:
            exams = json.load(handle)["exams"]
            for exam_idx, exam in enumerate(exams.values()):
                # Exam-level fields are shared by every question of that exam.
                exam_fields = {
                    "name": exam["name"].strip(),
                    "year": exam["year"].strip(),
                    "category": exam["category"].strip(),
                }
                for item in exam["data"]:
                    question_id = int(item["qid"].strip())
                    example = dict(exam_fields)
                    example["qid"] = question_id
                    example["qtext"] = item["qtext"].strip()
                    example["ra"] = int(item["ra"].strip())
                    example["answers"] = [
                        {"aid": option["aid"], "atext": option["atext"].strip()}
                        for option in item["answers"]
                    ]
                    # Key combines the exam's position in the file with the question id.
                    yield f"{exam_idx}_{question_id}", example
...@@ -8,6 +8,8 @@ even for highly specialized humans. ...@@ -8,6 +8,8 @@ even for highly specialized humans.
Homepage: https://aghie.github.io/head-qa/ Homepage: https://aghie.github.io/head-qa/
""" """
import inspect
import lm_eval.datasets.headqa.headqa
from lm_eval.base import MultipleChoiceTask from lm_eval.base import MultipleChoiceTask
...@@ -25,7 +27,7 @@ _CITATION = """ ...@@ -25,7 +27,7 @@ _CITATION = """
class HeadQABase(MultipleChoiceTask): class HeadQABase(MultipleChoiceTask):
VERSION = 0 VERSION = 0
DATASET_PATH = "head_qa" DATASET_PATH = inspect.getfile(lm_eval.datasets.headqa.headqa)
def has_training_docs(self): def has_training_docs(self):
return True return True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment