Add hellaswag datasets

6e092c81 · Rayyyyy · a6b358ca · 6e092c81 · 6e092c81 · 6e092c81
Commit 6e092c81 authored May 24, 2024 by Rayyyyy
6 changed files
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -21,6 +21,7 @@ from typing import (
 )

 import datasets
+import inspect
 import numpy as np
 from tqdm import tqdm

@@ -707,7 +708,7 @@ class ConfigurableTask(Task):
            self.OUTPUT_TYPE = self.config.output_type

        if self.config.dataset_path is not None:
-            self.DATASET_PATH = self.config.dataset_path
+            self.DATASET_PATH = inspect.getfile(self.config.dataset_path)

        if self.config.dataset_name is not None:
            self.DATASET_NAME = self.config.dataset_name

--- a/lm_eval/datasets/__init__.py
+++ b/lm_eval/datasets/__init__.py
--- a/lm_eval/datasets/hellaswag/__init__.py
+++ b/lm_eval/datasets/hellaswag/__init__.py
--- a/lm_eval/datasets/hellaswag/dataset_infos.json
+++ b/lm_eval/datasets/hellaswag/dataset_infos.json
+{"default": {"description": "\nHellaSwag: Can a Machine Really Finish Your Sentence? is a new dataset for commonsense NLI. A paper was published at ACL2019.\n", "citation": "@inproceedings{zellers2019hellaswag,\n    title={HellaSwag: Can a Machine Really Finish Your Sentence?},\n    author={Zellers, Rowan and Holtzman, Ari and Bisk, Yonatan and Farhadi, Ali and Choi, Yejin},\n    booktitle ={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},\n    year={2019}\n}\n", "homepage": "https://rowanzellers.com/hellaswag/", "license": "", "features": {"ind": {"dtype": "int32", "id": null, "_type": "Value"}, "activity_label": {"dtype": "string", "id": null, "_type": "Value"}, "ctx_a": {"dtype": "string", "id": null, "_type": "Value"}, "ctx_b": {"dtype": "string", "id": null, "_type": "Value"}, "ctx": {"dtype": "string", "id": null, "_type": "Value"}, "endings": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "source_id": {"dtype": "string", "id": null, "_type": "Value"}, "split": {"dtype": "string", "id": null, "_type": "Value"}, "split_type": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "hellaswag", "config_name": "default", "version": {"version_str": "0.1.0", "description": null, "major": 0, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 43232624, "num_examples": 39905, "dataset_name": "hellaswag"}, "test": {"name": "test", "num_bytes": 10791853, "num_examples": 10003, "dataset_name": "hellaswag"}, "validation": {"name": "validation", "num_bytes": 11175717, "num_examples": 10042, "dataset_name": "hellaswag"}}, "download_checksums": {"https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_train.jsonl": {"num_bytes": 47496131, "checksum": 
"dae5e69249868cb9fe4e23ff925c60b66169564cfb7072d793cd7356a2b69f8d"}, "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_test.jsonl": {"num_bytes": 11752147, "checksum": "da082b00543e422b8d25394614d102944586986def4de5cd1bd36d86bcb76261"}, "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_val.jsonl": {"num_bytes": 12246618, "checksum": "0aa3b88843990f3f10a97b9575c94d7b71fb2205240ba04ae4884d9e9c992588"}}, "download_size": 71494896, "post_processing_size": null, "dataset_size": 65200194, "size_in_bytes": 136695090}}
\ No newline at end of file
--- a/lm_eval/datasets/hellaswag/hellaswag.py
+++ b/lm_eval/datasets/hellaswag/hellaswag.py
+"""Dataset loading script for HellaSwag, built on the `datasets` library.
+
+Declares the citation, description and download URLs used by the
+`Hellaswag` builder defined in this module.
+"""
+
+
+import json
+
+import datasets
+
+
+# BibTeX entry for the HellaSwag paper (ACL 2019).
+_CITATION = """\
+@inproceedings{zellers2019hellaswag,
+    title={HellaSwag: Can a Machine Really Finish Your Sentence?},
+    author={Zellers, Rowan and Holtzman, Ari and Bisk, Yonatan and Farhadi, Ali and Choi, Yejin},
+    booktitle ={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
+    year={2019}
+}
+"""
+
+# Human-readable summary passed to `datasets.DatasetInfo`.
+_DESCRIPTION = """
+HellaSwag: Can a Machine Really Finish Your Sentence? is a new dataset for commonsense NLI. A paper was published at ACL2019.
+"""
+# Base URL of the raw JSON Lines files in the upstream GitHub repository.
+_URL = "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/"
+# Download URL per split. The keys are looked up in `_split_generators`;
+# "dev" points at the upstream validation file (hellaswag_val.jsonl).
+_URLS = {
+    "train": _URL + "hellaswag_train.jsonl",
+    "test": _URL + "hellaswag_test.jsonl",
+    "dev": _URL + "hellaswag_val.jsonl",
+}
+
+
+class Hellaswag(datasets.GeneratorBasedBuilder):
+    """Dataset builder for HellaSwag with train, test and validation splits.
+
+    Downloads the JSON Lines files listed in `_URLS` and yields one example
+    per line, using the feature schema declared in `_info`.
+    """
+
+    # Version of this dataset builder.
+    VERSION = datasets.Version("0.1.0")
+
+    def _info(self):
+        """Return the `datasets.DatasetInfo` describing this dataset.
+
+        The info carries the description, the feature schema, the homepage
+        and the citation; no supervised keys are set.
+        """
+        return datasets.DatasetInfo(
+            # This is the description that will appear on the datasets page.
+            description=_DESCRIPTION,
+            # Feature schema: one entry per field of an example.
+            features=datasets.Features(
+                {
+                    # Every field is a string except `ind` (int32) and
+                    # `endings` (a sequence of strings).
+                    "ind": datasets.Value("int32"),
+                    "activity_label": datasets.Value("string"),
+                    "ctx_a": datasets.Value("string"),
+                    "ctx_b": datasets.Value("string"),
+                    "ctx": datasets.Value("string"),
+                    "endings": datasets.features.Sequence(datasets.Value("string")),
+                    "source_id": datasets.Value("string"),
+                    "split": datasets.Value("string"),
+                    "split_type": datasets.Value("string"),
+                    "label": datasets.Value("string"),
+                }
+            ),
+            # If there's a common (input, target) tuple from the features,
+            # specify them here. They'll be used if as_supervised=True in
+            # builder.as_dataset. None is declared for this dataset.
+            supervised_keys=None,
+            # Homepage of the dataset for documentation
+            homepage="https://rowanzellers.com/hellaswag/",
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Download the data files and return one SplitGenerator per split.
+
+        Args:
+            dl_manager: Download manager supplied by `datasets`; its
+                `download_and_extract` is called with the `_URLS` mapping.
+
+        Returns:
+            A list of `datasets.SplitGenerator` objects for the TRAIN, TEST
+            and VALIDATION splits, in that order.
+        """
+        # The result is indexed below with the same keys as `_URLS`
+        # ("train", "test", "dev").
+        urls_to_download = _URLS
+        dl_dir = dl_manager.download_and_extract(urls_to_download)
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                # These kwargs will be passed to _generate_examples
+                gen_kwargs={"filepath": dl_dir["train"]},
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST,
+                # These kwargs will be passed to _generate_examples
+                gen_kwargs={"filepath": dl_dir["test"]},
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.VALIDATION,
+                # The "dev" download entry backs the VALIDATION split.
+                gen_kwargs={"filepath": dl_dir["dev"]},
+            ),
+        ]
+
+    def _generate_examples(self, filepath):
+        """Yield `(key, example)` pairs from one JSON Lines file.
+
+        Args:
+            filepath: Path of the file to read; it is opened as UTF-8 text.
+
+        Yields:
+            Tuples of the zero-based line index and a dict holding the
+            fields declared in `_info`.
+        """
+        with open(filepath, encoding="utf-8") as f:
+            # Each line is parsed as a standalone JSON object.
+            for id_, row in enumerate(f):
+                data = json.loads(row)
+                # `ctx_a`, `ctx_b`, `endings` and `label` fall back to an
+                # empty value when the key is missing; the remaining keys
+                # are required and raise KeyError if absent.
+                yield id_, {
+                    "ind": int(data["ind"]),
+                    "activity_label": data["activity_label"],
+                    "ctx_a": data.get("ctx_a", ""),
+                    "ctx_b": data.get("ctx_b", ""),
+                    "ctx": data["ctx"],
+                    "endings": data.get("endings", []),
+                    "source_id": data["source_id"],
+                    "split": data["split"],
+                    "split_type": data["split_type"],
+                    # Stored as a string regardless of the JSON type.
+                    "label": str(data.get("label", "")),
+                }
--- a/lm_eval/tasks/hellaswag/hellaswag.yaml
+++ b/lm_eval/tasks/hellaswag/hellaswag.yaml
 group:
  - multiple_choice
 task: hellaswag
-dataset_path: hellaswag
-dataset_name: null
+dataset_path: lm_eval.datasets.hellaswag.hellaswag
+dataset_name: hellaswag
 output_type: multiple_choice
 training_split: train
 validation_split: validation