"sgl-kernel/csrc/vscode:/vscode.git/clone" did not exist on "e0b2d3eebebd3d4efc7e323ad2dee605b607f394"
Unverified Commit 2e0b659a authored by Stella Biderman's avatar Stella Biderman Committed by GitHub
Browse files

Merge pull request #27 from bigscience-workshop/add_HuffPost

Add HuffPost 
parents 9af73872 2c165797
"""
A dataset of approximately 200K news headlines from the year 2012 to 2018 collected from HuffPost.
Homepage: https://www.kaggle.com/datasets/rmisra/news-category-dataset
"""
from lm_eval.base import PromptSourceTask
# BibTeX entries credited for this task: one @book entry (Misra & Grover, 2021)
# and one @dataset entry (Misra, 2018) for the News Category Dataset.
_CITATION = """\
@book{book,
author = {Misra, Rishabh and Grover, Jigyasa},
year = {2021},
month = {01},
pages = {},
title = {Sculpting Data for ML: The first act of Machine Learning},
isbn = {978-0-578-83125-1}
}
@dataset{dataset,
author = {Misra, Rishabh},
year = {2018},
month = {06},
pages = {},
title = {News Category Dataset},
doi = {10.13140/RG.2.2.20331.18729}
}
"""
class HuffPost(PromptSourceTask):
    """PromptSource task over the ``khalidalt/HuffPost`` dataset.

    Only the test split is advertised: ``has_test_docs`` returns ``True``
    while the training and validation flags return ``False``.
    """

    VERSION = 0
    DATASET_PATH = "khalidalt/HuffPost"
    DATASET_NAME = None

    def has_training_docs(self):
        """Report that no training split is available."""
        return False

    def has_validation_docs(self):
        """Report that no validation split is available."""
        return False

    def has_test_docs(self):
        """Report that a test split is available."""
        return True

    def training_docs(self):
        """Return the cached list of training documents.

        Returns ``None`` when ``has_training_docs()`` is false. Otherwise the
        ``"train"`` split is materialized into a list on first access and
        stored in ``self._training_docs`` for later calls.
        """
        # NOTE(review): ``self.dataset`` and ``self._training_docs`` are
        # presumably set up by the base class -- confirm.
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            self._training_docs = list(self.dataset["train"])
        return self._training_docs

    def validation_docs(self):
        """Return the ``"validation"`` split, or ``None`` if not available."""
        if not self.has_validation_docs():
            return None
        return self.dataset["validation"]

    def test_docs(self):
        """Return the ``"test"`` split, or ``None`` if not available."""
        return self.dataset["test"] if self.has_test_docs() else None
......@@ -61,6 +61,7 @@ from . import e2e_nlg_cleaned
from . import gem_asset_turk
from . import crows_pairs_multilingual
from . import HuffPost
########################################
# Translation tasks
########################################
......@@ -322,6 +323,9 @@ TASK_REGISTRY = {
# Crows-Pairs
"crows_pairs_english": crows_pairs_multilingual.CrowsPairsEnglish,
"crows_pairs_french": crows_pairs_multilingual.CrowsPairsFrench,
# News
"huffpost": HuffPost.HuffPost,
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment