Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
2e0b659a
Unverified
Commit
2e0b659a
authored
Apr 29, 2022
by
Stella Biderman
Committed by
GitHub
Apr 29, 2022
Browse files
Merge pull request #27 from bigscience-workshop/add_HuffPost
Add HuffPost
parents
9af73872
2c165797
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
58 additions
and
0 deletions
+58
-0
lm_eval/tasks/HuffPost.py
lm_eval/tasks/HuffPost.py
+54
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+4
-0
No files found.
lm_eval/tasks/HuffPost.py
0 → 100644
View file @
2e0b659a
"""
A dataset of approximately 200K news headlines from the year 2012 to 2018 collected from HuffPost.
Homepage: https://www.kaggle.com/datasets/rmisra/news-category-dataset
"""
from
lm_eval.base
import
PromptSourceTask
# BibTeX entries cited for this task: one @book record (Misra & Grover, 2021)
# and one @dataset record for the News Category Dataset (Misra, 2018).
# NOTE(review): nothing in the visible code reads this constant - presumably it
# is kept for attribution only; confirm before removing.
_CITATION = """
\
@book{book,
author = {Misra, Rishabh and Grover, Jigyasa},
year = {2021},
month = {01},
pages = {},
title = {Sculpting Data for ML: The first act of Machine Learning},
isbn = {978-0-578-83125-1}
}
@dataset{dataset,
author = {Misra, Rishabh},
year = {2018},
month = {06},
pages = {},
title = {News Category Dataset},
doi = {10.13140/RG.2.2.20331.18729}
}
"""
class HuffPost(PromptSourceTask):
    """Task definition for the HuffPost news-headline dataset.

    Only the test split is advertised as available; the training and
    validation accessors return ``None`` unless a subclass overrides the
    corresponding ``has_*_docs`` method.

    NOTE(review): ``self.dataset`` and ``self._training_docs`` are not set in
    this class - presumably the base class provides them; confirm there.
    """

    VERSION = 0
    DATASET_PATH = "khalidalt/HuffPost"
    DATASET_NAME = None

    def has_training_docs(self):
        """Report that no training split is used."""
        return False

    def has_validation_docs(self):
        """Report that no validation split is used."""
        return False

    def has_test_docs(self):
        """Report that a test split is used."""
        return True

    def training_docs(self):
        """Return the cached training documents, or ``None`` if unavailable.

        The ``"train"`` split is materialised into a list on first access and
        stored on ``self._training_docs`` for reuse.
        """
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            self._training_docs = list(self.dataset["train"])
        return self._training_docs

    def validation_docs(self):
        """Return the ``"validation"`` split, or ``None`` if unavailable."""
        if not self.has_validation_docs():
            return None
        return self.dataset["validation"]

    def test_docs(self):
        """Return the ``"test"`` split, or ``None`` if unavailable."""
        if not self.has_test_docs():
            return None
        return self.dataset["test"]
lm_eval/tasks/__init__.py
View file @
2e0b659a
...
@@ -61,6 +61,7 @@ from . import e2e_nlg_cleaned
...
@@ -61,6 +61,7 @@ from . import e2e_nlg_cleaned
from
.
import
gem_asset_turk
from
.
import
gem_asset_turk
from
.
import
crows_pairs_multilingual
from
.
import
crows_pairs_multilingual
from
.
import
HuffPost
########################################
########################################
# Translation tasks
# Translation tasks
########################################
########################################
...
@@ -322,6 +323,9 @@ TASK_REGISTRY = {
...
@@ -322,6 +323,9 @@ TASK_REGISTRY = {
# Crows-Pairs
# Crows-Pairs
"crows_pairs_english"
:
crows_pairs_multilingual
.
CrowsPairsEnglish
,
"crows_pairs_english"
:
crows_pairs_multilingual
.
CrowsPairsEnglish
,
"crows_pairs_french"
:
crows_pairs_multilingual
.
CrowsPairsFrench
,
"crows_pairs_french"
:
crows_pairs_multilingual
.
CrowsPairsFrench
,
# News
"huffpost"
:
HuffPost
.
HuffPost
,
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment