Unverified Commit fe4d17fc authored by Francisco Massa, committed by GitHub

Rename KineticsVideo to Kinetics400 (#1183)

Also add docs
parent 5c0b7f31
@@ -200,3 +200,11 @@ USPS
.. autoclass:: USPS
:members: __getitem__
:special-members:

Kinetics-400
~~~~~~~~~~~~

.. autoclass:: Kinetics400
:members: __getitem__
:special-members:
@@ -139,7 +139,7 @@ def main(args):
if args.distributed:
print("It is recommended to pre-compute the dataset cache "
"on a single-gpu first, as it will be faster")
dataset = torchvision.datasets.KineticsVideo(
dataset = torchvision.datasets.Kinetics400(
traindir,
frames_per_clip=args.clip_len,
step_between_clips=1,
@@ -171,7 +171,7 @@ def main(args):
if args.distributed:
print("It is recommended to pre-compute the dataset cache "
"on a single-gpu first, as it will be faster")
dataset_test = torchvision.datasets.KineticsVideo(
dataset_test = torchvision.datasets.Kinetics400(
valdir,
frames_per_clip=args.clip_len,
step_between_clips=1,
......
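For users of these reference scripts, only the constructor name changes; a minimal usage sketch of the renamed dataset (the root path and clip settings below are illustrative assumptions, not values from this commit):

```python
import torchvision

# Hypothetical local layout: root/<class_name>/<video>.avi
dataset = torchvision.datasets.Kinetics400(  # was torchvision.datasets.KineticsVideo
    "datasets/kinetics400/train",
    frames_per_clip=16,
    step_between_clips=1,
)

# __getitem__ returns (video, audio, label), as documented below
video, audio, label = dataset[0]
print(video.shape)  # Tensor[T, H, W, C], here T == 16
```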
@@ -19,7 +19,7 @@ from .celeba import CelebA
from .sbd import SBDataset
from .vision import VisionDataset
from .usps import USPS
from .kinetics import KineticsVideo
from .kinetics import Kinetics400
from .hmdb51 import HMDB51
from .ucf101 import UCF101
@@ -31,4 +31,4 @@ __all__ = ('LSUN', 'LSUNClass',
'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
'Caltech101', 'Caltech256', 'CelebA', 'SBDataset', 'VisionDataset',
'USPS', 'KineticsVideo', 'HMDB51', 'UCF101')
'USPS', 'Kinetics400', 'HMDB51', 'UCF101')
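Downstream code that imported the old class directly needs the same one-line rename; a sketch of the before/after import:

```python
# Before this commit:
# from torchvision.datasets import KineticsVideo
# After:
from torchvision.datasets import Kinetics400
```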
@@ -4,9 +4,40 @@ from .folder import make_dataset
from .vision import VisionDataset
class KineticsVideo(VisionDataset):
class Kinetics400(VisionDataset):
"""
`Kinetics-400 <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
dataset.
Kinetics-400 is an action recognition video dataset.
This dataset considers every video as a collection of fixed-size video clips, specified
by ``frames_per_clip``, where the step in frames between each clip is given by
``step_between_clips``.
To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
elements will come from video 1, and the next three elements from video 2.
Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
frames in a video may be present.
Internally, it uses a ``VideoClips`` object to handle clip creation.
Args:
root (string): Root directory of the Kinetics-400 Dataset.
frames_per_clip (int): number of frames in a clip
step_between_clips (int): number of frames between each clip
transform (callable, optional): A function/transform that takes in a TxHxWxC video
and returns a transformed version.
Returns:
video (Tensor[T, H, W, C]): the `T` video frames
audio (Tensor[K, L]): the audio frames, where `K` is the number of channels
and `L` is the number of points
label (int): class of the video clip
"""
def __init__(self, root, frames_per_clip, step_between_clips=1, transform=None):
super(KineticsVideo, self).__init__(root)
super(Kinetics400, self).__init__(root)
extensions = ('avi',)
classes = list(sorted(list_dir(root)))
......
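The clip-count example in the docstring can be sanity-checked with a small helper; this is a sketch of the arithmetic implied by the drop-incomplete-clips behavior described above (``num_clips`` is a hypothetical name, not part of the torchvision API):

```python
def num_clips(num_frames, frames_per_clip, step_between_clips):
    # Clips start at frame 0, step, 2*step, ...; any clip that would
    # extend past the end of the video is dropped entirely.
    if num_frames < frames_per_clip:
        return 0
    return (num_frames - frames_per_clip) // step_between_clips + 1

# Two videos with 10 and 15 frames, frames_per_clip=5, step_between_clips=5:
assert num_clips(10, 5, 5) == 2  # clips start at frames 0 and 5
assert num_clips(15, 5, 5) == 3  # clips start at frames 0, 5 and 10
# Dataset size: 2 + 3 == 5, matching the docstring example.
```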