Unverified Commit fe4d17fc authored by Francisco Massa, committed by GitHub

Rename KineticsVideo to Kinetics400 (#1183)

Also add docs
parent 5c0b7f31
@@ -200,3 +200,11 @@ USPS
.. autoclass:: USPS
:members: __getitem__
:special-members:

Kinetics-400
~~~~~~~~~~~~

.. autoclass:: Kinetics400
:members: __getitem__
:special-members:
@@ -139,7 +139,7 @@ def main(args):
if args.distributed:
print("It is recommended to pre-compute the dataset cache "
"on a single-gpu first, as it will be faster")
dataset = torchvision.datasets.KineticsVideo(
dataset = torchvision.datasets.Kinetics400(
traindir,
frames_per_clip=args.clip_len,
step_between_clips=1,
@@ -171,7 +171,7 @@ def main(args):
if args.distributed:
print("It is recommended to pre-compute the dataset cache "
"on a single-gpu first, as it will be faster")
dataset_test = torchvision.datasets.KineticsVideo(
dataset_test = torchvision.datasets.Kinetics400(
valdir,
frames_per_clip=args.clip_len,
step_between_clips=1,
......
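For users of these reference scripts, only the constructor name changes; a minimal usage sketch of the renamed dataset (the root path and clip settings below are illustrative assumptions, not values from this commit):

```python
import torchvision

# Hypothetical local layout: root/<class_name>/<video>.avi
dataset = torchvision.datasets.Kinetics400(  # was torchvision.datasets.KineticsVideo
    "datasets/kinetics400/train",
    frames_per_clip=16,
    step_between_clips=1,
)

# __getitem__ returns (video, audio, label), as documented below
video, audio, label = dataset[0]
print(video.shape)  # Tensor[T, H, W, C], here T == 16
```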
@@ -19,7 +19,7 @@ from .celeba import CelebA
from .sbd import SBDataset
from .vision import VisionDataset
from .usps import USPS
from .kinetics import KineticsVideo
from .kinetics import Kinetics400
from .hmdb51 import HMDB51
from .ucf101 import UCF101
@@ -31,4 +31,4 @@ __all__ = ('LSUN', 'LSUNClass',
'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
'Caltech101', 'Caltech256', 'CelebA', 'SBDataset', 'VisionDataset',
'USPS', 'KineticsVideo', 'HMDB51', 'UCF101')
'USPS', 'Kinetics400', 'HMDB51', 'UCF101')
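Downstream code that imported the old class directly needs the same one-line rename; a sketch of the before/after import:

```python
# Before this commit:
# from torchvision.datasets import KineticsVideo
# After:
from torchvision.datasets import Kinetics400
```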
@@ -4,9 +4,40 @@ from .folder import make_dataset
from .vision import VisionDataset
class KineticsVideo(VisionDataset):
class Kinetics400(VisionDataset):
"""
`Kinetics-400 <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
dataset.
Kinetics-400 is an action recognition video dataset.
This dataset considers every video as a collection of fixed-size video clips, specified
by ``frames_per_clip``, where the step in frames between each clip is given by
``step_between_clips``.
To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
elements will come from video 1, and the next three elements from video 2.
Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
frames in a video may be present.
Internally, it uses a ``VideoClips`` object to handle clip creation.
Args:
root (string): Root directory of the Kinetics-400 Dataset.
frames_per_clip (int): number of frames in a clip
step_between_clips (int): number of frames between each clip
transform (callable, optional): A function/transform that takes in a TxHxWxC video
and returns a transformed version.
Returns:
video (Tensor[T, H, W, C]): the `T` video frames
audio (Tensor[K, L]): the audio frames, where `K` is the number of channels
and `L` is the number of points
label (int): class of the video clip
"""
def __init__(self, root, frames_per_clip, step_between_clips=1, transform=None):
super(KineticsVideo, self).__init__(root)
super(Kinetics400, self).__init__(root)
extensions = ('avi',)
classes = list(sorted(list_dir(root)))
......
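The clip-count example in the docstring can be sanity-checked with a small helper; this is a sketch of the arithmetic implied by the drop-incomplete-clips behavior described above (``num_clips`` is a hypothetical name, not part of the torchvision API):

```python
def num_clips(num_frames, frames_per_clip, step_between_clips):
    # Clips start at frame 0, step, 2*step, ...; any clip that would
    # extend past the end of the video is dropped entirely.
    if num_frames < frames_per_clip:
        return 0
    return (num_frames - frames_per_clip) // step_between_clips + 1

# Two videos with 10 and 15 frames, frames_per_clip=5, step_between_clips=5:
assert num_clips(10, 5, 5) == 2  # clips start at frames 0 and 5
assert num_clips(15, 5, 5) == 3  # clips start at frames 0, 5 and 10
# Dataset size: 2 + 3 == 5, matching the docstring example.
```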