datasets.py 440 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from typing import Tuple

import torchvision
from torch import Tensor


class KineticsWithVideoId(torchvision.datasets.Kinetics):
    def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]:
        video, audio, info, video_idx = self.video_clips.get_clip(idx)
        label = self.samples[video_idx][1]

        if self.transform is not None:
            video = self.transform(video)

        return video, audio, label, video_idx