# _temporal.py
1
2
from typing import Any, Dict

3
4
from torchvision import datapoints
from torchvision.transforms.v2 import functional as F, Transform
5

6
from torchvision.transforms.v2.utils import is_simple_tensor
7

8
9

class UniformTemporalSubsample(Transform):
    """[BETA] Uniformly subsample ``num_samples`` indices from the temporal dimension of the video.

    .. v2betastatus:: UniformTemporalSubsample transform

    Videos are expected to be of shape ``[..., T, C, H, W]`` where ``T`` denotes the temporal dimension.

    When ``num_samples`` is larger than the size of temporal dimension of the video, it
    will sample frames based on nearest neighbor interpolation.

    Args:
        num_samples (int): The number of equispaced samples to be selected
    """

    # Input kinds this transform declares it handles: plain tensors (matched via the
    # ``is_simple_tensor`` predicate) and ``datapoints.Video`` instances.
    # NOTE(review): presumably consulted by the ``Transform`` base class to decide which
    # inputs are passed to ``_transform`` -- confirm against the base class.
    _transformed_types = (is_simple_tensor, datapoints.Video)

    def __init__(self, num_samples: int):
        """Store the number of frames to sample.

        Args:
            num_samples (int): The number of equispaced samples to be selected.
        """
        super().__init__()
        self.num_samples = num_samples

    def _transform(self, inpt: datapoints._VideoType, params: Dict[str, Any]) -> datapoints._VideoType:
        """Subsample a single video input.

        Args:
            inpt: The video to subsample.
            params: Per-call parameters; not used by this transform.

        Returns:
            The result of ``F.uniform_temporal_subsample(inpt, self.num_samples)``.
        """
        return F.uniform_temporal_subsample(inpt, self.num_samples)