#!/usr/bin/env python3
import numbers
import random
import warnings

from torchvision.transforms import (
    RandomCrop,
    RandomResizedCrop,
)

from . import _functional_video as F

__all__ = [
    "RandomCropVideo",
    "RandomResizedCropVideo",
    "CenterCropVideo",
    "NormalizeVideo",
    "ToTensorVideo",
    "RandomHorizontalFlipVideo",
]

warnings.warn("The _transforms_video module is deprecated. Please use the transforms module instead.")


class RandomCropVideo(RandomCrop):
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: randomly cropped/resized video clip.
                Size is (C, T, OH, OW)
        """
        # get_params is inherited from torchvision.transforms.RandomCrop
        i, j, h, w = self.get_params(clip, self.size)
        return F.crop(clip, i, j, h, w)

    def __repr__(self):
        return self.__class__.__name__ + f"(size={self.size})"


class RandomResizedCropVideo(RandomResizedCrop):
    def __init__(
        self,
        size,
        scale=(0.08, 1.0),
        ratio=(3.0 / 4.0, 4.0 / 3.0),
        interpolation_mode="bilinear",
    ):
        if isinstance(size, tuple):
            assert len(size) == 2, "size should be tuple (height, width)"
            self.size = size
        else:
            self.size = (size, size)

        self.interpolation_mode = interpolation_mode
        self.scale = scale
        self.ratio = ratio

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: randomly cropped/resized video clip.
                Size is (C, T, H, W)
        """
        # get_params is inherited from torchvision.transforms.RandomResizedCrop
        i, j, h, w = self.get_params(clip, self.scale, self.ratio)
        return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)

    def __repr__(self):
        return (
            self.__class__.__name__
            + f"(size={self.size}, interpolation_mode={self.interpolation_mode}, scale={self.scale}, ratio={self.ratio})"
        )


class CenterCropVideo:
    def __init__(self, crop_size):
        if isinstance(crop_size, numbers.Number):
            self.crop_size = (int(crop_size), int(crop_size))
        else:
            self.crop_size = crop_size

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: central cropping of video clip. Size is
                (C, T, crop_size, crop_size)
        """
        return F.center_crop(clip, self.crop_size)

    def __repr__(self):
        return self.__class__.__name__ + f"(crop_size={self.crop_size})"


class NormalizeVideo:
    """
    Normalize the video clip by mean subtraction and division by standard deviation

    Args:
        mean (3-tuple): pixel RGB mean
        std (3-tuple): pixel RGB standard deviation
        inplace (boolean): whether to do in-place normalization
    """

    def __init__(self, mean, std, inplace=False):
        self.mean = mean
        self.std = std
        self.inplace = inplace

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
        """
        return F.normalize(clip, self.mean, self.std, self.inplace)

    def __repr__(self):
        return self.__class__.__name__ + f"(mean={self.mean}, std={self.std}, inplace={self.inplace})"


class ToTensorVideo:
    """
    Convert tensor data type from uint8 to float, divide value by 255.0 and
    permute the dimensions of clip tensor
    """

    def __init__(self):
        pass

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
        Return:
            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
        """
        return F.to_tensor(clip)

    def __repr__(self):
        return self.__class__.__name__


class RandomHorizontalFlipVideo:
    """
    Flip the video clip along the horizontal direction with a given probability

    Args:
        p (float): probability of the clip being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Size is (C, T, H, W)
        Return:
            clip (torch.tensor): Size is (C, T, H, W)
        """
        if random.random() < self.p:
            clip = F.hflip(clip)
        return clip

    def __repr__(self):
        return self.__class__.__name__ + f"(p={self.p})"
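

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): composes the video
# transforms above with torchvision.transforms.Compose on a synthetic clip.
# The (T, H, W, C) uint8 input layout follows ToTensorVideo's docstring; the
# clip shape, crop size, and normalization statistics below are illustrative
# assumptions, not values prescribed by this module.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torch
    from torchvision.transforms import Compose

    transform = Compose(
        [
            ToTensorVideo(),                   # uint8 (T, H, W, C) -> float (C, T, H, W) in [0, 1]
            RandomResizedCropVideo(112),       # random crop, resized to 112x112
            RandomHorizontalFlipVideo(p=0.5),  # flip along W with probability 0.5
            NormalizeVideo(                    # per-channel standardization (example values)
                mean=(0.45, 0.45, 0.45),
                std=(0.225, 0.225, 0.225),
            ),
        ]
    )

    # Synthetic 16-frame RGB clip of spatial size 128x171.
    clip = torch.randint(0, 256, (16, 128, 171, 3), dtype=torch.uint8)
    out = transform(clip)
    print(out.shape)  # expected: torch.Size([3, 16, 112, 112])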