Fix int/float typing in `video_utils.py` (#8234)

Co-authored-by: Nicolas Hug <nh.nicolas.hug@gmail.com>

Fix int/float typing in `video_utils.py` (#8234)
Co-authored-by: Nicolas Hug <nh.nicolas.hug@gmail.com>
806dba67 · Santiago Castro · GitHub · 0be6c7e6 · 806dba67
Unverified commit 806dba67, authored Jan 31, 2024 by Santiago Castro; committed by GitHub on Jan 31, 2024
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 8 deletions

torchvision/datasets/video_utils.py torchvision/datasets/video_utils.py +8 -8

No files found.
--- a/torchvision/datasets/video_utils.py
+++ b/torchvision/datasets/video_utils.py
@@ -89,7 +89,7 @@ class VideoClips:
        video_paths (List[str]): paths to the video files
        clip_length_in_frames (int): size of a clip in number of frames
        frames_between_clips (int): step (in frames) between each clip
-        frame_rate (int, optional): if specified, it will resample the video
+        frame_rate (float, optional): if specified, it will resample the video
            so that it has `frame_rate`, and then the clips will be defined
            on the resampled video
        num_workers (int): how many subprocesses to use for data loading.
@@ -102,7 +102,7 @@ class VideoClips:
        video_paths: List[str],
        clip_length_in_frames: int = 16,
        frames_between_clips: int = 1,
-        frame_rate: Optional[int] = None,
+        frame_rate: Optional[float] = None,
        _precomputed_metadata: Optional[Dict[str, Any]] = None,
        num_workers: int = 0,
        _video_width: int = 0,
@@ -136,7 +136,7 @@ class VideoClips:
    def _compute_frame_pts(self) -> None:
        self.video_pts = []  # len = num_videos. Each entry is a tensor of shape (num_frames_in_video,)
-        self.video_fps: List[int] = []  # len = num_videos
+        self.video_fps: List[float] = []  # len = num_videos
        # strategy: use a DataLoader to parallelize read_video_timestamps
        # so need to create a dummy dataset first
@@ -203,7 +203,7 @@ class VideoClips:
    @staticmethod
    def compute_clips_for_video(
-        video_pts: torch.Tensor, num_frames: int, step: int, fps: int, frame_rate: Optional[int] = None
+        video_pts: torch.Tensor, num_frames: int, step: int, fps: Optional[float], frame_rate: Optional[float] = None
    ) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]:
        if fps is None:
            # if for some reason the video doesn't have fps (because doesn't have a video stream)
@@ -211,7 +211,7 @@ class VideoClips:
            fps = 1
        if frame_rate is None:
            frame_rate = fps
-        total_frames = len(video_pts) * (float(frame_rate) / fps)
+        total_frames = len(video_pts) * frame_rate / fps
        _idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate)
        video_pts = video_pts[_idxs]
        clips = unfold(video_pts, num_frames, step)
@@ -227,7 +227,7 @@ class VideoClips:
            idxs = unfold(_idxs, num_frames, step)
        return clips, idxs
-    def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None:
+    def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[float] = None) -> None:
        """
        Compute all consecutive sequences of clips from video_pts.
        Always returns clips of size `num_frames`, meaning that the
@@ -275,8 +275,8 @@ class VideoClips:
        return video_idx, clip_idx
    @staticmethod
-    def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Union[slice, torch.Tensor]:
+    def _resample_video_idx(num_frames: int, original_fps: float, new_fps: float) -> Union[slice, torch.Tensor]:
-        step = float(original_fps) / new_fps
+        step = original_fps / new_fps
        if step.is_integer():
            # optimization: if step is integer, don't need to perform
            # advanced indexing