Unverified commit 806dba67, authored by Santiago Castro, committed by GitHub
Browse files

Fix int/float typing in `video_utils.py` (#8234)


Co-authored-by: Nicolas Hug <nh.nicolas.hug@gmail.com>
parent 0be6c7e6
...@@ -89,7 +89,7 @@ class VideoClips: ...@@ -89,7 +89,7 @@ class VideoClips:
video_paths (List[str]): paths to the video files video_paths (List[str]): paths to the video files
clip_length_in_frames (int): size of a clip in number of frames clip_length_in_frames (int): size of a clip in number of frames
frames_between_clips (int): step (in frames) between each clip frames_between_clips (int): step (in frames) between each clip
frame_rate (int, optional): if specified, it will resample the video frame_rate (float, optional): if specified, it will resample the video
so that it has `frame_rate`, and then the clips will be defined so that it has `frame_rate`, and then the clips will be defined
on the resampled video on the resampled video
num_workers (int): how many subprocesses to use for data loading. num_workers (int): how many subprocesses to use for data loading.
...@@ -102,7 +102,7 @@ class VideoClips: ...@@ -102,7 +102,7 @@ class VideoClips:
video_paths: List[str], video_paths: List[str],
clip_length_in_frames: int = 16, clip_length_in_frames: int = 16,
frames_between_clips: int = 1, frames_between_clips: int = 1,
frame_rate: Optional[int] = None, frame_rate: Optional[float] = None,
_precomputed_metadata: Optional[Dict[str, Any]] = None, _precomputed_metadata: Optional[Dict[str, Any]] = None,
num_workers: int = 0, num_workers: int = 0,
_video_width: int = 0, _video_width: int = 0,
...@@ -136,7 +136,7 @@ class VideoClips: ...@@ -136,7 +136,7 @@ class VideoClips:
def _compute_frame_pts(self) -> None: def _compute_frame_pts(self) -> None:
self.video_pts = [] # len = num_videos. Each entry is a tensor of shape (num_frames_in_video,) self.video_pts = [] # len = num_videos. Each entry is a tensor of shape (num_frames_in_video,)
self.video_fps: List[int] = [] # len = num_videos self.video_fps: List[float] = [] # len = num_videos
# strategy: use a DataLoader to parallelize read_video_timestamps # strategy: use a DataLoader to parallelize read_video_timestamps
# so need to create a dummy dataset first # so need to create a dummy dataset first
...@@ -203,7 +203,7 @@ class VideoClips: ...@@ -203,7 +203,7 @@ class VideoClips:
@staticmethod @staticmethod
def compute_clips_for_video( def compute_clips_for_video(
video_pts: torch.Tensor, num_frames: int, step: int, fps: int, frame_rate: Optional[int] = None video_pts: torch.Tensor, num_frames: int, step: int, fps: Optional[float], frame_rate: Optional[float] = None
) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]: ) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]:
if fps is None: if fps is None:
# if for some reason the video doesn't have fps (because doesn't have a video stream) # if for some reason the video doesn't have fps (because doesn't have a video stream)
...@@ -211,7 +211,7 @@ class VideoClips: ...@@ -211,7 +211,7 @@ class VideoClips:
fps = 1 fps = 1
if frame_rate is None: if frame_rate is None:
frame_rate = fps frame_rate = fps
total_frames = len(video_pts) * (float(frame_rate) / fps) total_frames = len(video_pts) * frame_rate / fps
_idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate) _idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate)
video_pts = video_pts[_idxs] video_pts = video_pts[_idxs]
clips = unfold(video_pts, num_frames, step) clips = unfold(video_pts, num_frames, step)
...@@ -227,7 +227,7 @@ class VideoClips: ...@@ -227,7 +227,7 @@ class VideoClips:
idxs = unfold(_idxs, num_frames, step) idxs = unfold(_idxs, num_frames, step)
return clips, idxs return clips, idxs
def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None: def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[float] = None) -> None:
""" """
Compute all consecutive sequences of clips from video_pts. Compute all consecutive sequences of clips from video_pts.
Always returns clips of size `num_frames`, meaning that the Always returns clips of size `num_frames`, meaning that the
...@@ -275,8 +275,8 @@ class VideoClips: ...@@ -275,8 +275,8 @@ class VideoClips:
return video_idx, clip_idx return video_idx, clip_idx
@staticmethod @staticmethod
def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Union[slice, torch.Tensor]: def _resample_video_idx(num_frames: int, original_fps: float, new_fps: float) -> Union[slice, torch.Tensor]:
step = float(original_fps) / new_fps step = original_fps / new_fps
if step.is_integer(): if step.is_integer():
# optimization: if step is integer, don't need to perform # optimization: if step is integer, don't need to perform
# advanced indexing # advanced indexing
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment