"vscode:/vscode.git/clone" did not exist on "365f75233f67e03c761809bedff6defc72f988d4"
Unverified commit 9fc6522d, authored by Francisco Massa and committed by GitHub

concatenate small tensors into big ones to reduce the use of shared f… (#1795)

* concatenate small tensors into big ones to reduce the use of shared file descriptors (#1694)

Summary:
Pull Request resolved: https://github.com/pytorch/vision/pull/1694



- The PyTorch DataLoader forks worker processes to speed up fetching of dataset examples. The recommended multiprocessing context is `forkserver` rather than `fork`.

- The main process and the worker processes share the dataset instance, which avoids duplicating the dataset and saves memory. During this step, `ForkPickler(..).dumps(...)` is called to serialize the objects, recursively including the objects inside the dataset instance. A `VideoClips` instance internally uses O(N) `torch.Tensor`s to store per-video information, such as the pts and the possible clips, where N is the number of videos.

- During dumping, each `torch.Tensor` uses one file descriptor (FD). The OS default FD limit, which can be queried with `ulimit -n`, is 65K, and the number of tensors in `VideoClips` often exceeds it.

- To resolve this issue, we concatenate the small tensors into a few big ones in the `__getstate__()` method, which is called during pickling, so that only O(1) tensors are required (see the sketch after this list).

- Once this diff lands, we can abandon D19173248.
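
For illustration, here is a minimal, self-contained sketch of the same idea (the `ManySmallTensors` class below is hypothetical, not the actual `VideoClips` code): the O(N) per-item tensors are concatenated into a single big tensor plus a list of sizes in `__getstate__()`, and split back in `__setstate__()`.

```python
import pickle

import torch


class ManySmallTensors:
    """Hypothetical example: one small 1-D LongTensor per video, i.e. O(N) tensors."""

    def __init__(self, per_video_pts):
        self.video_pts = per_video_pts  # list of 1-D LongTensors

    def __getstate__(self):
        d = self.__dict__.copy()
        # concatenate the O(N) small tensors into a single big tensor (O(1) tensors),
        # remembering the per-video lengths so the split can be undone later
        d["video_pts_sizes"] = [len(v) for v in self.video_pts]
        d["video_pts"] = torch.cat(self.video_pts) if self.video_pts else torch.empty(0, dtype=torch.long)
        return d

    def __setstate__(self, d):
        # split the big tensor back into the original per-video tensors
        d["video_pts"] = list(torch.split(d["video_pts"], d.pop("video_pts_sizes"), dim=0))
        self.__dict__ = d


obj = ManySmallTensors([torch.arange(3), torch.arange(5)])
restored = pickle.loads(pickle.dumps(obj))  # only O(1) tensors cross the pickling boundary
assert [t.tolist() for t in restored.video_pts] == [[0, 1, 2], [0, 1, 2, 3, 4]]
```

The FD limit mentioned above can also be queried from Python on Unix with `resource.getrlimit(resource.RLIMIT_NOFILE)`, which reports the same soft limit as `ulimit -n`.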

In D19173397 in ClassyVision, we change the multiprocessing context from `fork` to `forkserver`, and can finally run the PyTorch DataLoader without hanging issues.
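
For reference (this is not part of the diff), the multiprocessing start method can also be selected per `DataLoader` through its `multiprocessing_context` argument; the tiny dataset below is a placeholder standing in for a real video dataset.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset


def main():
    # placeholder dataset standing in for a real video dataset
    dataset = TensorDataset(torch.arange(100))

    # start the worker processes with "forkserver" (the platform default is typically "fork" on Linux)
    loader = DataLoader(
        dataset,
        batch_size=8,
        num_workers=2,
        multiprocessing_context="forkserver",
    )
    for _ in loader:
        pass  # the dataset instance is pickled when each worker starts


if __name__ == "__main__":
    main()
```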

Reviewed By: fmassa

Differential Revision: D19179991

fbshipit-source-id: c8716775c7c154aa33d93b25d112d2a59ea688a9

* Try to fix Windows

* Try fix Windows v2

* Disable tests on Windows

* Add back necessary part

* Try fix OSX (and maybe Windows)

* Fix

* Try enabling Windows
Co-authored-by: default avatarZhicheng Yan <zyan3@fb.com>
parent d97825ea
@@ -81,17 +81,20 @@ class HMDB51(VisionDataset):
             _video_min_dimension=_video_min_dimension,
             _audio_samples=_audio_samples,
         )
+        # we bookkeep the full version of video clips because we want to be able
+        # to return the meta data of full version rather than the subset version of
+        # video clips
+        self.full_video_clips = video_clips
         self.fold = fold
         self.train = train
         self.classes = classes
-        self.video_clips_metadata = video_clips.metadata
         self.indices = self._select_fold(video_paths, annotation_path, fold, train)
         self.video_clips = video_clips.subset(self.indices)
         self.transform = transform

     @property
     def metadata(self):
-        return self.video_clips_metadata
+        return self.full_video_clips.metadata

     def _select_fold(self, video_list, annotations_dir, fold, train):
         target_tag = self.TRAIN_TAG if train else self.TEST_TAG
...
@@ -71,14 +71,17 @@ class UCF101(VisionDataset):
             _video_min_dimension=_video_min_dimension,
             _audio_samples=_audio_samples,
         )
-        self.video_clips_metadata = video_clips.metadata
+        # we bookkeep the full version of video clips because we want to be able
+        # to return the meta data of full version rather than the subset version of
+        # video clips
+        self.full_video_clips = video_clips
         self.indices = self._select_fold(video_list, annotation_path, fold, train)
         self.video_clips = video_clips.subset(self.indices)
         self.transform = transform

     @property
     def metadata(self):
-        return self.video_clips_metadata
+        return self.full_video_clips.metadata

     def _select_fold(self, video_list, annotation_path, fold, train):
         name = "train" if train else "test"
...
@@ -66,6 +66,13 @@ class _VideoTimestampsDataset(object):
         return read_video_timestamps(self.video_paths[idx])


+def _collate_fn(x):
+    """
+    Dummy collate function to be used with _VideoTimestampsDataset
+    """
+    return x
+
+
 class VideoClips(object):
     """
     Given a list of video files, computes all consecutive subvideos of size
@@ -125,9 +132,6 @@ class VideoClips(object):
             self._init_from_metadata(_precomputed_metadata)
         self.compute_clips(clip_length_in_frames, frames_between_clips, frame_rate)

-    def _collate_fn(self, x):
-        return x
-
     def _compute_frame_pts(self):
         self.video_pts = []
         self.video_fps = []
@@ -140,14 +144,17 @@ class VideoClips(object):
             _VideoTimestampsDataset(self.video_paths),
             batch_size=16,
             num_workers=self.num_workers,
-            collate_fn=self._collate_fn,
+            collate_fn=_collate_fn,
         )

         with tqdm(total=len(dl)) as pbar:
             for batch in dl:
                 pbar.update(1)
                 clips, fps = list(zip(*batch))
-                clips = [torch.as_tensor(c) for c in clips]
+                # we need to specify dtype=torch.long because for empty list,
+                # torch.as_tensor will use torch.float as default dtype. This
+                # happens when decoding fails and no pts is returned in the list.
+                clips = [torch.as_tensor(c, dtype=torch.long) for c in clips]
                 self.video_pts.extend(clips)
                 self.video_fps.extend(fps)
@@ -373,3 +380,47 @@ class VideoClips(object):
                 video.shape, self.num_frames
             )
         return video, audio, info, video_idx
+
+    def __getstate__(self):
+        video_pts_sizes = [len(v) for v in self.video_pts]
+        # To be back-compatible, we convert data to dtype torch.long as needed
+        # because for empty list, in legacy implementation, torch.as_tensor will
+        # use torch.float as default dtype. This happens when decoding fails and
+        # no pts is returned in the list.
+        video_pts = [x.to(torch.int64) for x in self.video_pts]
+        # video_pts can be an empty list if no frames have been decoded
+        if video_pts:
+            video_pts = torch.cat(video_pts)
+            # avoid bug in https://github.com/pytorch/pytorch/issues/32351
+            # TODO: Revert it once the bug is fixed.
+            video_pts = video_pts.numpy()
+
+        # make a copy of the fields of self
+        d = self.__dict__.copy()
+        d["video_pts_sizes"] = video_pts_sizes
+        d["video_pts"] = video_pts
+        # delete the following attributes to reduce the size of dictionary. They
+        # will be re-computed in "__setstate__()"
+        del d["clips"]
+        del d["resampling_idxs"]
+        del d["cumulative_sizes"]
+
+        # for backwards-compatibility
+        d["_version"] = 2
+        return d
+
+    def __setstate__(self, d):
+        # for backwards-compatibility
+        if "_version" not in d:
+            self.__dict__ = d
+            return
+
+        video_pts = torch.as_tensor(d["video_pts"], dtype=torch.int64)
+        video_pts = torch.split(video_pts, d["video_pts_sizes"], dim=0)
+        # don't need this info anymore
+        del d["video_pts_sizes"]
+        d["video_pts"] = video_pts
+        self.__dict__ = d
+
+        # recompute attributes "clips", "resampling_idxs" and other derivative ones
+        self.compute_clips(self.num_frames, self.step, self.frame_rate)