Unverified Commit 9fc6522d authored by Francisco Massa, committed by GitHub

concatenate small tensors into big ones to reduce the use of shared file descriptor (#1795)

* concatenate small tensors into big ones to reduce the use of shared file descriptor (#1694)

Summary:
Pull Request resolved: https://github.com/pytorch/vision/pull/1694



- The PyTorch dataloader forks worker processes to speed up fetching of dataset examples. The recommended multiprocessing context is `forkserver` rather than `fork`.

- The main process and the worker processes share the dataset instance, which avoids duplicating the dataset and saves memory. During this handoff, `ForkPickler(..).dumps(...)` is called to serialize the objects, recursively including the objects inside the dataset instance. A `VideoClips` instance internally uses O(N) `torch.Tensor` objects to store per-video information, such as pts and possible clips, where N is the number of videos.

- During dumping, each `torch.Tensor` consumes one file descriptor (FD). The OS default limit on open FDs, which can be queried with `ulimit -n`, is 65K. The number of tensors in `VideoClips` often exceeds that limit (see the sketch after this list for one way to inspect it).

- To resolve this issue, we concatenate the small tensors into a few big ones in the `__getstate__()` method, which is called during pickling. This requires only O(1) tensors.

- Once this diff lands, we can abandon D19173248.
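For context, here is a minimal sketch (not part of this PR) of how to inspect the two quantities the bullets above refer to: the tensor sharing strategy used when tensors are sent between processes, and the per-process open-FD limit that `ulimit -n` reports.

```python
import resource

import torch.multiprocessing as mp

# Strategy used when tensors are shared across processes; with
# "file_descriptor" each shared tensor keeps one FD open, which is why
# O(N) per-video tensors can exhaust the limit.
print("tensor sharing strategy:", mp.get_sharing_strategy())

# Per-process limit on open file descriptors, i.e. what `ulimit -n` reports.
# (The `resource` module is Unix-only.)
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print("open-FD limit: soft={}, hard={}".format(soft, hard))
```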

In D19173397, in ClassyVision, we change the multiprocessing context from `fork` to `forkserver`, and can finally run the PyTorch dataloader without hanging issues.
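As a reference for that context switch, below is a minimal sketch of asking a `DataLoader` to start its workers with the `forkserver` context; the toy dataset and worker count are placeholders, not taken from this PR.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

if __name__ == "__main__":
    # Placeholder map-style dataset; any Dataset works the same way.
    dataset = TensorDataset(torch.arange(100).unsqueeze(1))

    # multiprocessing_context selects how worker processes are started;
    # "forkserver" is the context recommended above instead of plain "fork".
    loader = DataLoader(
        dataset,
        batch_size=16,
        num_workers=2,
        multiprocessing_context="forkserver",
    )

    for batch in loader:
        pass  # iterate as usual; only the worker start method changed
```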

Reviewed By: fmassa

Differential Revision: D19179991

fbshipit-source-id: c8716775c7c154aa33d93b25d112d2a59ea688a9

* Try to fix Windows

* Try fix Windows v2

* Disable tests on Windows

* Add back necessary part

* Try fix OSX (and maybe Windows)

* Fix

* Try enabling Windows
Co-authored-by: Zhicheng Yan <zyan3@fb.com>
parent d97825ea
@@ -81,17 +81,20 @@ class HMDB51(VisionDataset):
             _video_min_dimension=_video_min_dimension,
             _audio_samples=_audio_samples,
         )
+        # we bookkeep the full version of video clips because we want to be able
+        # to return the meta data of full version rather than the subset version of
+        # video clips
+        self.full_video_clips = video_clips
         self.fold = fold
         self.train = train
         self.classes = classes
-        self.video_clips_metadata = video_clips.metadata
         self.indices = self._select_fold(video_paths, annotation_path, fold, train)
         self.video_clips = video_clips.subset(self.indices)
         self.transform = transform
 
     @property
     def metadata(self):
-        return self.video_clips_metadata
+        return self.full_video_clips.metadata
 
     def _select_fold(self, video_list, annotations_dir, fold, train):
         target_tag = self.TRAIN_TAG if train else self.TEST_TAG
@@ -71,14 +71,17 @@ class UCF101(VisionDataset):
             _video_min_dimension=_video_min_dimension,
             _audio_samples=_audio_samples,
         )
-        self.video_clips_metadata = video_clips.metadata
+        # we bookkeep the full version of video clips because we want to be able
+        # to return the meta data of full version rather than the subset version of
+        # video clips
+        self.full_video_clips = video_clips
         self.indices = self._select_fold(video_list, annotation_path, fold, train)
         self.video_clips = video_clips.subset(self.indices)
         self.transform = transform
 
     @property
     def metadata(self):
-        return self.video_clips_metadata
+        return self.full_video_clips.metadata
 
     def _select_fold(self, video_list, annotation_path, fold, train):
         name = "train" if train else "test"
@@ -66,6 +66,13 @@ class _VideoTimestampsDataset(object):
         return read_video_timestamps(self.video_paths[idx])
 
 
+def _collate_fn(x):
+    """
+    Dummy collate function to be used with _VideoTimestampsDataset
+    """
+    return x
+
+
 class VideoClips(object):
     """
     Given a list of video files, computes all consecutive subvideos of size
@@ -125,9 +132,6 @@ class VideoClips(object):
             self._init_from_metadata(_precomputed_metadata)
         self.compute_clips(clip_length_in_frames, frames_between_clips, frame_rate)
 
-    def _collate_fn(self, x):
-        return x
-
     def _compute_frame_pts(self):
         self.video_pts = []
         self.video_fps = []
@@ -140,14 +144,17 @@ class VideoClips(object):
             _VideoTimestampsDataset(self.video_paths),
             batch_size=16,
             num_workers=self.num_workers,
-            collate_fn=self._collate_fn,
+            collate_fn=_collate_fn,
         )
 
         with tqdm(total=len(dl)) as pbar:
             for batch in dl:
                 pbar.update(1)
                 clips, fps = list(zip(*batch))
-                clips = [torch.as_tensor(c) for c in clips]
+                # we need to specify dtype=torch.long because for empty list,
+                # torch.as_tensor will use torch.float as default dtype. This
+                # happens when decoding fails and no pts is returned in the list.
+                clips = [torch.as_tensor(c, dtype=torch.long) for c in clips]
                 self.video_pts.extend(clips)
                 self.video_fps.extend(fps)
@@ -373,3 +380,47 @@ class VideoClips(object):
                 video.shape, self.num_frames
             )
         return video, audio, info, video_idx
+
+    def __getstate__(self):
+        video_pts_sizes = [len(v) for v in self.video_pts]
+        # To be back-compatible, we convert data to dtype torch.long as needed
+        # because for empty list, in legacy implementation, torch.as_tensor will
+        # use torch.float as default dtype. This happens when decoding fails and
+        # no pts is returned in the list.
+        video_pts = [x.to(torch.int64) for x in self.video_pts]
+        # video_pts can be an empty list if no frames have been decoded
+        if video_pts:
+            video_pts = torch.cat(video_pts)
+            # avoid bug in https://github.com/pytorch/pytorch/issues/32351
+            # TODO: Revert it once the bug is fixed.
+            video_pts = video_pts.numpy()
+
+        # make a copy of the fields of self
+        d = self.__dict__.copy()
+        d["video_pts_sizes"] = video_pts_sizes
+        d["video_pts"] = video_pts
+        # delete the following attributes to reduce the size of dictionary. They
+        # will be re-computed in "__setstate__()"
+        del d["clips"]
+        del d["resampling_idxs"]
+        del d["cumulative_sizes"]
+
+        # for backwards-compatibility
+        d["_version"] = 2
+        return d
+
+    def __setstate__(self, d):
+        # for backwards-compatibility
+        if "_version" not in d:
+            self.__dict__ = d
+            return
+
+        video_pts = torch.as_tensor(d["video_pts"], dtype=torch.int64)
+        video_pts = torch.split(video_pts, d["video_pts_sizes"], dim=0)
+        # don't need this info anymore
+        del d["video_pts_sizes"]
+        d["video_pts"] = video_pts
+        self.__dict__ = d
+        # recompute attributes "clips", "resampling_idxs" and other derivative ones
+        self.compute_clips(self.num_frames, self.step, self.frame_rate)
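The `__getstate__`/`__setstate__` pair above hinges on `torch.cat` plus the recorded per-video lengths being losslessly invertible with `torch.split`. A self-contained sketch of that round trip on toy data (not code from this PR):

```python
import torch

# Toy per-video pts tensors of different lengths, standing in for self.video_pts.
video_pts = [torch.arange(3), torch.arange(5), torch.tensor([], dtype=torch.long)]

# __getstate__-style packing: record sizes, then concatenate into one tensor,
# so pickling ships O(1) tensors instead of O(N).
sizes = [len(v) for v in video_pts]
packed = torch.cat([v.to(torch.int64) for v in video_pts])

# __setstate__-style unpacking: split the big tensor back into per-video chunks.
restored = torch.split(packed, sizes, dim=0)

assert all(torch.equal(a, b) for a, b in zip(video_pts, restored))
```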