Unverified Commit a7b4bfd6 authored by Philip Meier, committed by GitHub

Add tests for UCF101 (#3411)



* enable default frames per clip for video test cases

* add tests for UCF101

* remove old tests as well as fake data generation

* better explain frames_per_clip overriding

* lint
Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
parent b7f3c812
test/datasets_utils.py
@@ -496,14 +496,44 @@ class ImageDatasetTestCase(DatasetTestCase):

 class VideoDatasetTestCase(DatasetTestCase):
     """Abstract base class for video dataset testcases.

-    - Overwrites the FEATURE_TYPES class attribute to expect two :class:`torch.Tensor` s for the video and audio as
-      well as an integer label.
-    - Overwrites the REQUIRED_PACKAGES class attribute to require PyAV (``av``).
+    - Overwrites the ``FEATURE_TYPES`` class attribute to expect two :class:`torch.Tensor` s for the video and audio
+      as well as an integer label.
+    - Overwrites the ``REQUIRED_PACKAGES`` class attribute to require PyAV (``av``).
+    - Adds the ``DEFAULT_FRAMES_PER_CLIP`` class attribute. If no ``frames_per_clip`` is provided by
+      ``inject_fake_data()`` and it is the last parameter without a default value in the dataset constructor, the
+      value of the ``DEFAULT_FRAMES_PER_CLIP`` class attribute is appended to the output.
     """

     FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
     REQUIRED_PACKAGES = ("av",)
+    DEFAULT_FRAMES_PER_CLIP = 1
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.inject_fake_data = self._set_default_frames_per_clip(self.inject_fake_data)
+
+    def _set_default_frames_per_clip(self, inject_fake_data):
+        argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
+        args_without_default = argspec.args[1:-len(argspec.defaults)]
+        frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
+        only_root_and_frames_per_clip = len(args_without_default) == 2 and frames_per_clip_last
+
+        @functools.wraps(inject_fake_data)
+        def wrapper(tmpdir, config):
+            output = inject_fake_data(tmpdir, config)
+            if isinstance(output, collections.abc.Sequence) and len(output) == 2:
+                args, info = output
+                if frames_per_clip_last and len(args) == len(args_without_default) - 1:
+                    args = (*args, self.DEFAULT_FRAMES_PER_CLIP)
+                return args, info
+            elif isinstance(output, (int, dict)) and only_root_and_frames_per_clip:
+                # `output` is the info here; supply (root, frames_per_clip) as the missing args.
+                return (tmpdir, self.DEFAULT_FRAMES_PER_CLIP), output
+            else:
+                return output
+
+        return wrapper
+

 def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor:
     r"""Create a random uint8 tensor.
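The padding rule above can be checked in isolation. A minimal sketch, assuming a constructor shaped like UCF101's; pad_args and fake_init are hypothetical stand-ins, and only the inspect-based argument analysis is taken from _set_default_frames_per_clip:

    import inspect

    def pad_args(dataset_init, args, default_frames_per_clip=1):
        # Positional parameters without default values, excluding `self`.
        argspec = inspect.getfullargspec(dataset_init)
        args_without_default = argspec.args[1:-len(argspec.defaults)]
        # Append the default only if `frames_per_clip` is the single missing argument.
        if args_without_default[-1] == "frames_per_clip" and len(args) == len(args_without_default) - 1:
            args = (*args, default_frames_per_clip)
        return args

    # Hypothetical constructor shaped like UCF101.__init__: three parameters without defaults.
    def fake_init(self, root, annotation_path, frames_per_clip, step_between_clips=1):
        pass

    assert pad_args(fake_init, ("videos", "annotations")) == ("videos", "annotations", 1)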
test/fakedata_generation.py
@@ -369,50 +369,6 @@ def svhn_root():
     yield root


-@contextlib.contextmanager
-def ucf101_root():
-    with get_tmp_dir() as tmp_dir:
-        ucf_dir = os.path.join(tmp_dir, 'UCF-101')
-        video_dir = os.path.join(ucf_dir, 'video')
-        annotations = os.path.join(ucf_dir, 'annotations')
-
-        os.makedirs(ucf_dir)
-        os.makedirs(video_dir)
-        os.makedirs(annotations)
-
-        fold_files = []
-        for split in {'train', 'test'}:
-            for fold in range(1, 4):
-                fold_file = '{:s}list{:02d}.txt'.format(split, fold)
-                fold_files.append(os.path.join(annotations, fold_file))
-
-        file_handles = [open(x, 'w') for x in fold_files]
-        file_iter = cycle(file_handles)
-
-        for i in range(0, 2):
-            current_class = 'class_{0}'.format(i + 1)
-            class_dir = os.path.join(video_dir, current_class)
-            os.makedirs(class_dir)
-            for group in range(0, 3):
-                for clip in range(0, 4):
-                    # Save sample file
-                    clip_name = 'v_{0}_g{1}_c{2}.avi'.format(
-                        current_class, group, clip)
-                    clip_path = os.path.join(class_dir, clip_name)
-                    length = random.randrange(10, 21)
-                    this_clip = torch.randint(
-                        0, 256, (length * 25, 320, 240, 3), dtype=torch.uint8)
-                    write_video(clip_path, this_clip, 25)
-                    # Add to annotations
-                    ann_file = next(file_iter)
-                    ann_file.write('{0}\n'.format(
-                        os.path.join(current_class, clip_name)))
-
-        # Close all file descriptors
-        for f in file_handles:
-            f.close()
-        yield (video_dir, annotations)
-
-
 @contextlib.contextmanager
 def places365_root(split="train-standard", small=False):
     VARIANTS = {
test/test_datasets.py
@@ -11,7 +11,7 @@ import torchvision
 from torchvision.datasets import utils
 from common_utils import get_tmp_dir
 from fakedata_generation import mnist_root, cifar_root, imagenet_root, \
-    cityscapes_root, svhn_root, ucf101_root, places365_root, widerface_root, stl10_root
+    cityscapes_root, svhn_root, places365_root, widerface_root, stl10_root
 import xml.etree.ElementTree as ET
 from urllib.request import Request, urlopen
 import itertools
@@ -22,6 +22,7 @@ from torchvision import datasets
 import torch
 import shutil
 import json
+import random

 try:
@@ -261,29 +262,6 @@ class Tester(DatasetTestcase):
         dataset = torchvision.datasets.SVHN(root, split="extra")
         self.generic_classification_dataset_test(dataset, num_images=2)

-    @unittest.skipIf(not HAS_PYAV, "PyAV unavailable")
-    def test_ucf101(self):
-        cached_meta_data = None
-        with ucf101_root() as (root, ann_root):
-            for split in {True, False}:
-                for fold in range(1, 4):
-                    for length in {10, 15, 20}:
-                        dataset = torchvision.datasets.UCF101(root, ann_root, length, fold=fold, train=split,
-                                                              num_workers=2, _precomputed_metadata=cached_meta_data)
-                        if cached_meta_data is None:
-                            cached_meta_data = dataset.metadata
-                        self.assertGreater(len(dataset), 0)
-
-                        video, audio, label = dataset[0]
-                        self.assertEqual(video.size(), (length, 320, 240, 3))
-                        self.assertEqual(audio.numel(), 0)
-                        self.assertEqual(label, 0)
-
-                        video, audio, label = dataset[len(dataset) - 1]
-                        self.assertEqual(video.size(), (length, 320, 240, 3))
-                        self.assertEqual(audio.numel(), 0)
-                        self.assertEqual(label, 1)
-
     def test_places365(self):
         for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
             with places365_root(split=split, small=small) as places365:
@@ -905,5 +883,56 @@ class CocoCaptionsTestCase(CocoDetectionTestCase):
         self.assertEqual(tuple(captions), tuple(info["captions"]))


+class UCF101TestCase(datasets_utils.VideoDatasetTestCase):
+    DATASET_CLASS = datasets.UCF101
+
+    CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False))
+
+    def inject_fake_data(self, tmpdir, config):
+        tmpdir = pathlib.Path(tmpdir)
+
+        video_folder = tmpdir / "videos"
+        os.makedirs(video_folder)
+        video_files = self._create_videos(video_folder)
+
+        annotations_folder = tmpdir / "annotations"
+        os.makedirs(annotations_folder)
+        num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"])
+
+        return (str(video_folder), str(annotations_folder)), num_examples
+
+    def _create_videos(self, root, num_examples_per_class=3):
+        def file_name_fn(cls, idx, clips_per_group=2):
+            return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi"
+
+        video_files = [
+            datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class)
+            for cls in ("ApplyEyeMakeup", "YoYo")
+        ]
+        return [path.relative_to(root) for path in itertools.chain(*video_files)]
+
+    def _create_annotation_files(self, root, video_files, fold, train):
+        current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1))
+        current_annotation = self._annotation_file_name(fold, train)
+        self._create_annotation_file(root, current_annotation, current_videos)
+
+        other_videos = set(video_files) - set(current_videos)
+        other_annotations = [
+            self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False))
+        ]
+        other_annotations.remove(current_annotation)
+        for name in other_annotations:
+            self._create_annotation_file(root, name, other_videos)
+
+        return len(current_videos)
+
+    def _annotation_file_name(self, fold, train):
+        return f"{'train' if train else 'test'}list{fold:02d}.txt"
+
+    def _create_annotation_file(self, root, name, video_files):
+        with open(pathlib.Path(root) / name, "w") as fh:
+            fh.writelines(f"{file}\n" for file in sorted(video_files))
+
+
 if __name__ == "__main__":
     unittest.main()
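For reference, CONFIGS expands to one dataset instantiation per (fold, train) pair, and the annotation names mirror the real UCF101 split files. A quick standalone check; combinations_grid is re-implemented here under the assumption that it expands keyword tuples into the full cartesian product of config dicts:

    import itertools

    def combinations_grid(**params):
        # Assumed behavior of datasets_utils.combinations_grid: one dict per combination.
        return [dict(zip(params, values)) for values in itertools.product(*params.values())]

    configs = combinations_grid(fold=(1, 2, 3), train=(True, False))
    assert len(configs) == 6  # one test configuration per (fold, train) pair

    def annotation_file_name(fold, train):
        # Same f-string as UCF101TestCase._annotation_file_name above.
        return f"{'train' if train else 'test'}list{fold:02d}.txt"

    assert annotation_file_name(1, True) == "trainlist01.txt"
    assert annotation_file_name(3, False) == "testlist03.txt"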