Unverified Commit 13c4470a authored by Philip Meier's avatar Philip Meier Committed by GitHub
Browse files

Properly fix dataset test that passes by accident (#3434)

* make UsageError an Exception rather than RuntimeError

* separate fake data injection and dataset args handling

* adapt tests for Coco

* fix Coco implementation

* add documentation

* fix VideoDatasetTestCase

* adapt UCF101 tests

* cleanup

* allow FileNotFoundError for test without fake data

* Revert "fix Coco implementation"

This reverts commit e2b693881654b3e2462a73e6d22bb01c1b738f8a.

* lint

* fix UCF101 tests
parent fc33c46f
...@@ -35,7 +35,7 @@ __all__ = [ ...@@ -35,7 +35,7 @@ __all__ = [
] ]
class UsageError(RuntimeError): class UsageError(Exception):
"""Should be raised in case an error happens in the setup rather than the test.""" """Should be raised in case an error happens in the setup rather than the test."""
...@@ -165,7 +165,8 @@ class DatasetTestCase(unittest.TestCase): ...@@ -165,7 +165,8 @@ class DatasetTestCase(unittest.TestCase):
Without further configuration, the testcase will test if Without further configuration, the testcase will test if
1. the dataset raises a ``RuntimeError`` if the data files are not found, 1. the dataset raises a :class:`FileNotFoundError` or a :class:`RuntimeError` if the data files are not found or
corrupted,
2. the dataset inherits from `torchvision.datasets.VisionDataset`, 2. the dataset inherits from `torchvision.datasets.VisionDataset`,
3. the dataset can be turned into a string, 3. the dataset can be turned into a string,
4. the feature types of a returned example matches ``FEATURE_TYPES``, 4. the feature types of a returned example matches ``FEATURE_TYPES``,
...@@ -228,9 +229,25 @@ class DatasetTestCase(unittest.TestCase): ...@@ -228,9 +229,25 @@ class DatasetTestCase(unittest.TestCase):
"download_and_extract_archive", "download_and_extract_archive",
} }
def inject_fake_data( def dataset_args(self, tmpdir: str, config: Dict[str, Any]) -> Sequence[Any]:
self, tmpdir: str, config: Dict[str, Any] """Define positional arguments passed to the dataset.
) -> Union[int, Dict[str, Any], Tuple[Sequence[Any], Union[int, Dict[str, Any]]]]:
.. note::
The default behavior is only valid if the dataset to be tested has ``root`` as the only required parameter.
Otherwise you need to overwrite this method.
Args:
tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset
to be created and in turn also for the fake data injected here.
config (Dict[str, Any]): Configuration that will be used to create the dataset.
Returns:
(Tuple[str]): ``tmpdir`` which corresponds to ``root`` for most datasets.
"""
return (tmpdir,)
def inject_fake_data(self, tmpdir: str, config: Dict[str, Any]) -> Union[int, Dict[str, Any]]:
"""Inject fake data for dataset into a temporary directory. """Inject fake data for dataset into a temporary directory.
Args: Args:
...@@ -240,15 +257,9 @@ class DatasetTestCase(unittest.TestCase): ...@@ -240,15 +257,9 @@ class DatasetTestCase(unittest.TestCase):
Needs to return one of the following: Needs to return one of the following:
1. (int): Number of examples in the dataset to be created, 1. (int): Number of examples in the dataset to be created, or
2. (Dict[str, Any]): Additional information about the injected fake data. Must contain the field 2. (Dict[str, Any]): Additional information about the injected fake data. Must contain the field
``"num_examples"`` that corresponds to the number of examples in the dataset to be created, or ``"num_examples"`` that corresponds to the number of examples in the dataset to be created.
3. (Tuple[Sequence[Any], Union[int, Dict[str, Any]]]): Additional required parameters that are passed to
the dataset constructor. The second element corresponds to cases 1. and 2.
If no ``args`` is returned (case 1. and 2.), the ``tmp_dir`` is passed as first parameter to the dataset
constructor. In most cases this corresponds to ``root``. If the dataset has more parameters without default
values you need to explicitly pass them as explained in case 3.
""" """
raise NotImplementedError("You need to provide fake data in order for the tests to run.") raise NotImplementedError("You need to provide fake data in order for the tests to run.")
...@@ -287,33 +298,30 @@ class DatasetTestCase(unittest.TestCase): ...@@ -287,33 +298,30 @@ class DatasetTestCase(unittest.TestCase):
disable_download_extract = inject_fake_data disable_download_extract = inject_fake_data
with get_tmp_dir() as tmpdir: with get_tmp_dir() as tmpdir:
output = self.inject_fake_data(tmpdir, config) if inject_fake_data else None args = self.dataset_args(tmpdir, config)
if output is None:
if inject_fake_data:
info = self.inject_fake_data(tmpdir, config)
if info is None:
raise UsageError( raise UsageError(
"The method 'inject_fake_data' needs to return at least an integer indicating the number of " "The method 'inject_fake_data' needs to return at least an integer indicating the number of "
"examples for the current configuration." "examples for the current configuration."
) )
elif isinstance(info, int):
if isinstance(output, collections.abc.Sequence) and len(output) == 2:
args, info = output
else:
args = (tmpdir,)
info = output
if isinstance(info, int):
info = dict(num_examples=info) info = dict(num_examples=info)
elif isinstance(info, dict): elif not isinstance(info, dict):
if "num_examples" not in info: raise UsageError(
f"The additional information returned by the method 'inject_fake_data' must be either an "
f"integer indicating the number of examples for the current configuration or a dictionary with "
f"the same content. Got {type(info)} instead."
)
elif "num_examples" not in info:
raise UsageError( raise UsageError(
"The information dictionary returned by the method 'inject_fake_data' must contain a " "The information dictionary returned by the method 'inject_fake_data' must contain a "
"'num_examples' field that holds the number of examples for the current configuration." "'num_examples' field that holds the number of examples for the current configuration."
) )
else: else:
raise UsageError( info = None
f"The additional information returned by the method 'inject_fake_data' must be either an integer "
f"indicating the number of examples for the current configuration or a dictionary with the the "
f"same content. Got {type(info)} instead."
)
cm = self._disable_download_extract if disable_download_extract else nullcontext cm = self._disable_download_extract if disable_download_extract else nullcontext
with cm(special_kwargs), disable_console_output(): with cm(special_kwargs), disable_console_output():
...@@ -395,8 +403,8 @@ class DatasetTestCase(unittest.TestCase): ...@@ -395,8 +403,8 @@ class DatasetTestCase(unittest.TestCase):
if inject_download_kwarg: if inject_download_kwarg:
del special_kwargs["download"] del special_kwargs["download"]
def test_not_found(self): def test_not_found_or_corrupted(self):
with self.assertRaises(RuntimeError): with self.assertRaises((FileNotFoundError, RuntimeError)):
with self.create_dataset(inject_fake_data=False): with self.create_dataset(inject_fake_data=False):
pass pass
...@@ -511,26 +519,20 @@ class VideoDatasetTestCase(DatasetTestCase): ...@@ -511,26 +519,20 @@ class VideoDatasetTestCase(DatasetTestCase):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.inject_fake_data = self._set_default_frames_per_clip(self.inject_fake_data) self.dataset_args = self._set_default_frames_per_clip(self.dataset_args)
def _set_default_frames_per_clip(self, inject_fake_data): def _set_default_frames_per_clip(self, inject_fake_data):
argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
args_without_default = argspec.args[1:-len(argspec.defaults)] args_without_default = argspec.args[1:-len(argspec.defaults)]
frames_per_clip_last = args_without_default[-1] == "frames_per_clip" frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
only_root_and_frames_per_clip = (len(args_without_default) == 2) and frames_per_clip_last
@functools.wraps(inject_fake_data) @functools.wraps(inject_fake_data)
def wrapper(tmpdir, config): def wrapper(tmpdir, config):
output = inject_fake_data(tmpdir, config) args = inject_fake_data(tmpdir, config)
if isinstance(output, collections.abc.Sequence) and len(output) == 2:
args, info = output
if frames_per_clip_last and len(args) == len(args_without_default) - 1: if frames_per_clip_last and len(args) == len(args_without_default) - 1:
args = (*args, self.DEFAULT_FRAMES_PER_CLIP) args = (*args, self.DEFAULT_FRAMES_PER_CLIP)
return args, info
elif isinstance(output, (int, dict)) and only_root_and_frames_per_clip: return args
return (tmpdir, self.DEFAULT_FRAMES_PER_CLIP)
else:
return output
return wrapper return wrapper
......
...@@ -824,33 +824,44 @@ class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): ...@@ -824,33 +824,44 @@ class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase):
REQUIRED_PACKAGES = ("pycocotools",) REQUIRED_PACKAGES = ("pycocotools",)
_IMAGE_FOLDER = "images"
_ANNOTATIONS_FOLDER = "annotations"
_ANNOTATIONS_FILE = "annotations.json"
def dataset_args(self, tmpdir, config):
tmpdir = pathlib.Path(tmpdir)
root = tmpdir / self._IMAGE_FOLDER
annotation_file = tmpdir / self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE
return root, annotation_file
def inject_fake_data(self, tmpdir, config): def inject_fake_data(self, tmpdir, config):
tmpdir = pathlib.Path(tmpdir) tmpdir = pathlib.Path(tmpdir)
num_images = 3 num_images = 3
num_annotations_per_image = 2 num_annotations_per_image = 2
image_folder = tmpdir / "images"
files = datasets_utils.create_image_folder( files = datasets_utils.create_image_folder(
tmpdir, name="images", file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images tmpdir, name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images
) )
file_names = [file.relative_to(image_folder) for file in files] file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files]
annotation_folder = tmpdir / "annotations" annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER
os.makedirs(annotation_folder) os.makedirs(annotation_folder)
annotation_file, info = self._create_annotation_file(annotation_folder, file_names, num_annotations_per_image) info = self._create_annotation_file(
annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image
)
info["num_examples"] = num_images info["num_examples"] = num_images
return (str(image_folder), str(annotation_file)), info return info
def _create_annotation_file(self, root, file_names, num_annotations_per_image): def _create_annotation_file(self, root, name, file_names, num_annotations_per_image):
image_ids = [int(file_name.stem) for file_name in file_names] image_ids = [int(file_name.stem) for file_name in file_names]
images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)]
annotations, info = self._create_annotations(image_ids, num_annotations_per_image) annotations, info = self._create_annotations(image_ids, num_annotations_per_image)
self._create_json(root, name, dict(images=images, annotations=annotations))
content = dict(images=images, annotations=annotations) return info
return self._create_json(root, "annotations.json", content), info
def _create_annotations(self, image_ids, num_annotations_per_image): def _create_annotations(self, image_ids, num_annotations_per_image):
annotations = datasets_utils.combinations_grid( annotations = datasets_utils.combinations_grid(
...@@ -888,18 +899,27 @@ class UCF101TestCase(datasets_utils.VideoDatasetTestCase): ...@@ -888,18 +899,27 @@ class UCF101TestCase(datasets_utils.VideoDatasetTestCase):
CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False))
_VIDEO_FOLDER = "videos"
_ANNOTATIONS_FOLDER = "annotations"
def dataset_args(self, tmpdir, config):
tmpdir = pathlib.Path(tmpdir)
root = tmpdir / self._VIDEO_FOLDER
annotation_path = tmpdir / self._ANNOTATIONS_FOLDER
return root, annotation_path
def inject_fake_data(self, tmpdir, config): def inject_fake_data(self, tmpdir, config):
tmpdir = pathlib.Path(tmpdir) tmpdir = pathlib.Path(tmpdir)
video_folder = tmpdir / "videos" video_folder = tmpdir / self._VIDEO_FOLDER
os.makedirs(video_folder) os.makedirs(video_folder)
video_files = self._create_videos(video_folder) video_files = self._create_videos(video_folder)
annotations_folder = annotations_folder = tmpdir / "annotations" annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER
os.makedirs(annotations_folder) os.makedirs(annotations_folder)
num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"])
return (str(video_folder), str(annotations_folder)), num_examples return num_examples
def _create_videos(self, root, num_examples_per_class=3): def _create_videos(self, root, num_examples_per_class=3):
def file_name_fn(cls, idx, clips_per_group=2): def file_name_fn(cls, idx, clips_per_group=2):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment