Commit cc26cd81 authored by panning

merge v0.16.0

parents f78f29f5 fbb4cc54
......@@ -164,7 +164,7 @@ class RandomResizeAndCrop(torch.nn.Module):
# The reason we don't rely on RandomResizedCrop is because of a significant
# difference in the parametrization of both transforms, in particular,
# because of the way the random parameters are sampled in both transforms,
# which leads to fairly different resuts (and different epe). For more details see
# which leads to fairly different results (and different epe). For more details see
# https://github.com/pytorch/vision/pull/5026/files#r762932579
def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, stretch_prob=0.8):
super().__init__()
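To make the parametrization difference concrete, here is a rough, hedged sketch (not the exact implementation; see the PR link above): `RandomResizedCrop` samples an area fraction and aspect ratio for the crop and then resizes that crop, while this transform (following RAFT) samples one global scale factor for the whole image and flow and only then takes a fixed-size crop.

```python
import math
import torch

# torchvision.transforms.RandomResizedCrop (defaults): sample an area fraction and an
# aspect ratio, crop that region, then resize the crop to a fixed output size.
area_frac = torch.empty(1).uniform_(0.08, 1.0).item()
log_ratio = torch.empty(1).uniform_(math.log(3 / 4), math.log(4 / 3)).item()

# RandomResizeAndCrop (following RAFT): sample one global scale factor, roughly
# 2 ** uniform(min_scale, max_scale), resize the whole image and flow with it,
# then take a fixed-size random crop. Different sampling -> different crops -> different epe.
scale = 2 ** torch.empty(1).uniform_(-0.2, 0.5).item()
```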
......@@ -196,8 +196,12 @@ class RandomResizeAndCrop(torch.nn.Module):
if torch.rand(1).item() < self.resize_prob:
# rescale the images
img1 = F.resize(img1, size=(new_h, new_w))
img2 = F.resize(img2, size=(new_h, new_w))
# We hard-code antialias=False to preserve results after we changed
# its default from None to True (see
# https://github.com/pytorch/vision/pull/7160)
# TODO: we could re-train the OF models with antialias=True?
img1 = F.resize(img1, size=(new_h, new_w), antialias=False)
img2 = F.resize(img2, size=(new_h, new_w), antialias=False)
if valid_flow_mask is None:
flow = F.resize(flow, size=(new_h, new_w))
flow = flow * torch.tensor([scale_x, scale_y])[:, None, None]
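A minimal sketch (hypothetical tensor sizes) of why `antialias` is pinned here: bilinear downsampling with and without antialiasing yields different pixel values, so checkpoints trained with one setting do not reproduce exactly with the other.

```python
import torch
from torchvision.transforms import functional as F

img = torch.rand(3, 480, 640)
old_style = F.resize(img, size=(240, 320), antialias=False)   # behavior kept by these scripts
new_default = F.resize(img, size=(240, 320), antialias=True)  # current torchvision default
print((old_style - new_default).abs().max())  # non-zero difference
```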
......@@ -208,7 +212,7 @@ class RandomResizeAndCrop(torch.nn.Module):
# Note: For sparse datasets (Kitti), the original code uses a "margin"
# See e.g. https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py#L220:L220
# We don't, not sure it matters much
# We don't, not sure if it matters much
y0 = torch.randint(0, img1.shape[1] - self.crop_size[0], size=(1,)).item()
x0 = torch.randint(0, img1.shape[2] - self.crop_size[1], size=(1,)).item()
......
......@@ -181,7 +181,7 @@ def sequence_loss(flow_preds, flow_gt, valid_flow_mask, gamma=0.8, max_flow=400)
if gamma > 1:
raise ValueError(f"Gamma should be < 1, got {gamma}.")
# exlude invalid pixels and extremely large diplacements
# exclude invalid pixels and extremely large displacements
flow_norm = torch.sum(flow_gt**2, dim=1).sqrt()
valid_flow_mask = valid_flow_mask & (flow_norm < max_flow)
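A toy, hedged illustration of the masking above (assuming `flow_gt` has shape `(N, 2, H, W)`): a pixel with ground-truth flow `(3, 4)` has norm 5, well below `max_flow`, so it stays valid; pixels with norm at or above `max_flow` are dropped.

```python
import torch

flow_gt = torch.tensor([[[[3.0]], [[4.0]]]])        # shape (1, 2, 1, 1)
valid_flow_mask = torch.tensor([[[True]]])          # shape (1, 1, 1)
flow_norm = torch.sum(flow_gt**2, dim=1).sqrt()     # tensor([[[5.]]])
valid_flow_mask = valid_flow_mask & (flow_norm < 400)  # stays True for this pixel
```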
......@@ -248,7 +248,7 @@ def setup_ddp(args):
# https://discuss.pytorch.org/t/what-is-the-difference-between-rank-and-local-rank/61940/2
if all(key in os.environ for key in ("LOCAL_RANK", "RANK", "WORLD_SIZE")):
# if we're here, the script was called with torchrun. Otherwise
# if we're here, the script was called with torchrun. Otherwise,
# these args will be set already by the run_with_submitit script
args.local_rank = int(os.environ["LOCAL_RANK"])
args.rank = int(os.environ["RANK"])
......
......@@ -68,11 +68,6 @@ def _coco_remove_images_without_annotations(dataset, cat_list=None):
# if more than 1k pixels occupied in the image
return sum(obj["area"] for obj in anno) > 1000
if not isinstance(dataset, torchvision.datasets.CocoDetection):
raise TypeError(
f"This function expects dataset of type torchvision.datasets.CocoDetection, instead got {type(dataset)}"
)
ids = []
for ds_idx, img_id in enumerate(dataset.ids):
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
......@@ -86,7 +81,7 @@ def _coco_remove_images_without_annotations(dataset, cat_list=None):
return dataset
def get_coco(root, image_set, transforms):
def get_coco(root, image_set, transforms, use_v2=False):
PATHS = {
"train": ("train2017", os.path.join("annotations", "instances_train2017.json")),
"val": ("val2017", os.path.join("annotations", "instances_val2017.json")),
......@@ -94,12 +89,23 @@ def get_coco(root, image_set, transforms):
}
CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72]
transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms])
img_folder, ann_file = PATHS[image_set]
img_folder = os.path.join(root, img_folder)
ann_file = os.path.join(root, ann_file)
# The 2 "Compose" below achieve the same thing: converting coco detection
# samples into segmentation-compatible samples. They just do it with
# slightly different implementations. We could refactor and unify, but
# keeping them separate helps keep the v2 version clean
if use_v2:
import v2_extras
from torchvision.datasets import wrap_dataset_for_transforms_v2
transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms])
dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"})
else:
transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms])
dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
if image_set == "train":
......
import torch
import transforms as T
def get_modules(use_v2):
# We need a protected import to avoid the V2 warning in case just V1 is used
if use_v2:
import torchvision.transforms.v2
import torchvision.tv_tensors
import v2_extras
return torchvision.transforms.v2, torchvision.tv_tensors, v2_extras
else:
import transforms
return transforms, None, None
class SegmentationPresetTrain:
def __init__(self, *, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
min_size = int(0.5 * base_size)
max_size = int(2.0 * base_size)
def __init__(
self,
*,
base_size,
crop_size,
hflip_prob=0.5,
mean=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
backend="pil",
use_v2=False,
):
T, tv_tensors, v2_extras = get_modules(use_v2)
transforms = []
backend = backend.lower()
if backend == "tv_tensor":
transforms.append(T.ToImage())
elif backend == "tensor":
transforms.append(T.PILToTensor())
elif backend != "pil":
raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")
transforms += [T.RandomResize(min_size=int(0.5 * base_size), max_size=int(2.0 * base_size))]
trans = [T.RandomResize(min_size, max_size)]
if hflip_prob > 0:
trans.append(T.RandomHorizontalFlip(hflip_prob))
trans.extend(
[
T.RandomCrop(crop_size),
T.PILToTensor(),
T.ConvertImageDtype(torch.float),
T.Normalize(mean=mean, std=std),
transforms += [T.RandomHorizontalFlip(hflip_prob)]
if use_v2:
# We need a custom pad transform here, since the padding we want to perform here is fundamentally
# different from the padding in `RandomCrop` if `pad_if_needed=True`.
transforms += [v2_extras.PadIfSmaller(crop_size, fill={tv_tensors.Mask: 255, "others": 0})]
transforms += [T.RandomCrop(crop_size)]
if backend == "pil":
transforms += [T.PILToTensor()]
if use_v2:
img_type = tv_tensors.Image if backend == "tv_tensor" else torch.Tensor
transforms += [
T.ToDtype(dtype={img_type: torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True)
]
)
self.transforms = T.Compose(trans)
else:
# No need to explicitly convert masks as they're magically int64 already
transforms += [T.ToDtype(torch.float, scale=True)]
transforms += [T.Normalize(mean=mean, std=std)]
if use_v2:
transforms += [T.ToPureTensor()]
self.transforms = T.Compose(transforms)
def __call__(self, img, target):
return self.transforms(img, target)
class SegmentationPresetEval:
def __init__(self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
self.transforms = T.Compose(
[
T.RandomResize(base_size, base_size),
T.PILToTensor(),
T.ConvertImageDtype(torch.float),
def __init__(
self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), backend="pil", use_v2=False
):
T, _, _ = get_modules(use_v2)
transforms = []
backend = backend.lower()
if backend == "tensor":
transforms += [T.PILToTensor()]
elif backend == "tv_tensor":
transforms += [T.ToImage()]
elif backend != "pil":
raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")
if use_v2:
transforms += [T.Resize(size=(base_size, base_size))]
else:
transforms += [T.RandomResize(min_size=base_size, max_size=base_size)]
if backend == "pil":
# Note: we could just convert to pure tensors even in v2?
transforms += [T.ToImage() if use_v2 else T.PILToTensor()]
transforms += [
T.ToDtype(torch.float, scale=True),
T.Normalize(mean=mean, std=std),
]
)
if use_v2:
transforms += [T.ToPureTensor()]
self.transforms = T.Compose(transforms)
def __call__(self, img, target):
return self.transforms(img, target)
......@@ -14,24 +14,30 @@ from torch.optim.lr_scheduler import PolynomialLR
from torchvision.transforms import functional as F, InterpolationMode
def get_dataset(dir_path, name, image_set, transform):
def get_dataset(args, is_train):
def sbd(*args, **kwargs):
kwargs.pop("use_v2")
return torchvision.datasets.SBDataset(*args, mode="segmentation", **kwargs)
def voc(*args, **kwargs):
kwargs.pop("use_v2")
return torchvision.datasets.VOCSegmentation(*args, **kwargs)
paths = {
"voc": (dir_path, torchvision.datasets.VOCSegmentation, 21),
"voc_aug": (dir_path, sbd, 21),
"coco": (dir_path, get_coco, 21),
"voc": (args.data_path, voc, 21),
"voc_aug": (args.data_path, sbd, 21),
"coco": (args.data_path, get_coco, 21),
}
p, ds_fn, num_classes = paths[name]
p, ds_fn, num_classes = paths[args.dataset]
ds = ds_fn(p, image_set=image_set, transforms=transform)
image_set = "train" if is_train else "val"
ds = ds_fn(p, image_set=image_set, transforms=get_transform(is_train, args), use_v2=args.use_v2)
return ds, num_classes
def get_transform(train, args):
if train:
return presets.SegmentationPresetTrain(base_size=520, crop_size=480)
def get_transform(is_train, args):
if is_train:
return presets.SegmentationPresetTrain(base_size=520, crop_size=480, backend=args.backend, use_v2=args.use_v2)
elif args.weights and args.test_only:
weights = torchvision.models.get_weight(args.weights)
trans = weights.transforms()
......@@ -44,7 +50,7 @@ def get_transform(train, args):
return preprocessing
else:
return presets.SegmentationPresetEval(base_size=520)
return presets.SegmentationPresetEval(base_size=520, backend=args.backend, use_v2=args.use_v2)
def criterion(inputs, target):
......@@ -120,6 +126,12 @@ def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, devi
def main(args):
if args.backend.lower() != "pil" and not args.use_v2:
# TODO: Support tensor backend in V1?
raise ValueError("Use --use-v2 if you want to use the tv_tensor or tensor backend.")
if args.use_v2 and args.dataset != "coco":
raise ValueError("v2 is only support supported for coco dataset for now.")
if args.output_dir:
utils.mkdir(args.output_dir)
......@@ -134,8 +146,8 @@ def main(args):
else:
torch.backends.cudnn.benchmark = True
dataset, num_classes = get_dataset(args.data_path, args.dataset, "train", get_transform(True, args))
dataset_test, _ = get_dataset(args.data_path, args.dataset, "val", get_transform(False, args))
dataset, num_classes = get_dataset(args, is_train=True)
dataset_test, _ = get_dataset(args, is_train=False)
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
......@@ -260,7 +272,7 @@ def get_args_parser(add_help=True):
parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, help="dataset path")
parser.add_argument("--dataset", default="coco", type=str, help="dataset name")
parser.add_argument("--model", default="fcn_resnet101", type=str, help="model name")
parser.add_argument("--aux-loss", action="store_true", help="auxiliar loss")
parser.add_argument("--aux-loss", action="store_true", help="auxiliary loss")
parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)")
parser.add_argument(
"-b", "--batch-size", default=8, type=int, help="images per gpu, the total batch size is $NGPU x batch_size"
......@@ -307,6 +319,8 @@ def get_args_parser(add_help=True):
# Mixed precision training parameters
parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")
parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive")
parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms")
return parser
......
......@@ -35,7 +35,7 @@ class RandomResize:
def __call__(self, image, target):
size = random.randint(self.min_size, self.max_size)
image = F.resize(image, size)
image = F.resize(image, size, antialias=True)
target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)
return image, target
......@@ -81,11 +81,14 @@ class PILToTensor:
return image, target
class ConvertImageDtype:
def __init__(self, dtype):
class ToDtype:
def __init__(self, dtype, scale=False):
self.dtype = dtype
self.scale = scale
def __call__(self, image, target):
if not self.scale:
return image.to(dtype=self.dtype), target
image = F.convert_image_dtype(image, self.dtype)
return image, target
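A small usage sketch (hypothetical uint8 input) of the `scale` flag: with `scale=False` the image is only cast, with `scale=True` the values are also rescaled into `[0, 1]` via `convert_image_dtype`.

```python
import torch

img = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8)
cast_only, _ = ToDtype(torch.float)(img, target=None)                # float, values still in [0, 255]
cast_scaled, _ = ToDtype(torch.float, scale=True)(img, target=None)  # float, values in [0, 1]
```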
......
......@@ -267,9 +267,9 @@ def init_distributed_mode(args):
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ["WORLD_SIZE"])
args.gpu = int(os.environ["LOCAL_RANK"])
elif "SLURM_PROCID" in os.environ:
args.rank = int(os.environ["SLURM_PROCID"])
args.gpu = args.rank % torch.cuda.device_count()
# elif "SLURM_PROCID" in os.environ:
# args.rank = int(os.environ["SLURM_PROCID"])
# args.gpu = args.rank % torch.cuda.device_count()
elif hasattr(args, "rank"):
pass
else:
......
"""This file only exists to be lazy-imported and avoid V2-related import warnings when just using V1."""
import torch
from torchvision import tv_tensors
from torchvision.transforms import v2
class PadIfSmaller(v2.Transform):
def __init__(self, size, fill=0):
super().__init__()
self.size = size
self.fill = v2._utils._setup_fill_arg(fill)
def _get_params(self, sample):
_, height, width = v2._utils.query_chw(sample)
padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)]
needs_padding = any(padding)
return dict(padding=padding, needs_padding=needs_padding)
def _transform(self, inpt, params):
if not params["needs_padding"]:
return inpt
fill = v2._utils._get_fill(self.fill, type(inpt))
fill = v2._utils._convert_fill_arg(fill)
return v2.functional.pad(inpt, padding=params["padding"], fill=fill)
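A hypothetical usage sketch of `PadIfSmaller` (reusing the `torch`/`tv_tensors` imports at the top of this file): a sample smaller than the crop size is padded up to it, with the mask padded with the ignore value 255 and everything else with 0.

```python
pad = PadIfSmaller(480, fill={tv_tensors.Mask: 255, "others": 0})
img = tv_tensors.Image(torch.zeros(3, 300, 400))
mask = tv_tensors.Mask(torch.zeros(300, 400, dtype=torch.uint8))
img, mask = pad(img, mask)
print(img.shape, mask.shape)  # torch.Size([3, 480, 480]) torch.Size([480, 480])
```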
class CocoDetectionToVOCSegmentation(v2.Transform):
"""Turn samples from datasets.CocoDetection into the same format as VOCSegmentation.
This is achieved in two steps:
1. COCO differentiates between 91 categories while VOC only supports 21, including background for both. Fortunately,
the COCO categories are a superset of the VOC ones and thus can be mapped. Instances of the 70 categories not
present in VOC are dropped and replaced by background.
2. COCO only offers detection masks, i.e. a (N, H, W) bool-ish tensor, where the truthy values in each individual
mask denote the instance. However, a segmentation mask is a (H, W) integer tensor (typically torch.uint8), where
the value of each pixel denotes the category it belongs to. The detection masks are merged into one segmentation
mask while pixels that belong to multiple detection masks are marked as invalid.
"""
COCO_TO_VOC_LABEL_MAP = dict(
zip(
[0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72],
range(21),
)
)
INVALID_VALUE = 255
def _coco_detection_masks_to_voc_segmentation_mask(self, target):
if "masks" not in target:
return None
instance_masks, instance_labels_coco = target["masks"], target["labels"]
valid_labels_voc = [
(idx, label_voc)
for idx, label_coco in enumerate(instance_labels_coco.tolist())
if (label_voc := self.COCO_TO_VOC_LABEL_MAP.get(label_coco)) is not None
]
if not valid_labels_voc:
return None
valid_voc_category_idcs, instance_labels_voc = zip(*valid_labels_voc)
instance_masks = instance_masks[list(valid_voc_category_idcs)].to(torch.uint8)
instance_labels_voc = torch.tensor(instance_labels_voc, dtype=torch.uint8)
# Calling `.max()` on the stacked detection masks works fine to separate background from foreground as long as
# there is at most a single instance per pixel. Overlapping instances will be filtered out in the next step.
segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0)
segmentation_mask[instance_masks.sum(dim=0) > 1] = self.INVALID_VALUE
return segmentation_mask
def forward(self, image, target):
segmentation_mask = self._coco_detection_masks_to_voc_segmentation_mask(target)
if segmentation_mask is None:
segmentation_mask = torch.zeros(v2.functional.get_size(image), dtype=torch.uint8)
return image, tv_tensors.Mask(segmentation_mask)
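A toy, hedged sketch of step 2 from the docstring above, showing how the stacked detection masks are merged and overlapping pixels marked invalid:

```python
import torch

instance_masks = torch.tensor(      # two (2, 3) detection masks
    [[[1, 1, 0], [0, 0, 0]],
     [[0, 1, 0], [0, 0, 1]]],
    dtype=torch.uint8,
)
instance_labels_voc = torch.tensor([12, 15], dtype=torch.uint8)  # already-mapped VOC labels

segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0)
segmentation_mask[instance_masks.sum(dim=0) > 1] = 255  # pixel covered by both instances -> invalid
print(segmentation_mask)  # [[12, 255, 0], [0, 0, 15]] as a uint8 tensor
```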
......@@ -48,7 +48,7 @@ class PKSampler(Sampler):
# Ensures there are enough classes to sample from
if len(self.groups) < p:
raise ValueError("There are not enought classes to sample from")
raise ValueError("There are not enough classes to sample from")
def __iter__(self):
# Shuffle samples within groups
......
......@@ -76,7 +76,7 @@ Input data augmentations at validation time (with optional parameters):
5. Convert BCHW to CBHW
This translates in the following set of command-line arguments. Please note that `--batch-size` parameter controls the
batch size per GPU. Moreover note that our default `--lr` is configured for 64 GPUs which is how many we used for the
batch size per GPU. Moreover, note that our default `--lr` is configured for 64 GPUs which is how many we used for the
Video resnet models:
```
# number of frames per clip
......
......@@ -15,7 +15,11 @@ class VideoClassificationPresetTrain:
):
trans = [
transforms.ConvertImageDtype(torch.float32),
transforms.Resize(resize_size),
# We hard-code antialias=False to preserve results after we changed
# its default from None to True (see
# https://github.com/pytorch/vision/pull/7160)
# TODO: we could re-train the video models with antialias=True?
transforms.Resize(resize_size, antialias=False),
]
if hflip_prob > 0:
trans.append(transforms.RandomHorizontalFlip(hflip_prob))
......@@ -31,7 +35,11 @@ class VideoClassificationPresetEval:
self.transforms = transforms.Compose(
[
transforms.ConvertImageDtype(torch.float32),
transforms.Resize(resize_size),
# We hard-code antialias=False to preserve results after we changed
# its default from None to True (see
# https://github.com/pytorch/vision/pull/7160)
# TODO: we could re-train the video models with antialias=True?
transforms.Resize(resize_size, antialias=False),
transforms.Normalize(mean=mean, std=std),
transforms.CenterCrop(crop_size),
ConvertBCHWtoCBHW(),
......
import asyncio
import sys
from pathlib import Path
from time import perf_counter
from urllib.parse import urlsplit
import aiofiles
import aiohttp
from torchvision import models
from tqdm.asyncio import tqdm
async def main(download_root):
download_root.mkdir(parents=True, exist_ok=True)
urls = {weight.url for name in models.list_models() for weight in iter(models.get_model_weights(name))}
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None)) as session:
await tqdm.gather(*[download(download_root, session, url) for url in urls])
async def download(download_root, session, url):
response = await session.get(url, params=dict(source="ci"))
assert response.ok
file_name = Path(urlsplit(url).path).name
async with aiofiles.open(download_root / file_name, "wb") as f:
async for data in response.content.iter_any():
await f.write(data)
if __name__ == "__main__":
download_root = (
(Path(sys.argv[1]) if len(sys.argv) > 1 else Path("~/.cache/torch/hub/checkpoints")).expanduser().resolve()
)
print(f"Downloading model weights to {download_root}")
start = perf_counter()
asyncio.get_event_loop().run_until_complete(main(download_root))
stop = perf_counter()
minutes, seconds = divmod(stop - start, 60)
print(f"Download took {minutes:2.0f}m {seconds:2.0f}s")
......@@ -2,7 +2,7 @@
universal=1
[metadata]
license_file = LICENSE
license_files = LICENSE
[pep8]
max-line-length = 120
......@@ -10,7 +10,7 @@ max-line-length = 120
[flake8]
# note: we ignore all 501s (line too long) anyway as they're taken care of by black
max-line-length = 120
ignore = E203, E402, W503, W504, F821, E501
ignore = E203, E402, W503, W504, F821, E501, B, C4, EXE
per-file-ignores =
__init__.py: F401, F403, F405
./hubconf.py: F401
......
......@@ -86,7 +86,6 @@ if os.getenv("PYTORCH_VERSION"):
pytorch_dep += "==" + os.getenv("PYTORCH_VERSION")
requirements = [
"typing_extensions",
"numpy",
"requests",
pytorch_dep,
......@@ -166,10 +165,13 @@ def get_extensions():
+ glob.glob(os.path.join(extensions_dir, "ops", "cpu", "*.cpp"))
+ glob.glob(os.path.join(extensions_dir, "ops", "quantized", "cpu", "*.cpp"))
)
source_mps = glob.glob(os.path.join(extensions_dir, "ops", "mps", "*.mm"))
print("Compiling extensions with following flags:")
force_cuda = os.getenv("FORCE_CUDA", "0") == "1"
print(f" FORCE_CUDA: {force_cuda}")
force_mps = os.getenv("FORCE_MPS", "0") == "1"
print(f" FORCE_MPS: {force_mps}")
debug_mode = os.getenv("DEBUG", "0") == "1"
print(f" DEBUG: {debug_mode}")
use_png = os.getenv("TORCHVISION_USE_PNG", "1") == "1"
......@@ -231,6 +233,8 @@ def get_extensions():
define_macros += [("WITH_HIP", None)]
nvcc_flags = []
extra_compile_args["nvcc"] = nvcc_flags
elif torch.backends.mps.is_available() or force_mps:
sources += source_mps
if sys.platform == "win32":
define_macros += [("torchvision_EXPORTS", None)]
......@@ -247,6 +251,9 @@ def get_extensions():
extra_compile_args["nvcc"] = [f for f in nvcc_flags if not ("-O" in f or "-g" in f)]
extra_compile_args["nvcc"].append("-O0")
extra_compile_args["nvcc"].append("-g")
else:
print("Compiling with debug mode OFF")
extra_compile_args["cxx"].append("-g0")
sources = [os.path.join(extensions_dir, s) for s in sources]
......@@ -327,6 +334,8 @@ def get_extensions():
use_jpeg = use_jpeg and jpeg_found
if use_jpeg:
print("Building torchvision with JPEG image support")
print(f" libjpeg include path: {jpeg_include}")
print(f" libjpeg lib path: {jpeg_lib}")
image_link_flags.append("jpeg")
if jpeg_conda:
image_library += [jpeg_lib]
......@@ -352,11 +361,14 @@ def get_extensions():
image_macros += [("NVJPEG_FOUND", str(int(use_nvjpeg)))]
image_path = os.path.join(extensions_dir, "io", "image")
image_src = (
glob.glob(os.path.join(image_path, "*.cpp"))
+ glob.glob(os.path.join(image_path, "cpu", "*.cpp"))
+ glob.glob(os.path.join(image_path, "cuda", "*.cpp"))
)
image_src = glob.glob(os.path.join(image_path, "*.cpp")) + glob.glob(os.path.join(image_path, "cpu", "*.cpp"))
if is_rocm_pytorch:
image_src += glob.glob(os.path.join(image_path, "hip", "*.cpp"))
# we need to exclude this in favor of the hipified source
image_src.remove(os.path.join(image_path, "image.cpp"))
else:
image_src += glob.glob(os.path.join(image_path, "cuda", "*.cpp"))
if use_png or use_jpeg:
ext_modules.append(
......@@ -464,8 +476,8 @@ def get_extensions():
"swresample",
"swscale",
],
extra_compile_args=["-std=c++14"] if os.name != "nt" else ["/std:c++14", "/MP"],
extra_link_args=["-std=c++14" if os.name != "nt" else "/std:c++14"],
extra_compile_args=["-std=c++17"] if os.name != "nt" else ["/std:c++17", "/MP"],
extra_link_args=["-std=c++17" if os.name != "nt" else "/std:c++17"],
)
)
......@@ -564,6 +576,7 @@ if __name__ == "__main__":
url="https://github.com/pytorch/vision",
description="image and video datasets and models for torch deep learning",
long_description=readme,
long_description_content_type="text/markdown",
license="BSD",
# Package info
packages=find_packages(exclude=("test",)),
......@@ -574,7 +587,7 @@ if __name__ == "__main__":
"scipy": ["scipy"],
},
ext_modules=get_extensions(),
python_requires=">=3.7",
python_requires=">=3.8",
cmdclass={
"build_ext": BuildExtension.with_options(no_python_abi_suffix=True),
"clean": clean,
......
import bz2
import collections.abc
import csv
import functools
import gzip
import io
import itertools
import json
import lzma
import pathlib
import pickle
import random
import shutil
import unittest.mock
import xml.etree.ElementTree as ET
from collections import Counter, defaultdict
import numpy as np
import pytest
import torch
from common_utils import combinations_grid
from datasets_utils import create_image_file, create_image_folder, make_tar, make_zip
from torch.nn.functional import one_hot
from torch.testing import make_tensor as _make_tensor
from torchvision.prototype import datasets
make_tensor = functools.partial(_make_tensor, device="cpu")
make_scalar = functools.partial(make_tensor, ())
__all__ = ["DATASET_MOCKS", "parametrize_dataset_mocks"]
class DatasetMock:
def __init__(self, name, *, mock_data_fn, configs):
# FIXME: error handling for unknown names
self.name = name
self.mock_data_fn = mock_data_fn
self.configs = configs
def _parse_mock_info(self, mock_info):
if mock_info is None:
raise pytest.UsageError(
f"The mock data function for dataset '{self.name}' returned nothing. It needs to at least return an "
f"integer indicating the number of samples for the current `config`."
)
elif isinstance(mock_info, int):
mock_info = dict(num_samples=mock_info)
elif not isinstance(mock_info, dict):
raise pytest.UsageError(
f"The mock data function for dataset '{self.name}' returned a {type(mock_info)}. The returned object "
f"should be a dictionary containing at least the number of samples for the key `'num_samples'`. If no "
f"additional information is required for specific tests, the number of samples can also be returned as "
f"an integer."
)
elif "num_samples" not in mock_info:
raise pytest.UsageError(
f"The dictionary returned by the mock data function for dataset '{self.name}' has to contain a "
f"`'num_samples'` entry indicating the number of samples."
)
return mock_info
def load(self, config):
# `datasets.home()` is patched to a temporary directory through the autouse fixture `test_home` in
# test/test_prototype_builtin_datasets.py
root = pathlib.Path(datasets.home()) / self.name
# We cannot place the mock data upfront in `root`. Loading a dataset calls `OnlineResource.load`. In turn,
# this will only download **and** preprocess if the file is not present. In other words, if we already place
# the file in `root` before the resource is loaded, we are effectively skipping the preprocessing.
# To avoid that we first place the mock data in a temporary directory and patch the download logic to move it to
# `root` only when it is requested.
tmp_mock_data_folder = root / "__mock__"
tmp_mock_data_folder.mkdir(parents=True)
mock_info = self._parse_mock_info(self.mock_data_fn(tmp_mock_data_folder, config))
def patched_download(resource, root, **kwargs):
src = tmp_mock_data_folder / resource.file_name
if not src.exists():
raise pytest.UsageError(
f"Dataset '{self.name}' requires the file {resource.file_name} for {config}"
f"but it was not created by the mock data function."
)
dst = root / resource.file_name
shutil.move(str(src), str(root))
return dst
with unittest.mock.patch(
"torchvision.prototype.datasets.utils._resource.OnlineResource.download", new=patched_download
):
dataset = datasets.load(self.name, **config)
extra_files = list(tmp_mock_data_folder.glob("**/*"))
if extra_files:
raise pytest.UsageError(
(
f"Dataset '{self.name}' created the following files for {config} in the mock data function, "
f"but they were not loaded:\n\n"
)
+ "\n".join(str(file.relative_to(tmp_mock_data_folder)) for file in extra_files)
)
tmp_mock_data_folder.rmdir()
return dataset, mock_info
def config_id(name, config):
parts = [name]
for name, value in config.items():
if isinstance(value, bool):
part = ("" if value else "no_") + name
else:
part = str(value)
parts.append(part)
return "-".join(parts)
def parametrize_dataset_mocks(*dataset_mocks, marks=None):
mocks = {}
for mock in dataset_mocks:
if isinstance(mock, DatasetMock):
mocks[mock.name] = mock
elif isinstance(mock, collections.abc.Mapping):
mocks.update(mock)
else:
raise pytest.UsageError(
f"The positional arguments passed to `parametrize_dataset_mocks` can either be a `DatasetMock`, "
f"a sequence of `DatasetMock`'s, or a mapping of names to `DatasetMock`'s, "
f"but got {mock} instead."
)
dataset_mocks = mocks
if marks is None:
marks = {}
elif not isinstance(marks, collections.abc.Mapping):
raise pytest.UsageError()
return pytest.mark.parametrize(
("dataset_mock", "config"),
[
pytest.param(dataset_mock, config, id=config_id(name, config), marks=marks.get(name, ()))
for name, dataset_mock in dataset_mocks.items()
for config in dataset_mock.configs
],
)
DATASET_MOCKS = {}
def register_mock(name=None, *, configs):
def wrapper(mock_data_fn):
nonlocal name
if name is None:
name = mock_data_fn.__name__
DATASET_MOCKS[name] = DatasetMock(name, mock_data_fn=mock_data_fn, configs=configs)
return mock_data_fn
return wrapper
class MNISTMockData:
_DTYPES_ID = {
torch.uint8: 8,
torch.int8: 9,
torch.int16: 11,
torch.int32: 12,
torch.float32: 13,
torch.float64: 14,
}
@classmethod
def _magic(cls, dtype, ndim):
return cls._DTYPES_ID[dtype] * 256 + ndim + 1
@staticmethod
def _encode(t):
return torch.tensor(t, dtype=torch.int32).numpy().tobytes()[::-1]
@staticmethod
def _big_endian_dtype(dtype):
np_dtype = getattr(np, str(dtype).replace("torch.", ""))().dtype
return np.dtype(f">{np_dtype.kind}{np_dtype.itemsize}")
@classmethod
def _create_binary_file(cls, root, filename, *, num_samples, shape, dtype, compressor, low=0, high):
with compressor(root / filename, "wb") as fh:
for meta in (cls._magic(dtype, len(shape)), num_samples, *shape):
fh.write(cls._encode(meta))
data = make_tensor((num_samples, *shape), dtype=dtype, low=low, high=high)
fh.write(data.numpy().astype(cls._big_endian_dtype(dtype)).tobytes())
@classmethod
def generate(
cls,
root,
*,
num_categories,
num_samples=None,
images_file,
labels_file,
image_size=(28, 28),
image_dtype=torch.uint8,
label_size=(),
label_dtype=torch.uint8,
compressor=None,
):
if num_samples is None:
num_samples = num_categories
if compressor is None:
compressor = gzip.open
cls._create_binary_file(
root,
images_file,
num_samples=num_samples,
shape=image_size,
dtype=image_dtype,
compressor=compressor,
high=float("inf"),
)
cls._create_binary_file(
root,
labels_file,
num_samples=num_samples,
shape=label_size,
dtype=label_dtype,
compressor=compressor,
high=num_categories,
)
return num_samples
def mnist(root, config):
prefix = "train" if config["split"] == "train" else "t10k"
return MNISTMockData.generate(
root,
num_categories=10,
images_file=f"{prefix}-images-idx3-ubyte.gz",
labels_file=f"{prefix}-labels-idx1-ubyte.gz",
)
DATASET_MOCKS.update(
{
name: DatasetMock(name, mock_data_fn=mnist, configs=combinations_grid(split=("train", "test")))
for name in ["mnist", "fashionmnist", "kmnist"]
}
)
@register_mock(
configs=combinations_grid(
split=("train", "test"),
image_set=("Balanced", "By_Merge", "By_Class", "Letters", "Digits", "MNIST"),
)
)
def emnist(root, config):
num_samples_map = {}
file_names = set()
for split, image_set in itertools.product(
("train", "test"),
("Balanced", "By_Merge", "By_Class", "Letters", "Digits", "MNIST"),
):
prefix = f"emnist-{image_set.replace('_', '').lower()}-{split}"
images_file = f"{prefix}-images-idx3-ubyte.gz"
labels_file = f"{prefix}-labels-idx1-ubyte.gz"
file_names.update({images_file, labels_file})
num_samples_map[(split, image_set)] = MNISTMockData.generate(
root,
# The image sets that merge some lower case letters in their respective upper case variant, still use dense
# labels in the data files. Thus, num_categories != len(categories) there.
num_categories=47 if config["image_set"] in ("Balanced", "By_Merge") else 62,
images_file=images_file,
labels_file=labels_file,
)
make_zip(root, "emnist-gzip.zip", *file_names)
return num_samples_map[(config["split"], config["image_set"])]
@register_mock(configs=combinations_grid(split=("train", "test", "test10k", "test50k", "nist")))
def qmnist(root, config):
num_categories = 10
if config["split"] == "train":
num_samples = num_samples_gen = num_categories + 2
prefix = "qmnist-train"
suffix = ".gz"
compressor = gzip.open
elif config["split"].startswith("test"):
# The split 'test50k' is defined as the last 50k images beginning at index 10000. Thus, we need to create
# more than 10000 images for the dataset to not be empty.
num_samples_gen = 10001
num_samples = {
"test": num_samples_gen,
"test10k": min(num_samples_gen, 10_000),
"test50k": num_samples_gen - 10_000,
}[config["split"]]
prefix = "qmnist-test"
suffix = ".gz"
compressor = gzip.open
else: # config["split"] == "nist"
num_samples = num_samples_gen = num_categories + 3
prefix = "xnist"
suffix = ".xz"
compressor = lzma.open
MNISTMockData.generate(
root,
num_categories=num_categories,
num_samples=num_samples_gen,
images_file=f"{prefix}-images-idx3-ubyte{suffix}",
labels_file=f"{prefix}-labels-idx2-int{suffix}",
label_size=(8,),
label_dtype=torch.int32,
compressor=compressor,
)
return num_samples
class CIFARMockData:
NUM_PIXELS = 32 * 32 * 3
@classmethod
def _create_batch_file(cls, root, name, *, num_categories, labels_key, num_samples=1):
content = {
"data": make_tensor((num_samples, cls.NUM_PIXELS), dtype=torch.uint8).numpy(),
labels_key: torch.randint(0, num_categories, size=(num_samples,)).tolist(),
}
with open(pathlib.Path(root) / name, "wb") as fh:
pickle.dump(content, fh)
@classmethod
def generate(
cls,
root,
name,
*,
folder,
train_files,
test_files,
num_categories,
labels_key,
):
folder = root / folder
folder.mkdir()
files = (*train_files, *test_files)
for file in files:
cls._create_batch_file(
folder,
file,
num_categories=num_categories,
labels_key=labels_key,
)
make_tar(root, name, folder, compression="gz")
@register_mock(configs=combinations_grid(split=("train", "test")))
def cifar10(root, config):
train_files = [f"data_batch_{idx}" for idx in range(1, 6)]
test_files = ["test_batch"]
CIFARMockData.generate(
root=root,
name="cifar-10-python.tar.gz",
folder=pathlib.Path("cifar-10-batches-py"),
train_files=train_files,
test_files=test_files,
num_categories=10,
labels_key="labels",
)
return len(train_files if config["split"] == "train" else test_files)
@register_mock(configs=combinations_grid(split=("train", "test")))
def cifar100(root, config):
train_files = ["train"]
test_files = ["test"]
CIFARMockData.generate(
root=root,
name="cifar-100-python.tar.gz",
folder=pathlib.Path("cifar-100-python"),
train_files=train_files,
test_files=test_files,
num_categories=100,
labels_key="fine_labels",
)
return len(train_files if config["split"] == "train" else test_files)
@register_mock(configs=[dict()])
def caltech101(root, config):
def create_ann_file(root, name):
import scipy.io
box_coord = make_tensor((1, 4), dtype=torch.int32, low=0).numpy().astype(np.uint16)
obj_contour = make_tensor((2, int(torch.randint(3, 6, size=()))), dtype=torch.float64, low=0).numpy()
scipy.io.savemat(str(pathlib.Path(root) / name), dict(box_coord=box_coord, obj_contour=obj_contour))
def create_ann_folder(root, name, file_name_fn, num_examples):
root = pathlib.Path(root) / name
root.mkdir(parents=True)
for idx in range(num_examples):
create_ann_file(root, file_name_fn(idx))
images_root = root / "101_ObjectCategories"
anns_root = root / "Annotations"
image_category_map = {
"Faces": "Faces_2",
"Faces_easy": "Faces_3",
"Motorbikes": "Motorbikes_16",
"airplanes": "Airplanes_Side_2",
}
categories = ["Faces", "Faces_easy", "Motorbikes", "airplanes", "yin_yang"]
num_images_per_category = 2
for category in categories:
create_image_folder(
root=images_root,
name=category,
file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg",
num_examples=num_images_per_category,
)
create_ann_folder(
root=anns_root,
name=image_category_map.get(category, category),
file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat",
num_examples=num_images_per_category,
)
(images_root / "BACKGROUND_Goodle").mkdir()
make_tar(root, f"{images_root.name}.tar.gz", images_root, compression="gz")
make_tar(root, f"{anns_root.name}.tar", anns_root)
return num_images_per_category * len(categories)
@register_mock(configs=[dict()])
def caltech256(root, config):
dir = root / "256_ObjectCategories"
num_images_per_category = 2
categories = [
(1, "ak47"),
(127, "laptop-101"),
(198, "spider"),
(257, "clutter"),
]
for category_idx, category in categories:
files = create_image_folder(
dir,
name=f"{category_idx:03d}.{category}",
file_name_fn=lambda image_idx: f"{category_idx:03d}_{image_idx + 1:04d}.jpg",
num_examples=num_images_per_category,
)
if category == "spider":
open(files[0].parent / "RENAME2", "w").close()
make_tar(root, f"{dir.name}.tar", dir)
return num_images_per_category * len(categories)
@register_mock(configs=combinations_grid(split=("train", "val", "test")))
def imagenet(root, config):
from scipy.io import savemat
info = datasets.info("imagenet")
if config["split"] == "train":
num_samples = len(info["wnids"])
archive_name = "ILSVRC2012_img_train.tar"
files = []
for wnid in info["wnids"]:
create_image_folder(
root=root,
name=wnid,
file_name_fn=lambda image_idx: f"{wnid}_{image_idx:04d}.JPEG",
num_examples=1,
)
files.append(make_tar(root, f"{wnid}.tar"))
elif config["split"] == "val":
num_samples = 3
archive_name = "ILSVRC2012_img_val.tar"
files = [create_image_file(root, f"ILSVRC2012_val_{idx + 1:08d}.JPEG") for idx in range(num_samples)]
devkit_root = root / "ILSVRC2012_devkit_t12"
data_root = devkit_root / "data"
data_root.mkdir(parents=True)
with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
for label in torch.randint(0, len(info["wnids"]), (num_samples,)).tolist():
file.write(f"{label}\n")
num_children = 0
synsets = [
(idx, wnid, category, "", num_children, [], 0, 0)
for idx, (category, wnid) in enumerate(zip(info["categories"], info["wnids"]), 1)
]
num_children = 1
synsets.extend((0, "", "", "", num_children, [], 0, 0) for _ in range(5))
synsets = np.array(
synsets,
dtype=np.dtype(
[
("ILSVRC2012_ID", "O"),
("WNID", "O"),
("words", "O"),
("gloss", "O"),
("num_children", "O"),
("children", "O"),
("wordnet_height", "O"),
("num_train_images", "O"),
]
),
)
savemat(data_root / "meta.mat", dict(synsets=synsets))
make_tar(root, devkit_root.with_suffix(".tar.gz").name, compression="gz")
else: # config["split"] == "test"
num_samples = 5
archive_name = "ILSVRC2012_img_test_v10102019.tar"
files = [create_image_file(root, f"ILSVRC2012_test_{idx + 1:08d}.JPEG") for idx in range(num_samples)]
make_tar(root, archive_name, *files)
return num_samples
class CocoMockData:
@classmethod
def _make_annotations_json(
cls,
root,
name,
*,
images_meta,
fn,
):
num_anns_per_image = torch.randint(1, 5, (len(images_meta),))
num_anns_total = int(num_anns_per_image.sum())
ann_ids_iter = iter(torch.arange(num_anns_total)[torch.randperm(num_anns_total)])
anns_meta = []
for image_meta, num_anns in zip(images_meta, num_anns_per_image):
for _ in range(num_anns):
ann_id = int(next(ann_ids_iter))
anns_meta.append(dict(fn(ann_id, image_meta), id=ann_id, image_id=image_meta["id"]))
anns_meta.sort(key=lambda ann: ann["id"])
with open(root / name, "w") as file:
json.dump(dict(images=images_meta, annotations=anns_meta), file)
return num_anns_per_image
@staticmethod
def _make_instances_data(ann_id, image_meta):
def make_rle_segmentation():
height, width = image_meta["height"], image_meta["width"]
numel = height * width
counts = []
while sum(counts) <= numel:
counts.append(int(torch.randint(5, 8, ())))
if sum(counts) > numel:
counts[-1] -= sum(counts) - numel
return dict(counts=counts, size=[height, width])
return dict(
segmentation=make_rle_segmentation(),
bbox=make_tensor((4,), dtype=torch.float32, low=0).tolist(),
iscrowd=True,
area=float(make_scalar(dtype=torch.float32)),
category_id=int(make_scalar(dtype=torch.int64)),
)
@staticmethod
def _make_captions_data(ann_id, image_meta):
return dict(caption=f"Caption {ann_id} describing image {image_meta['id']}.")
@classmethod
def _make_annotations(cls, root, name, *, images_meta):
num_anns_per_image = torch.zeros((len(images_meta),), dtype=torch.int64)
for annotations, fn in (
("instances", cls._make_instances_data),
("captions", cls._make_captions_data),
):
num_anns_per_image += cls._make_annotations_json(
root, f"{annotations}_{name}.json", images_meta=images_meta, fn=fn
)
return int(num_anns_per_image.sum())
@classmethod
def generate(
cls,
root,
*,
split,
year,
num_samples,
):
annotations_dir = root / "annotations"
annotations_dir.mkdir()
for split_ in ("train", "val"):
config_name = f"{split_}{year}"
images_meta = [
dict(
file_name=f"{idx:012d}.jpg",
id=idx,
width=width,
height=height,
)
for idx, (height, width) in enumerate(
torch.randint(3, 11, size=(num_samples, 2), dtype=torch.int).tolist()
)
]
if split_ == split:
create_image_folder(
root,
config_name,
file_name_fn=lambda idx: images_meta[idx]["file_name"],
num_examples=num_samples,
size=lambda idx: (3, images_meta[idx]["height"], images_meta[idx]["width"]),
)
make_zip(root, f"{config_name}.zip")
cls._make_annotations(
annotations_dir,
config_name,
images_meta=images_meta,
)
make_zip(root, f"annotations_trainval{year}.zip", annotations_dir)
return num_samples
@register_mock(
configs=combinations_grid(
split=("train", "val"),
year=("2017", "2014"),
annotations=("instances", "captions", None),
)
)
def coco(root, config):
return CocoMockData.generate(root, split=config["split"], year=config["year"], num_samples=5)
class SBDMockData:
_NUM_CATEGORIES = 20
@classmethod
def _make_split_files(cls, root_map, *, split):
splits_and_idcs = [
("train", [0, 1, 2]),
("val", [3]),
]
if split == "train_noval":
splits_and_idcs.append(("train_noval", [0, 2]))
ids_map = {split: [f"2008_{idx:06d}" for idx in idcs] for split, idcs in splits_and_idcs}
for split, ids in ids_map.items():
with open(root_map[split] / f"{split}.txt", "w") as fh:
fh.writelines(f"{id}\n" for id in ids)
return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()}
@classmethod
def _make_anns_folder(cls, root, name, ids):
from scipy.io import savemat
anns_folder = root / name
anns_folder.mkdir()
sizes = torch.randint(1, 9, size=(len(ids), 2)).tolist()
for id, size in zip(ids, sizes):
savemat(
anns_folder / f"{id}.mat",
{
"GTcls": {
"Boundaries": cls._make_boundaries(size),
"Segmentation": cls._make_segmentation(size),
}
},
)
return sizes
@classmethod
def _make_boundaries(cls, size):
from scipy.sparse import csc_matrix
return [
[csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] for _ in range(cls._NUM_CATEGORIES)
]
@classmethod
def _make_segmentation(cls, size):
return torch.randint(0, cls._NUM_CATEGORIES + 1, size=size, dtype=torch.uint8).numpy()
@classmethod
def generate(cls, root, *, split):
archive_folder = root / "benchmark_RELEASE"
dataset_folder = archive_folder / "dataset"
dataset_folder.mkdir(parents=True, exist_ok=True)
ids, num_samples_map = cls._make_split_files(
defaultdict(lambda: dataset_folder, {"train_noval": root}), split=split
)
sizes = cls._make_anns_folder(dataset_folder, "cls", ids)
create_image_folder(
dataset_folder, "img", lambda idx: f"{ids[idx]}.jpg", num_examples=len(ids), size=lambda idx: sizes[idx]
)
make_tar(root, "benchmark.tgz", archive_folder, compression="gz")
return num_samples_map[split]
@register_mock(configs=combinations_grid(split=("train", "val", "train_noval")))
def sbd(root, config):
return SBDMockData.generate(root, split=config["split"])
@register_mock(configs=[dict()])
def semeion(root, config):
num_samples = 3
num_categories = 10
images = torch.rand(num_samples, 256)
labels = one_hot(torch.randint(num_categories, size=(num_samples,)), num_classes=num_categories)
with open(root / "semeion.data", "w") as fh:
for image, one_hot_label in zip(images, labels):
image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image])
labels_columns = " ".join([str(label.item()) for label in one_hot_label])
fh.write(f"{image_columns} {labels_columns} \n")
return num_samples
class VOCMockData:
_TRAIN_VAL_FILE_NAMES = {
"2007": "VOCtrainval_06-Nov-2007.tar",
"2008": "VOCtrainval_14-Jul-2008.tar",
"2009": "VOCtrainval_11-May-2009.tar",
"2010": "VOCtrainval_03-May-2010.tar",
"2011": "VOCtrainval_25-May-2011.tar",
"2012": "VOCtrainval_11-May-2012.tar",
}
_TEST_FILE_NAMES = {
"2007": "VOCtest_06-Nov-2007.tar",
}
@classmethod
def _make_split_files(cls, root, *, year, trainval):
split_folder = root / "ImageSets"
if trainval:
idcs_map = {
"train": [0, 1, 2],
"val": [3, 4],
}
idcs_map["trainval"] = [*idcs_map["train"], *idcs_map["val"]]
else:
idcs_map = {
"test": [5],
}
ids_map = {split: [f"{year}_{idx:06d}" for idx in idcs] for split, idcs in idcs_map.items()}
for task_sub_folder in ("Main", "Segmentation"):
task_folder = split_folder / task_sub_folder
task_folder.mkdir(parents=True, exist_ok=True)
for split, ids in ids_map.items():
with open(task_folder / f"{split}.txt", "w") as fh:
fh.writelines(f"{id}\n" for id in ids)
return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()}
@classmethod
def _make_detection_anns_folder(cls, root, name, *, file_name_fn, num_examples):
folder = root / name
folder.mkdir(parents=True, exist_ok=True)
for idx in range(num_examples):
cls._make_detection_ann_file(folder, file_name_fn(idx))
@classmethod
def _make_detection_ann_file(cls, root, name):
def add_child(parent, name, text=None):
child = ET.SubElement(parent, name)
child.text = str(text)
return child
def add_name(obj, name="dog"):
add_child(obj, "name", name)
def add_size(obj):
obj = add_child(obj, "size")
size = {"width": 0, "height": 0, "depth": 3}
for name, text in size.items():
add_child(obj, name, text)
def add_bndbox(obj):
obj = add_child(obj, "bndbox")
bndbox = {"xmin": 1, "xmax": 2, "ymin": 3, "ymax": 4}
for name, text in bndbox.items():
add_child(obj, name, text)
annotation = ET.Element("annotation")
add_size(annotation)
obj = add_child(annotation, "object")
add_name(obj)
add_bndbox(obj)
with open(root / name, "wb") as fh:
fh.write(ET.tostring(annotation))
@classmethod
def generate(cls, root, *, year, trainval):
archive_folder = root
if year == "2011":
archive_folder = root / "TrainVal"
data_folder = archive_folder / "VOCdevkit"
else:
archive_folder = data_folder = root / "VOCdevkit"
data_folder = data_folder / f"VOC{year}"
data_folder.mkdir(parents=True, exist_ok=True)
ids, num_samples_map = cls._make_split_files(data_folder, year=year, trainval=trainval)
for make_folder_fn, name, suffix in [
(create_image_folder, "JPEGImages", ".jpg"),
(create_image_folder, "SegmentationClass", ".png"),
(cls._make_detection_anns_folder, "Annotations", ".xml"),
]:
make_folder_fn(data_folder, name, file_name_fn=lambda idx: ids[idx] + suffix, num_examples=len(ids))
make_tar(root, (cls._TRAIN_VAL_FILE_NAMES if trainval else cls._TEST_FILE_NAMES)[year], archive_folder)
return num_samples_map
@register_mock(
configs=[
*combinations_grid(
split=("train", "val", "trainval"),
year=("2007", "2008", "2009", "2010", "2011", "2012"),
task=("detection", "segmentation"),
),
*combinations_grid(
split=("test",),
year=("2007",),
task=("detection", "segmentation"),
),
],
)
def voc(root, config):
trainval = config["split"] != "test"
return VOCMockData.generate(root, year=config["year"], trainval=trainval)[config["split"]]
class CelebAMockData:
@classmethod
def _make_ann_file(cls, root, name, data, *, field_names=None):
with open(root / name, "w") as file:
if field_names:
file.write(f"{len(data)}\r\n")
file.write(" ".join(field_names) + "\r\n")
file.writelines(" ".join(str(item) for item in row) + "\r\n" for row in data)
_SPLIT_TO_IDX = {
"train": 0,
"val": 1,
"test": 2,
}
@classmethod
def _make_split_file(cls, root):
num_samples_map = {"train": 4, "val": 3, "test": 2}
data = [
(f"{idx:06d}.jpg", cls._SPLIT_TO_IDX[split])
for split, num_samples in num_samples_map.items()
for idx in range(num_samples)
]
cls._make_ann_file(root, "list_eval_partition.txt", data)
image_file_names, _ = zip(*data)
return image_file_names, num_samples_map
@classmethod
def _make_identity_file(cls, root, image_file_names):
cls._make_ann_file(
root, "identity_CelebA.txt", [(name, int(make_scalar(low=1, dtype=torch.int))) for name in image_file_names]
)
@classmethod
def _make_attributes_file(cls, root, image_file_names):
field_names = ("5_o_Clock_Shadow", "Young")
data = [
[name, *[" 1" if attr else "-1" for attr in make_tensor((len(field_names),), dtype=torch.bool)]]
for name in image_file_names
]
cls._make_ann_file(root, "list_attr_celeba.txt", data, field_names=(*field_names, ""))
@classmethod
def _make_bounding_boxes_file(cls, root, image_file_names):
field_names = ("image_id", "x_1", "y_1", "width", "height")
data = [
[f"{name} ", *[f"{coord:3d}" for coord in make_tensor((4,), low=0, dtype=torch.int).tolist()]]
for name in image_file_names
]
cls._make_ann_file(root, "list_bbox_celeba.txt", data, field_names=field_names)
@classmethod
def _make_landmarks_file(cls, root, image_file_names):
field_names = ("lefteye_x", "lefteye_y", "rightmouth_x", "rightmouth_y")
data = [
[
name,
*[
f"{coord:4d}" if idx else coord
for idx, coord in enumerate(make_tensor((len(field_names),), low=0, dtype=torch.int).tolist())
],
]
for name in image_file_names
]
cls._make_ann_file(root, "list_landmarks_align_celeba.txt", data, field_names=field_names)
@classmethod
def generate(cls, root):
image_file_names, num_samples_map = cls._make_split_file(root)
image_files = create_image_folder(
root, "img_align_celeba", file_name_fn=lambda idx: image_file_names[idx], num_examples=len(image_file_names)
)
make_zip(root, image_files[0].parent.with_suffix(".zip").name)
for make_ann_file_fn in (
cls._make_identity_file,
cls._make_attributes_file,
cls._make_bounding_boxes_file,
cls._make_landmarks_file,
):
make_ann_file_fn(root, image_file_names)
return num_samples_map
@register_mock(configs=combinations_grid(split=("train", "val", "test")))
def celeba(root, config):
return CelebAMockData.generate(root)[config["split"]]
@register_mock(configs=combinations_grid(split=("train", "val", "test")))
def country211(root, config):
split_folder = pathlib.Path(root, "country211", "valid" if config["split"] == "val" else config["split"])
split_folder.mkdir(parents=True, exist_ok=True)
num_examples = {
"train": 3,
"val": 4,
"test": 5,
}[config["split"]]
classes = ("AD", "BS", "GR")
for cls in classes:
create_image_folder(
split_folder,
name=cls,
file_name_fn=lambda idx: f"{idx}.jpg",
num_examples=num_examples,
)
make_tar(root, f"{split_folder.parent.name}.tgz", split_folder.parent, compression="gz")
return num_examples * len(classes)
@register_mock(configs=combinations_grid(split=("train", "test")))
def food101(root, config):
data_folder = root / "food-101"
num_images_per_class = 3
image_folder = data_folder / "images"
categories = ["apple_pie", "baby_back_ribs", "waffles"]
image_ids = []
for category in categories:
image_files = create_image_folder(
image_folder,
category,
file_name_fn=lambda idx: f"{idx:04d}.jpg",
num_examples=num_images_per_class,
)
image_ids.extend(path.relative_to(path.parents[1]).with_suffix("").as_posix() for path in image_files)
meta_folder = data_folder / "meta"
meta_folder.mkdir()
with open(meta_folder / "classes.txt", "w") as file:
for category in categories:
file.write(f"{category}\n")
splits = ["train", "test"]
num_samples_map = {}
for offset, split in enumerate(splits):
image_ids_in_split = image_ids[offset :: len(splits)]
num_samples_map[split] = len(image_ids_in_split)
with open(meta_folder / f"{split}.txt", "w") as file:
for image_id in image_ids_in_split:
file.write(f"{image_id}\n")
make_tar(root, f"{data_folder.name}.tar.gz", compression="gz")
return num_samples_map[config["split"]]
@register_mock(configs=combinations_grid(split=("train", "val", "test"), fold=(1, 4, 10)))
def dtd(root, config):
data_folder = root / "dtd"
num_images_per_class = 3
image_folder = data_folder / "images"
categories = {"banded", "marbled", "zigzagged"}
image_ids_per_category = {
category: [
str(path.relative_to(path.parents[1]).as_posix())
for path in create_image_folder(
image_folder,
category,
file_name_fn=lambda idx: f"{category}_{idx:04d}.jpg",
num_examples=num_images_per_class,
)
]
for category in categories
}
meta_folder = data_folder / "labels"
meta_folder.mkdir()
with open(meta_folder / "labels_joint_anno.txt", "w") as file:
for cls, image_ids in image_ids_per_category.items():
for image_id in image_ids:
joint_categories = random.choices(
list(categories - {cls}), k=int(torch.randint(len(categories) - 1, ()))
)
file.write(" ".join([image_id, *sorted([cls, *joint_categories])]) + "\n")
image_ids = list(itertools.chain(*image_ids_per_category.values()))
splits = ("train", "val", "test")
num_samples_map = {}
for fold in range(1, 11):
random.shuffle(image_ids)
for offset, split in enumerate(splits):
image_ids_in_config = image_ids[offset :: len(splits)]
with open(meta_folder / f"{split}{fold}.txt", "w") as file:
file.write("\n".join(image_ids_in_config) + "\n")
num_samples_map[(split, fold)] = len(image_ids_in_config)
make_tar(root, "dtd-r1.0.1.tar.gz", data_folder, compression="gz")
return num_samples_map[config["split"], config["fold"]]
@register_mock(configs=combinations_grid(split=("train", "test")))
def fer2013(root, config):
split = config["split"]
num_samples = 5 if split == "train" else 3
path = root / f"{split}.csv"
with open(path, "w", newline="") as file:
field_names = ["emotion"] if split == "train" else []
field_names.append("pixels")
file.write(",".join(field_names) + "\n")
writer = csv.DictWriter(file, fieldnames=field_names, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
for _ in range(num_samples):
rowdict = {
"pixels": " ".join([str(int(pixel)) for pixel in torch.randint(256, (48 * 48,), dtype=torch.uint8)])
}
if split == "train":
rowdict["emotion"] = int(torch.randint(7, ()))
writer.writerow(rowdict)
make_zip(root, f"{path.name}.zip", path)
return num_samples
@register_mock(configs=combinations_grid(split=("train", "test")))
def gtsrb(root, config):
num_examples_per_class = 5 if config["split"] == "train" else 3
classes = ("00000", "00042", "00012")
num_examples = num_examples_per_class * len(classes)
csv_columns = ["Filename", "Width", "Height", "Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2", "ClassId"]
def _make_ann_file(path, num_examples, class_idx):
if class_idx == "random":
class_idx = torch.randint(1, len(classes) + 1, size=(1,)).item()
with open(path, "w") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=csv_columns, delimiter=";")
writer.writeheader()
for image_idx in range(num_examples):
writer.writerow(
{
"Filename": f"{image_idx:05d}.ppm",
"Width": torch.randint(1, 100, size=()).item(),
"Height": torch.randint(1, 100, size=()).item(),
"Roi.X1": torch.randint(1, 100, size=()).item(),
"Roi.Y1": torch.randint(1, 100, size=()).item(),
"Roi.X2": torch.randint(1, 100, size=()).item(),
"Roi.Y2": torch.randint(1, 100, size=()).item(),
"ClassId": class_idx,
}
)
archive_folder = root / "GTSRB"
if config["split"] == "train":
train_folder = archive_folder / "Training"
train_folder.mkdir(parents=True)
for class_idx in classes:
create_image_folder(
train_folder,
name=class_idx,
file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm",
num_examples=num_examples_per_class,
)
_make_ann_file(
path=train_folder / class_idx / f"GT-{class_idx}.csv",
num_examples=num_examples_per_class,
class_idx=int(class_idx),
)
make_zip(root, "GTSRB-Training_fixed.zip", archive_folder)
else:
test_folder = archive_folder / "Final_Test"
test_folder.mkdir(parents=True)
create_image_folder(
test_folder,
name="Images",
file_name_fn=lambda image_idx: f"{image_idx:05d}.ppm",
num_examples=num_examples,
)
make_zip(root, "GTSRB_Final_Test_Images.zip", archive_folder)
_make_ann_file(
path=root / "GT-final_test.csv",
num_examples=num_examples,
class_idx="random",
)
make_zip(root, "GTSRB_Final_Test_GT.zip", "GT-final_test.csv")
return num_examples
@register_mock(configs=combinations_grid(split=("train", "val", "test")))
def clevr(root, config):
data_folder = root / "CLEVR_v1.0"
num_samples_map = {
"train": 3,
"val": 2,
"test": 1,
}
images_folder = data_folder / "images"
image_files = {
split: create_image_folder(
images_folder,
split,
file_name_fn=lambda idx: f"CLEVR_{split}_{idx:06d}.jpg",
num_examples=num_samples,
)
for split, num_samples in num_samples_map.items()
}
scenes_folder = data_folder / "scenes"
scenes_folder.mkdir()
for split in ["train", "val"]:
with open(scenes_folder / f"CLEVR_{split}_scenes.json", "w") as file:
json.dump(
{
"scenes": [
{
"image_filename": image_file.name,
# We currently only return the number of objects in a scene.
# Thus, it is sufficient for now to only mock the number of elements.
"objects": [None] * int(torch.randint(1, 5, ())),
}
for image_file in image_files[split]
]
},
file,
)
make_zip(root, f"{data_folder.name}.zip", data_folder)
return num_samples_map[config["split"]]
class OxfordIIITPetMockData:
@classmethod
def _meta_to_split_and_classification_ann(cls, meta, idx):
image_id = "_".join(
[
*[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()],
str(idx),
]
)
class_id = str(meta["label"] + 1)
species = "1" if meta["species"] == "cat" else "2"
breed_id = "-1"
return (image_id, class_id, species, breed_id)
@classmethod
    def generate(cls, root):
classification_anns_meta = (
dict(cls="Abyssinian", label=0, species="cat"),
dict(cls="Keeshond", label=18, species="dog"),
dict(cls="Yorkshire Terrier", label=36, species="dog"),
)
split_and_classification_anns = [
            cls._meta_to_split_and_classification_ann(meta, idx)
for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10))
]
image_ids, *_ = zip(*split_and_classification_anns)
image_files = create_image_folder(
root, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids)
)
anns_folder = root / "annotations"
anns_folder.mkdir()
random.shuffle(split_and_classification_anns)
splits = ("trainval", "test")
num_samples_map = {}
for offset, split in enumerate(splits):
split_and_classification_anns_in_split = split_and_classification_anns[offset :: len(splits)]
with open(anns_folder / f"{split}.txt", "w") as file:
writer = csv.writer(file, delimiter=" ")
for split_and_classification_ann in split_and_classification_anns_in_split:
writer.writerow(split_and_classification_ann)
num_samples_map[split] = len(split_and_classification_anns_in_split)
segmentation_files = create_image_folder(
anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids)
)
# The dataset has some rogue files
for path in image_files[:3]:
path.with_suffix(".mat").touch()
for path in segmentation_files:
path.with_name(f".{path.name}").touch()
make_tar(root, "images.tar.gz", compression="gz")
make_tar(root, anns_folder.with_suffix(".tar.gz").name, compression="gz")
return num_samples_map
@register_mock(name="oxford-iiit-pet", configs=combinations_grid(split=("trainval", "test")))
def oxford_iiit_pet(root, config):
return OxfordIIITPetMockData.generate(root)[config["split"]]
class _CUB200MockData:
@classmethod
def _category_folder(cls, category, idx):
return f"{idx:03d}.{category}"
@classmethod
def _file_stem(cls, category, idx):
return f"{category}_{idx:04d}"
@classmethod
def _make_images(cls, images_folder):
image_files = []
for category_idx, category in [
(1, "Black_footed_Albatross"),
(100, "Brown_Pelican"),
(200, "Common_Yellowthroat"),
]:
image_files.extend(
create_image_folder(
images_folder,
cls._category_folder(category, category_idx),
lambda image_idx: f"{cls._file_stem(category, image_idx)}.jpg",
num_examples=5,
)
)
return image_files
class CUB2002011MockData(_CUB200MockData):
@classmethod
def _make_archive(cls, root):
archive_folder = root / "CUB_200_2011"
images_folder = archive_folder / "images"
image_files = cls._make_images(images_folder)
image_ids = list(range(1, len(image_files) + 1))
with open(archive_folder / "images.txt", "w") as file:
file.write(
"\n".join(
f"{id} {path.relative_to(images_folder).as_posix()}" for id, path in zip(image_ids, image_files)
)
)
split_ids = torch.randint(2, (len(image_ids),)).tolist()
counts = Counter(split_ids)
num_samples_map = {"train": counts[1], "test": counts[0]}
with open(archive_folder / "train_test_split.txt", "w") as file:
file.write("\n".join(f"{image_id} {split_id}" for image_id, split_id in zip(image_ids, split_ids)))
with open(archive_folder / "bounding_boxes.txt", "w") as file:
file.write(
"\n".join(
" ".join(
str(item)
for item in [image_id, *make_tensor((4,), dtype=torch.int, low=0).to(torch.float).tolist()]
)
for image_id in image_ids
)
)
make_tar(root, archive_folder.with_suffix(".tgz").name, compression="gz")
return image_files, num_samples_map
@classmethod
def _make_segmentations(cls, root, image_files):
segmentations_folder = root / "segmentations"
for image_file in image_files:
folder = segmentations_folder.joinpath(image_file.relative_to(image_file.parents[1]))
folder.mkdir(exist_ok=True, parents=True)
create_image_file(
folder,
image_file.with_suffix(".png").name,
size=[1, *make_tensor((2,), low=3, dtype=torch.int).tolist()],
)
make_tar(root, segmentations_folder.with_suffix(".tgz").name, compression="gz")
@classmethod
def generate(cls, root):
image_files, num_samples_map = cls._make_archive(root)
cls._make_segmentations(root, image_files)
return num_samples_map
class CUB2002010MockData(_CUB200MockData):
@classmethod
def _make_hidden_rouge_file(cls, *files):
for file in files:
(file.parent / f"._{file.name}").touch()
@classmethod
def _make_splits(cls, root, image_files):
split_folder = root / "lists"
split_folder.mkdir()
random.shuffle(image_files)
splits = ("train", "test")
num_samples_map = {}
for offset, split in enumerate(splits):
image_files_in_split = image_files[offset :: len(splits)]
split_file = split_folder / f"{split}.txt"
with open(split_file, "w") as file:
file.write(
"\n".join(
sorted(
str(image_file.relative_to(image_file.parents[1]).as_posix())
for image_file in image_files_in_split
)
)
)
cls._make_hidden_rouge_file(split_file)
num_samples_map[split] = len(image_files_in_split)
make_tar(root, split_folder.with_suffix(".tgz").name, compression="gz")
return num_samples_map
@classmethod
def _make_anns(cls, root, image_files):
from scipy.io import savemat
anns_folder = root / "annotations-mat"
for image_file in image_files:
ann_file = anns_folder / image_file.with_suffix(".mat").relative_to(image_file.parents[1])
ann_file.parent.mkdir(parents=True, exist_ok=True)
savemat(
ann_file,
{
"seg": torch.randint(
256, make_tensor((2,), low=3, dtype=torch.int).tolist(), dtype=torch.uint8
).numpy(),
"bbox": dict(
zip(("left", "top", "right", "bottom"), make_tensor((4,), dtype=torch.uint8).tolist())
),
},
)
readme_file = anns_folder / "README.txt"
readme_file.touch()
cls._make_hidden_rouge_file(readme_file)
make_tar(root, "annotations.tgz", anns_folder, compression="gz")
@classmethod
def generate(cls, root):
images_folder = root / "images"
image_files = cls._make_images(images_folder)
cls._make_hidden_rouge_file(*image_files)
make_tar(root, images_folder.with_suffix(".tgz").name, compression="gz")
num_samples_map = cls._make_splits(root, image_files)
cls._make_anns(root, image_files)
return num_samples_map
@register_mock(configs=combinations_grid(split=("train", "test"), year=("2010", "2011")))
def cub200(root, config):
num_samples_map = (CUB2002011MockData if config["year"] == "2011" else CUB2002010MockData).generate(root)
return num_samples_map[config["split"]]
@register_mock(configs=[dict()])
def eurosat(root, config):
data_folder = root / "2750"
data_folder.mkdir(parents=True)
num_examples_per_class = 3
categories = ["AnnualCrop", "Forest"]
for category in categories:
create_image_folder(
root=data_folder,
name=category,
file_name_fn=lambda idx: f"{category}_{idx + 1}.jpg",
num_examples=num_examples_per_class,
)
make_zip(root, "EuroSAT.zip", data_folder)
return len(categories) * num_examples_per_class
@register_mock(configs=combinations_grid(split=("train", "test", "extra")))
def svhn(root, config):
import scipy.io as sio
num_samples = {
"train": 2,
"test": 3,
"extra": 4,
}[config["split"]]
sio.savemat(
root / f"{config['split']}_32x32.mat",
{
"X": np.random.randint(256, size=(32, 32, 3, num_samples), dtype=np.uint8),
"y": np.random.randint(10, size=(num_samples,), dtype=np.uint8),
},
)
return num_samples
@register_mock(configs=combinations_grid(split=("train", "val", "test")))
def pcam(root, config):
import h5py
num_images = {"train": 2, "test": 3, "val": 4}[config["split"]]
split = "valid" if config["split"] == "val" else config["split"]
images_io = io.BytesIO()
with h5py.File(images_io, "w") as f:
f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8)
targets_io = io.BytesIO()
with h5py.File(targets_io, "w") as f:
f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8)
# Create .gz compressed files
images_file = root / f"camelyonpatch_level_2_split_{split}_x.h5.gz"
targets_file = root / f"camelyonpatch_level_2_split_{split}_y.h5.gz"
for compressed_file_name, uncompressed_file_io in ((images_file, images_io), (targets_file, targets_io)):
compressed_data = gzip.compress(uncompressed_file_io.getbuffer())
with open(compressed_file_name, "wb") as compressed_file:
compressed_file.write(compressed_data)
return num_images
@register_mock(name="stanford-cars", configs=combinations_grid(split=("train", "test")))
def stanford_cars(root, config):
import scipy.io as io
from numpy.core.records import fromarrays
split = config["split"]
num_samples = {"train": 5, "test": 7}[split]
num_categories = 3
if split == "train":
images_folder_name = "cars_train"
devkit = root / "devkit"
devkit.mkdir()
annotations_mat_path = devkit / "cars_train_annos.mat"
else:
images_folder_name = "cars_test"
annotations_mat_path = root / "cars_test_annos_withlabels.mat"
create_image_folder(
root=root,
name=images_folder_name,
file_name_fn=lambda image_index: f"{image_index:5d}.jpg",
num_examples=num_samples,
)
make_tar(root, f"cars_{split}.tgz", images_folder_name)
bbox = np.random.randint(1, 200, num_samples, dtype=np.uint8)
classes = np.random.randint(1, num_categories + 1, num_samples, dtype=np.uint8)
fnames = [f"{i:5d}.jpg" for i in range(num_samples)]
rec_array = fromarrays(
[bbox, bbox, bbox, bbox, classes, fnames],
names=["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"],
)
io.savemat(annotations_mat_path, {"annotations": rec_array})
if split == "train":
make_tar(root, "car_devkit.tgz", devkit, compression="gz")
return num_samples
@register_mock(configs=combinations_grid(split=("train", "test")))
def usps(root, config):
num_samples = {"train": 15, "test": 7}[config["split"]]
    with bz2.open(root / f"usps{'.t' if config['split'] != 'train' else ''}.bz2", "wb") as fh:
lines = []
for _ in range(num_samples):
label = make_tensor(1, low=1, high=11, dtype=torch.int)
values = make_tensor(256, low=-1, high=1, dtype=torch.float)
lines.append(
" ".join([f"{int(label)}", *(f"{idx}:{float(value):.6f}" for idx, value in enumerate(values, 1))])
)
fh.write("\n".join(lines).encode())
return num_samples
import os
from collections import defaultdict
from numbers import Number
from typing import Any, List
import torch
from torch.utils._python_dispatch import TorchDispatchMode
from torch.utils._pytree import tree_map
from torchvision.models._api import Weights
aten = torch.ops.aten
quantized = torch.ops.quantized
def get_shape(i):
if isinstance(i, torch.Tensor):
return i.shape
elif hasattr(i, "weight"):
return i.weight().shape
else:
raise ValueError(f"Unknown type {type(i)}")
def prod(x):
res = 1
for i in x:
res *= i
return res
def matmul_flop(inputs: List[Any], outputs: List[Any]) -> Number:
"""
Count flops for matmul.
"""
# Inputs should be a list of length 2.
# Inputs contains the shapes of two matrices.
input_shapes = [get_shape(v) for v in inputs]
assert len(input_shapes) == 2, input_shapes
assert input_shapes[0][-1] == input_shapes[1][-2], input_shapes
flop = prod(input_shapes[0]) * input_shapes[-1][-1]
return flop
def addmm_flop(inputs: List[Any], outputs: List[Any]) -> Number:
"""
Count flops for fully connected layers.
"""
# Count flop for nn.Linear
# inputs is a list of length 3.
input_shapes = [get_shape(v) for v in inputs[1:3]]
# input_shapes[0]: [batch size, input feature dimension]
    # input_shapes[1]: [input feature dimension, output feature dimension]
assert len(input_shapes[0]) == 2, input_shapes[0]
assert len(input_shapes[1]) == 2, input_shapes[1]
batch_size, input_dim = input_shapes[0]
output_dim = input_shapes[1][1]
flops = batch_size * input_dim * output_dim
return flops
def bmm_flop(inputs: List[Any], outputs: List[Any]) -> Number:
"""
Count flops for the bmm operation.
"""
# Inputs should be a list of length 2.
    # Inputs contains the shapes of the two input tensors.
assert len(inputs) == 2, len(inputs)
input_shapes = [get_shape(v) for v in inputs]
n, c, t = input_shapes[0]
d = input_shapes[-1][-1]
flop = n * c * t * d
return flop
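# Illustrative sketch (not part of the original utilities): batched matrix
# multiplication of a (2, 3, 4) tensor with a (2, 4, 5) tensor performs
# 2 * 3 * 4 * 5 = 120 multiply-adds according to the rule above.
def _example_bmm_flop():
    a = torch.randn(2, 3, 4)
    b = torch.randn(2, 4, 5)
    return bmm_flop([a, b], [torch.bmm(a, b)])  # == 120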
def conv_flop_count(
x_shape: List[int],
w_shape: List[int],
out_shape: List[int],
transposed: bool = False,
) -> Number:
"""
    Count flops for convolution. Note that only multiplications are
    counted; additions and the bias term are ignored.
    Flops for a transposed convolution are calculated as
    flops = batch_size * prod(w_shape) * prod(x_shape[2:]).
Args:
x_shape (list(int)): The input shape before convolution.
w_shape (list(int)): The filter shape.
out_shape (list(int)): The output shape after convolution.
        transposed (bool): whether the convolution is transposed
Returns:
int: the number of flops
"""
batch_size = x_shape[0]
conv_shape = (x_shape if transposed else out_shape)[2:]
flop = batch_size * prod(w_shape) * prod(conv_shape)
return flop
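# Illustrative sanity check of the formula above (a sketch, not part of the
# original utilities): a 3x3 convolution mapping 3 -> 64 channels on a single
# 224x224 input with unit stride and padding produces a (1, 64, 224, 224)
# output, i.e. 1 * (64 * 3 * 3 * 3) * (224 * 224) = 86_704_128 multiply-adds.
def _example_conv_flop_count():
    return conv_flop_count(
        x_shape=[1, 3, 224, 224],
        w_shape=[64, 3, 3, 3],
        out_shape=[1, 64, 224, 224],
    )  # == 86_704_128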
def conv_flop(inputs: List[Any], outputs: List[Any]):
"""
Count flops for convolution.
"""
x, w = inputs[:2]
x_shape, w_shape, out_shape = (get_shape(x), get_shape(w), get_shape(outputs[0]))
transposed = inputs[6]
return conv_flop_count(x_shape, w_shape, out_shape, transposed=transposed)
def quant_conv_flop(inputs: List[Any], outputs: List[Any]):
"""
Count flops for quantized convolution.
"""
x, w = inputs[:2]
x_shape, w_shape, out_shape = (get_shape(x), get_shape(w), get_shape(outputs[0]))
return conv_flop_count(x_shape, w_shape, out_shape, transposed=False)
def transpose_shape(shape):
return [shape[1], shape[0]] + list(shape[2:])
def conv_backward_flop(inputs: List[Any], outputs: List[Any]):
grad_out_shape, x_shape, w_shape = [get_shape(i) for i in inputs[:3]]
output_mask = inputs[-1]
fwd_transposed = inputs[7]
flop_count = 0
if output_mask[0]:
grad_input_shape = get_shape(outputs[0])
flop_count += conv_flop_count(grad_out_shape, w_shape, grad_input_shape, not fwd_transposed)
if output_mask[1]:
grad_weight_shape = get_shape(outputs[1])
flop_count += conv_flop_count(transpose_shape(x_shape), grad_out_shape, grad_weight_shape, fwd_transposed)
return flop_count
def scaled_dot_product_flash_attention_flop(inputs: List[Any], outputs: List[Any]):
# FIXME: this needs to count the flops of this kernel
# https://github.com/pytorch/pytorch/blob/207b06d099def9d9476176a1842e88636c1f714f/aten/src/ATen/native/cpu/FlashAttentionKernel.cpp#L52-L267
return 0
flop_mapping = {
aten.mm: matmul_flop,
aten.matmul: matmul_flop,
aten.addmm: addmm_flop,
aten.bmm: bmm_flop,
aten.convolution: conv_flop,
aten._convolution: conv_flop,
aten.convolution_backward: conv_backward_flop,
quantized.conv2d: quant_conv_flop,
quantized.conv2d_relu: quant_conv_flop,
aten._scaled_dot_product_flash_attention: scaled_dot_product_flash_attention_flop,
}
unmapped_ops = set()
def normalize_tuple(x):
if not isinstance(x, tuple):
return (x,)
return x
class FlopCounterMode(TorchDispatchMode):
def __init__(self, model=None):
self.flop_counts = defaultdict(lambda: defaultdict(int))
self.parents = ["Global"]
# global mod
if model is not None:
for name, module in dict(model.named_children()).items():
module.register_forward_pre_hook(self.enter_module(name))
module.register_forward_hook(self.exit_module(name))
def enter_module(self, name):
def f(module, inputs):
self.parents.append(name)
inputs = normalize_tuple(inputs)
out = self.create_backwards_pop(name)(*inputs)
return out
return f
def exit_module(self, name):
def f(module, inputs, outputs):
assert self.parents[-1] == name
self.parents.pop()
outputs = normalize_tuple(outputs)
return self.create_backwards_push(name)(*outputs)
return f
def create_backwards_push(self, name):
class PushState(torch.autograd.Function):
@staticmethod
def forward(ctx, *args):
args = tree_map(lambda x: x.clone() if isinstance(x, torch.Tensor) else x, args)
if len(args) == 1:
return args[0]
return args
@staticmethod
def backward(ctx, *grad_outs):
self.parents.append(name)
return grad_outs
return PushState.apply
def create_backwards_pop(self, name):
class PopState(torch.autograd.Function):
@staticmethod
def forward(ctx, *args):
args = tree_map(lambda x: x.clone() if isinstance(x, torch.Tensor) else x, args)
if len(args) == 1:
return args[0]
return args
@staticmethod
def backward(ctx, *grad_outs):
assert self.parents[-1] == name
self.parents.pop()
return grad_outs
return PopState.apply
def __enter__(self):
self.flop_counts.clear()
super().__enter__()
def __exit__(self, *args):
# print(f"Total: {sum(self.flop_counts['Global'].values()) / 1e9} GFLOPS")
# for mod in self.flop_counts.keys():
# print(f"Module: ", mod)
# for k, v in self.flop_counts[mod].items():
# print(f"{k}: {v / 1e9} GFLOPS")
# print()
super().__exit__(*args)
def __torch_dispatch__(self, func, types, args=(), kwargs=None):
kwargs = kwargs if kwargs else {}
out = func(*args, **kwargs)
func_packet = func._overloadpacket
if func_packet in flop_mapping:
flop_count = flop_mapping[func_packet](args, normalize_tuple(out))
for par in self.parents:
self.flop_counts[par][func_packet] += flop_count
else:
unmapped_ops.add(func_packet)
return out
def get_flops(self):
return sum(self.flop_counts["Global"].values()) / 1e9
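# Illustrative usage sketch (the model and input size are arbitrary example
# choices, not something the original module prescribes):
def _example_count_flops_resnet18():
    from torchvision.models import resnet18
    model = resnet18().eval()
    counter = FlopCounterMode(model)
    with counter:
        model(torch.randn(1, 3, 224, 224))
    # total GFLOPs accumulated under the "Global" key
    return counter.get_flops()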
def get_dims(module_name, height, width):
# detection models have curated input sizes
if module_name == "detection":
# we can feed a batch of 1 for detection model instead of a list of 1 image
dims = (3, height, width)
elif module_name == "video":
# hard-coding the time dimension to size 16
dims = (1, 16, 3, height, width)
else:
dims = (1, 3, height, width)
return dims
def get_ops(model: torch.nn.Module, weight: Weights, height=512, width=512):
module_name = model.__module__.split(".")[-2]
dims = get_dims(module_name=module_name, height=height, width=width)
input_tensor = torch.randn(dims)
# try:
preprocess = weight.transforms()
if module_name == "optical_flow":
inp = preprocess(input_tensor, input_tensor)
else:
# hack to enable mod(*inp) for optical_flow models
inp = [preprocess(input_tensor)]
model.eval()
flop_counter = FlopCounterMode(model)
with flop_counter:
# detection models expect a list of 3d tensors as inputs
if module_name == "detection":
model(inp)
else:
model(*inp)
flops = flop_counter.get_flops()
return round(flops, 3)
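# Illustrative usage sketch (assumes ResNet18_Weights is available in the
# installed torchvision; constructing the model with pretrained weights
# downloads the checkpoint on first use):
def _example_get_ops():
    from torchvision.models import resnet18, ResNet18_Weights
    weights = ResNet18_Weights.DEFAULT
    model = resnet18(weights=weights)
    # GFLOPs of one forward pass on a 224x224 input
    return get_ops(model, weight=weights, height=224, width=224)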
def get_file_size_mb(weight):
weights_path = os.path.join(os.getenv("HOME"), ".cache/torch/hub/checkpoints", weight.url.split("/")[-1])
weights_size_mb = os.path.getsize(weights_path) / 1024 / 1024
return round(weights_size_mb, 3)
import contextlib
import functools
import itertools
import os
import pathlib
import random
import re
import shutil
import sys
import tempfile
import warnings
from subprocess import CalledProcessError, check_output, STDOUT
import numpy as np
import PIL.Image
import pytest
import torch
import torch.testing
from PIL import Image
from torchvision import io
import __main__ # noqa: 401
from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
from torchvision import io, tv_tensors
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.v2.functional import to_image, to_pil_image
IN_CIRCLE_CI = os.getenv("CIRCLECI", False) == "true"
IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
IN_RE_WORKER = os.environ.get("INSIDE_RE_WORKER") is not None
IN_FBCODE = os.environ.get("IN_FBCODE_TORCHVISION") == "1"
CUDA_NOT_AVAILABLE_MSG = "CUDA device not available"
CIRCLECI_GPU_NO_CUDA_MSG = "We're in a CircleCI GPU machine, and this test doesn't need cuda."
MPS_NOT_AVAILABLE_MSG = "MPS device not available"
OSS_CI_GPU_NO_CUDA_MSG = "We're in an OSS GPU machine, and this test doesn't need cuda."
@contextlib.contextmanager
......@@ -107,18 +119,28 @@ def disable_console_output():
yield
def cpu_and_gpu():
def cpu_and_cuda():
import pytest # noqa
return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
def cpu_and_cuda_and_mps():
return cpu_and_cuda() + (pytest.param("mps", marks=pytest.mark.needs_mps),)
def needs_cuda(test_func):
import pytest # noqa
return pytest.mark.needs_cuda(test_func)
def needs_mps(test_func):
import pytest # noqa
return pytest.mark.needs_mps(test_func)
def _create_data(height=3, width=3, channels=3, device="cpu"):
# TODO: When all relevant tests are ported to pytest, turn this into a module-level fixture
tensor = torch.randint(0, 256, (channels, height, width), dtype=torch.uint8, device=device)
......@@ -137,9 +159,6 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu
return batch_tensor
assert_equal = functools.partial(torch.testing.assert_close, rtol=0, atol=0)
def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
names = []
for i in range(num_videos):
......@@ -160,6 +179,7 @@ def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None):
# FIXME: this is handled automatically by `assert_equal` below. Let's remove this in favor of it
np_pil_image = np.array(pil_image)
if np_pil_image.ndim == 2:
np_pil_image = np_pil_image[:, :, None]
......@@ -172,6 +192,7 @@ def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None):
def _assert_approx_equal_tensor_to_pil(
tensor, pil_image, tol=1e-5, msg=None, agg_method="mean", allowed_percentage_diff=None
):
# FIXME: this is handled automatically by `assert_close` below. Let's remove this in favor of it
# TODO: we could just merge this into _assert_equal_tensor_to_pil
np_pil_image = np.array(pil_image)
if np_pil_image.ndim == 2:
......@@ -210,7 +231,7 @@ def cache(fn):
"""
sentinel = object()
out_cache = {}
exc_cache = {}
exc_tb_cache = {}
@functools.wraps(fn)
def wrapper(*args, **kwargs):
......@@ -220,17 +241,280 @@ def cache(fn):
if out is not sentinel:
return out
exc = exc_cache.get(key, sentinel)
if exc is not sentinel:
raise exc
exc_tb = exc_tb_cache.get(key, sentinel)
if exc_tb is not sentinel:
raise exc_tb[0].with_traceback(exc_tb[1])
try:
out = fn(*args, **kwargs)
except Exception as exc:
exc_cache[key] = exc
# We need to cache the traceback here as well. Otherwise, each re-raise will add the internal pytest
# traceback frames anew, but they will only be removed once. Thus, the traceback will be ginormous hiding
# the actual information in the noise. See https://github.com/pytest-dev/pytest/issues/10363 for details.
exc_tb_cache[key] = exc, exc.__traceback__
raise exc
out_cache[key] = out
return out
return wrapper
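# Illustrative usage sketch (assumption: the decorated callable is only invoked
# with hashable arguments, since they form the cache key):
@cache
def _example_cached_lookup(name):
    # The body runs at most once per distinct `name`; later calls return the
    # cached value or re-raise the cached exception with its original traceback.
    return name.upper()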
def combinations_grid(**kwargs):
"""Creates a grid of input combinations.
Each element in the returned sequence is a dictionary containing one possible combination as values.
Example:
>>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham"))
[
{'foo': 'bar', 'spam': 'eggs'},
{'foo': 'bar', 'spam': 'ham'},
{'foo': 'baz', 'spam': 'eggs'},
{'foo': 'baz', 'spam': 'ham'}
]
"""
return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]
class ImagePair(TensorLikePair):
def __init__(
self,
actual,
expected,
*,
mae=False,
**other_parameters,
):
if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]):
actual, expected = [to_image(input) for input in [actual, expected]]
super().__init__(actual, expected, **other_parameters)
self.mae = mae
def compare(self) -> None:
actual, expected = self.actual, self.expected
self._compare_attributes(actual, expected)
actual, expected = self._equalize_attributes(actual, expected)
if self.mae:
if actual.dtype is torch.uint8:
actual, expected = actual.to(torch.int), expected.to(torch.int)
mae = float(torch.abs(actual - expected).float().mean())
if mae > self.atol:
self._fail(
AssertionError,
f"The MAE of the images is {mae}, but only {self.atol} is allowed.",
)
else:
super()._compare_values(actual, expected)
def assert_close(
actual,
expected,
*,
allow_subclasses=True,
rtol=None,
atol=None,
equal_nan=False,
check_device=True,
check_dtype=True,
check_layout=True,
check_stride=False,
msg=None,
**kwargs,
):
"""Superset of :func:`torch.testing.assert_close` with support for PIL vs. tensor image comparison"""
__tracebackhide__ = True
error_metas = not_close_error_metas(
actual,
expected,
pair_types=(
NonePair,
BooleanPair,
NumberPair,
ImagePair,
TensorLikePair,
),
allow_subclasses=allow_subclasses,
rtol=rtol,
atol=atol,
equal_nan=equal_nan,
check_device=check_device,
check_dtype=check_dtype,
check_layout=check_layout,
check_stride=check_stride,
**kwargs,
)
if error_metas:
raise error_metas[0].to_error(msg)
assert_equal = functools.partial(assert_close, rtol=0, atol=0)
DEFAULT_SIZE = (17, 11)
NUM_CHANNELS_MAP = {
"GRAY": 1,
"GRAY_ALPHA": 2,
"RGB": 3,
"RGBA": 4,
}
def make_image(
size=DEFAULT_SIZE,
*,
color_space="RGB",
batch_dims=(),
dtype=None,
device="cpu",
memory_format=torch.contiguous_format,
):
num_channels = NUM_CHANNELS_MAP[color_space]
dtype = dtype or torch.uint8
max_value = get_max_value(dtype)
data = torch.testing.make_tensor(
(*batch_dims, num_channels, *size),
low=0,
high=max_value,
dtype=dtype,
device=device,
memory_format=memory_format,
)
if color_space in {"GRAY_ALPHA", "RGBA"}:
data[..., -1, :, :] = max_value
return tv_tensors.Image(data)
def make_image_tensor(*args, **kwargs):
return make_image(*args, **kwargs).as_subclass(torch.Tensor)
def make_image_pil(*args, **kwargs):
return to_pil_image(make_image(*args, **kwargs))
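# Illustrative sketch tying the helpers together (size and color space are
# arbitrary example values): a PIL round trip of a random uint8 image should
# be lossless, which `assert_equal` verifies via the `ImagePair` defined above.
def _example_image_roundtrip():
    image = make_image((4, 4), color_space="RGB")
    # tensor vs. tensor comparison
    assert_equal(to_image(to_pil_image(image)), image)
    # PIL vs. PIL comparison goes through ImagePair
    assert_equal(to_pil_image(image), to_pil_image(image))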
def make_bounding_boxes(
canvas_size=DEFAULT_SIZE,
*,
format=tv_tensors.BoundingBoxFormat.XYXY,
dtype=None,
device="cpu",
):
def sample_position(values, max_value):
# We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high.
        # However, the upper limits differ per object here, so we sample each position separately.
return torch.stack([torch.randint(max_value - v, ()) for v in values.tolist()])
if isinstance(format, str):
format = tv_tensors.BoundingBoxFormat[format]
dtype = dtype or torch.float32
num_objects = 1
h, w = [torch.randint(1, c, (num_objects,)) for c in canvas_size]
y = sample_position(h, canvas_size[0])
x = sample_position(w, canvas_size[1])
if format is tv_tensors.BoundingBoxFormat.XYWH:
parts = (x, y, w, h)
elif format is tv_tensors.BoundingBoxFormat.XYXY:
x1, y1 = x, y
x2 = x1 + w
y2 = y1 + h
parts = (x1, y1, x2, y2)
elif format is tv_tensors.BoundingBoxFormat.CXCYWH:
cx = x + w / 2
cy = y + h / 2
parts = (cx, cy, w, h)
else:
raise ValueError(f"Format {format} is not supported")
return tv_tensors.BoundingBoxes(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
)
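# Illustrative sketch (format is an arbitrary example value): a single random
# box on the default canvas, returned as a (1, 4) BoundingBoxes tensor that
# remembers its format and canvas size.
def _example_make_boxes():
    boxes = make_bounding_boxes(format="XYWH")
    assert boxes.shape == (1, 4)
    assert boxes.format is tv_tensors.BoundingBoxFormat.XYWH
    assert boxes.canvas_size == DEFAULT_SIZE
    return boxes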
def make_detection_mask(size=DEFAULT_SIZE, *, dtype=None, device="cpu"):
"""Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks"""
num_objects = 1
return tv_tensors.Mask(
torch.testing.make_tensor(
(num_objects, *size),
low=0,
high=2,
dtype=dtype or torch.bool,
device=device,
)
)
def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"):
"""Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value"""
return tv_tensors.Mask(
torch.testing.make_tensor(
(*batch_dims, *size),
low=0,
high=num_categories,
dtype=dtype or torch.uint8,
device=device,
)
)
def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs):
return tv_tensors.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs))
def make_video_tensor(*args, **kwargs):
return make_video(*args, **kwargs).as_subclass(torch.Tensor)
def assert_run_python_script(source_code):
"""Utility to check assertions in an independent Python subprocess.
The script provided in the source code should return 0 and not print
anything on stderr or stdout. Modified from scikit-learn test utils.
Args:
source_code (str): The Python source code to execute.
"""
with get_tmp_dir() as root:
path = pathlib.Path(root) / "main.py"
with open(path, "w") as file:
file.write(source_code)
try:
out = check_output([sys.executable, str(path)], stderr=STDOUT)
except CalledProcessError as e:
raise RuntimeError(f"script errored with output:\n{e.output.decode()}")
if out != b"":
raise AssertionError(out.decode())
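# Illustrative usage sketch (the snippet below is an arbitrary example): fail
# the calling test if importing torchvision prints anything or errors out.
def _example_check_silent_import():
    assert_run_python_script("import torchvision\n")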
@contextlib.contextmanager
def assert_no_warnings():
# The name `catch_warnings` is a misnomer as the context manager does **not** catch any warnings, but rather scopes
    # the warning filters. All changes that are made to the filters while in this context will be reset upon exit.
with warnings.catch_warnings():
warnings.simplefilter("error")
yield
@contextlib.contextmanager
def ignore_jit_no_profile_information_warning():
# Calling a scripted object often triggers a warning like
# `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information`
# with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore
# them.
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning)
yield
......@@ -3,12 +3,21 @@ import random
import numpy as np
import pytest
import torch
from common_utils import CIRCLECI_GPU_NO_CUDA_MSG, CUDA_NOT_AVAILABLE_MSG, IN_CIRCLE_CI, IN_FBCODE, IN_RE_WORKER
from common_utils import (
CUDA_NOT_AVAILABLE_MSG,
IN_FBCODE,
IN_OSS_CI,
IN_RE_WORKER,
MPS_NOT_AVAILABLE_MSG,
OSS_CI_GPU_NO_CUDA_MSG,
)
def pytest_configure(config):
# register an additional marker (see pytest_collection_modifyitems)
config.addinivalue_line("markers", "needs_cuda: mark for tests that rely on a CUDA device")
config.addinivalue_line("markers", "needs_mps: mark for tests that rely on a MPS device")
config.addinivalue_line("markers", "dont_collect: mark for tests that should not be collected")
......@@ -16,9 +25,9 @@ def pytest_collection_modifyitems(items):
# This hook is called by pytest after it has collected the tests (google its name to check out its doc!)
# We can ignore some tests as we see fit here, or add marks, such as a skip mark.
#
# Typically here, we try to optimize CI time. In particular, the GPU CI instances don't need to run the
# Typically, here, we try to optimize CI time. In particular, the GPU CI instances don't need to run the
# tests that don't need CUDA, because those tests are extensively tested in the CPU CI instances already.
# This is true for both CircleCI and the fbcode internal CI.
# This is true for both OSS CI and the fbcode internal CI.
# In the fbcode CI, we have an additional constraint: we try to avoid skipping tests. So instead of relying on
# pytest.mark.skip, in fbcode we literally just remove those tests from the `items` list, and it's as if
# these tests never existed.
......@@ -28,16 +37,20 @@ def pytest_collection_modifyitems(items):
# The needs_cuda mark will exist if the test was explicitly decorated with
# the @needs_cuda decorator. It will also exist if it was parametrized with a
# parameter that has the mark: for example if a test is parametrized with
# @pytest.mark.parametrize('device', cpu_and_gpu())
# @pytest.mark.parametrize('device', cpu_and_cuda())
# the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark,
# and the ones with device == 'cpu' won't have the mark.
needs_cuda = item.get_closest_marker("needs_cuda") is not None
needs_mps = item.get_closest_marker("needs_mps") is not None
if needs_cuda and not torch.cuda.is_available():
# In general, we skip cuda tests on machines without a GPU
# There are special cases though, see below
item.add_marker(pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG))
if needs_mps and not torch.backends.mps.is_available():
item.add_marker(pytest.mark.skip(reason=MPS_NOT_AVAILABLE_MSG))
if IN_FBCODE:
# fbcode doesn't like skipping tests, so instead we just don't collect the test
# so that they don't even "exist", hence the continue statements.
......@@ -49,15 +62,18 @@ def pytest_collection_modifyitems(items):
# TODO: something more robust would be to do that only in a sandcastle instance,
# so that we can still see the test being skipped when testing locally from a devvm
continue
elif IN_CIRCLE_CI:
if needs_mps and not torch.backends.mps.is_available():
# Same as above, but for MPS
continue
elif IN_OSS_CI:
# Here we're not in fbcode, so we can safely collect and skip tests.
if not needs_cuda and torch.cuda.is_available():
# Similar to what happens in RE workers: we don't need the CircleCI GPU machines
# Similar to what happens in RE workers: we don't need the OSS CI GPU machines
# to run the CPU-only tests.
item.add_marker(pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG))
item.add_marker(pytest.mark.skip(reason=OSS_CI_GPU_NO_CUDA_MSG))
if item.get_closest_marker("dont_collect") is not None:
# currently, this is only used for some tests we're sure we dont want to run on fbcode
# currently, this is only used for some tests we're sure we don't want to run on fbcode
continue
out_items.append(item)
......
......@@ -5,6 +5,7 @@ import inspect
import itertools
import os
import pathlib
import platform
import random
import shutil
import string
......@@ -25,6 +26,7 @@ import torch
import torchvision.datasets
import torchvision.io
from common_utils import disable_console_output, get_tmp_dir
from torch.utils._pytree import tree_any
from torchvision.transforms.functional import get_dimensions
......@@ -137,7 +139,7 @@ def test_all_configs(test):
.. note::
This will try to remove duplicate configurations. During this process it will not not preserve a potential
This will try to remove duplicate configurations. During this process it will not preserve a potential
ordering of the configurations or an inner ordering of a configuration.
"""
......@@ -146,7 +148,7 @@ def test_all_configs(test):
return [dict(config_) for config_ in {tuple(sorted(config.items())) for config in configs}]
except TypeError:
# A TypeError will be raised if a value of any config is not hashable, e.g. a list. In that case duplicate
# removal would be a lot more elaborate and we simply bail out.
# removal would be a lot more elaborate, and we simply bail out.
return configs
@functools.wraps(test)
......@@ -169,23 +171,6 @@ def test_all_configs(test):
return wrapper
def combinations_grid(**kwargs):
"""Creates a grid of input combinations.
Each element in the returned sequence is a dictionary containing one possible combination as values.
Example:
>>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham"))
[
{'foo': 'bar', 'spam': 'eggs'},
{'foo': 'bar', 'spam': 'ham'},
{'foo': 'baz', 'spam': 'eggs'},
{'foo': 'baz', 'spam': 'ham'}
]
"""
return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]
class DatasetTestCase(unittest.TestCase):
"""Abstract base class for all dataset testcases.
......@@ -297,7 +282,7 @@ class DatasetTestCase(unittest.TestCase):
.. note::
The default behavior is only valid if the dataset to be tested has ``root`` as the only required parameter.
Otherwise you need to overwrite this method.
Otherwise, you need to overwrite this method.
Args:
tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset
......@@ -564,7 +549,7 @@ class DatasetTestCase(unittest.TestCase):
@test_all_configs
def test_num_examples(self, config):
with self.create_dataset(config) as (dataset, info):
assert len(dataset) == info["num_examples"]
assert len(list(dataset)) == len(dataset) == info["num_examples"]
@test_all_configs
def test_transforms(self, config):
......@@ -581,6 +566,42 @@ class DatasetTestCase(unittest.TestCase):
mock.assert_called()
@test_all_configs
def test_transforms_v2_wrapper(self, config):
from torchvision import tv_tensors
from torchvision.datasets import wrap_dataset_for_transforms_v2
try:
with self.create_dataset(config) as (dataset, info):
for target_keys in [None, "all"]:
if target_keys is not None and self.DATASET_CLASS not in {
torchvision.datasets.CocoDetection,
torchvision.datasets.VOCDetection,
torchvision.datasets.Kitti,
torchvision.datasets.WIDERFace,
}:
with self.assertRaisesRegex(ValueError, "`target_keys` is currently only supported for"):
wrap_dataset_for_transforms_v2(dataset, target_keys=target_keys)
continue
wrapped_dataset = wrap_dataset_for_transforms_v2(dataset, target_keys=target_keys)
assert isinstance(wrapped_dataset, self.DATASET_CLASS)
assert len(wrapped_dataset) == info["num_examples"]
wrapped_sample = wrapped_dataset[0]
assert tree_any(
lambda item: isinstance(item, (tv_tensors.TVTensor, PIL.Image.Image)), wrapped_sample
)
except TypeError as error:
msg = f"No wrapper exists for dataset class {type(dataset).__name__}"
if str(error).startswith(msg):
pytest.skip(msg)
raise error
except RuntimeError as error:
if "currently not supported by this wrapper" in str(error):
pytest.skip("Config is currently not supported by this wrapper")
raise error
class ImageDatasetTestCase(DatasetTestCase):
"""Abstract base class for image dataset testcases.
......@@ -604,7 +625,7 @@ class ImageDatasetTestCase(DatasetTestCase):
patch_checks=patch_checks,
**kwargs,
) as (dataset, info):
# PIL.Image.open() only loads the image meta data upfront and keeps the file open until the first access
# PIL.Image.open() only loads the image metadata upfront and keeps the file open until the first access
# to the pixel data occurs. Trying to delete such a file results in an PermissionError on Windows. Thus, we
# force-load opened images.
# This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an
......@@ -641,27 +662,73 @@ class VideoDatasetTestCase(DatasetTestCase):
FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
REQUIRED_PACKAGES = ("av",)
DEFAULT_FRAMES_PER_CLIP = 1
FRAMES_PER_CLIP = 1
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.dataset_args = self._set_default_frames_per_clip(self.dataset_args)
def _set_default_frames_per_clip(self, inject_fake_data):
def _set_default_frames_per_clip(self, dataset_args):
argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)]
frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
@functools.wraps(inject_fake_data)
@functools.wraps(dataset_args)
def wrapper(tmpdir, config):
args = inject_fake_data(tmpdir, config)
args = dataset_args(tmpdir, config)
if frames_per_clip_last and len(args) == len(args_without_default) - 1:
args = (*args, self.DEFAULT_FRAMES_PER_CLIP)
args = (*args, self.FRAMES_PER_CLIP)
return args
return wrapper
def test_output_format(self):
for output_format in ["TCHW", "THWC"]:
with self.create_dataset(output_format=output_format) as (dataset, _):
for video, *_ in dataset:
if output_format == "TCHW":
num_frames, num_channels, *_ = video.shape
else: # output_format == "THWC":
num_frames, *_, num_channels = video.shape
assert num_frames == self.FRAMES_PER_CLIP
assert num_channels == 3
@test_all_configs
def test_transforms_v2_wrapper(self, config):
# `output_format == "THWC"` is not supported by the wrapper. Thus, we skip the `config` if it is set explicitly
# or use the supported `"TCHW"`
if config.setdefault("output_format", "TCHW") == "THWC":
return
super().test_transforms_v2_wrapper.__wrapped__(self, config)
def _no_collate(batch):
return batch
def check_transforms_v2_wrapper_spawn(dataset):
    # On Linux, the DataLoader forks the main process by default. Forking is not available on macOS (or Windows), so
    # new subprocesses are spawned there instead. Spawning requires the whole pipeline, including the dataset, to be
    # picklable, which is what we are enforcing here.
if platform.system() != "Darwin":
pytest.skip("Multiprocessing spawning is only checked on macOS.")
from torch.utils.data import DataLoader
from torchvision import tv_tensors
from torchvision.datasets import wrap_dataset_for_transforms_v2
wrapped_dataset = wrap_dataset_for_transforms_v2(dataset)
dataloader = DataLoader(wrapped_dataset, num_workers=2, multiprocessing_context="spawn", collate_fn=_no_collate)
for wrapped_sample in dataloader:
assert tree_any(
lambda item: isinstance(item, (tv_tensors.Image, tv_tensors.Video, PIL.Image.Image)), wrapped_sample
)
def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor:
r"""Create a random uint8 tensor.
......@@ -786,7 +853,7 @@ def create_video_file(
fps: float = 25,
**kwargs: Any,
) -> pathlib.Path:
"""Create an video file from random data.
"""Create a video file from random data.
Args:
root (Union[str, pathlib.Path]): Root directory the video file will be placed in.
......@@ -951,7 +1018,7 @@ def create_random_string(length: int, *digits: str) -> str:
Args:
length (int): Number of characters in the generated string.
*characters (str): Characters to sample from. If omitted defaults to :attr:`string.ascii_lowercase`.
*digits (str): Characters to sample from. If omitted defaults to :attr:`string.ascii_lowercase`.
"""
if not digits:
digits = string.ascii_lowercase
......