Unverified Commit 8324c481 authored by Nicolas Hug's avatar Nicolas Hug Committed by GitHub
Browse files

Add uint8 bicubic support to ResizeV2 (#7668)


Co-authored-by: default avatarvfdev-5 <vfdev.5@gmail.com>
parent e44bba12
...@@ -87,10 +87,8 @@ CONSISTENCY_CONFIGS = [ ...@@ -87,10 +87,8 @@ CONSISTENCY_CONFIGS = [
ArgsKwargs([32]), ArgsKwargs([32]),
ArgsKwargs((32, 29)), ArgsKwargs((32, 29)),
ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST), ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST),
ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC),
ArgsKwargs((30, 27), interpolation=PIL.Image.NEAREST), ArgsKwargs((30, 27), interpolation=PIL.Image.NEAREST),
ArgsKwargs((35, 29), interpolation=PIL.Image.BILINEAR), ArgsKwargs((35, 29), interpolation=PIL.Image.BILINEAR),
ArgsKwargs((34, 25), interpolation=PIL.Image.BICUBIC),
NotScriptableArgsKwargs(31, max_size=32), NotScriptableArgsKwargs(31, max_size=32),
ArgsKwargs([31], max_size=32), ArgsKwargs([31], max_size=32),
NotScriptableArgsKwargs(30, max_size=100), NotScriptableArgsKwargs(30, max_size=100),
...@@ -101,6 +99,15 @@ CONSISTENCY_CONFIGS = [ ...@@ -101,6 +99,15 @@ CONSISTENCY_CONFIGS = [
# atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes
closeness_kwargs=dict(rtol=0, atol=1), closeness_kwargs=dict(rtol=0, atol=1),
), ),
ConsistencyConfig(
v2_transforms.Resize,
legacy_transforms.Resize,
[
ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC, antialias=True),
ArgsKwargs((34, 25), interpolation=PIL.Image.BICUBIC, antialias=True),
],
closeness_kwargs=dict(rtol=0, atol=21),
),
ConsistencyConfig( ConsistencyConfig(
v2_transforms.CenterCrop, v2_transforms.CenterCrop,
legacy_transforms.CenterCrop, legacy_transforms.CenterCrop,
...@@ -309,15 +316,22 @@ CONSISTENCY_CONFIGS = [ ...@@ -309,15 +316,22 @@ CONSISTENCY_CONFIGS = [
ArgsKwargs(17, scale=(0.3, 0.7)), ArgsKwargs(17, scale=(0.3, 0.7)),
ArgsKwargs(25, ratio=(0.5, 1.5)), ArgsKwargs(25, ratio=(0.5, 1.5)),
ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST), ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST),
ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC),
ArgsKwargs((31, 28), interpolation=PIL.Image.NEAREST), ArgsKwargs((31, 28), interpolation=PIL.Image.NEAREST),
ArgsKwargs((33, 26), interpolation=PIL.Image.BICUBIC),
ArgsKwargs((29, 32), antialias=False), ArgsKwargs((29, 32), antialias=False),
ArgsKwargs((28, 31), antialias=True), ArgsKwargs((28, 31), antialias=True),
], ],
# atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes
closeness_kwargs=dict(rtol=0, atol=1), closeness_kwargs=dict(rtol=0, atol=1),
), ),
ConsistencyConfig(
v2_transforms.RandomResizedCrop,
legacy_transforms.RandomResizedCrop,
[
ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC, antialias=True),
ArgsKwargs((33, 26), interpolation=PIL.Image.BICUBIC, antialias=True),
],
closeness_kwargs=dict(rtol=0, atol=21),
),
ConsistencyConfig( ConsistencyConfig(
v2_transforms.RandomErasing, v2_transforms.RandomErasing,
legacy_transforms.RandomErasing, legacy_transforms.RandomErasing,
......
...@@ -257,17 +257,20 @@ def sample_inputs_resize_image_tensor(): ...@@ -257,17 +257,20 @@ def sample_inputs_resize_image_tensor():
for image_loader, interpolation in itertools.product( for image_loader, interpolation in itertools.product(
make_image_loaders(sizes=["random"], color_spaces=["RGB"]), make_image_loaders(sizes=["random"], color_spaces=["RGB"]),
[ [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR],
F.InterpolationMode.NEAREST,
F.InterpolationMode.BILINEAR,
F.InterpolationMode.BICUBIC,
],
): ):
yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation)
yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25) yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25)
def sample_inputs_resize_image_tensor_bicubic():
for image_loader, interpolation in itertools.product(
make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC]
):
yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation)
@pil_reference_wrapper @pil_reference_wrapper
def reference_resize_image_tensor(*args, **kwargs): def reference_resize_image_tensor(*args, **kwargs):
if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in { if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in {
...@@ -364,6 +367,21 @@ KERNEL_INFOS.extend( ...@@ -364,6 +367,21 @@ KERNEL_INFOS.extend(
xfail_jit_python_scalar_arg("size"), xfail_jit_python_scalar_arg("size"),
], ],
), ),
KernelInfo(
F.resize_image_tensor,
sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic,
reference_fn=reference_resize_image_tensor,
reference_inputs_fn=reference_inputs_resize_image_tensor,
float32_vs_uint8=True,
closeness_kwargs={
**pil_reference_pixel_difference(10, mae=True),
**cuda_vs_cpu_pixel_difference(atol=30),
**float32_vs_uint8_pixel_difference(1, mae=True),
},
test_marks=[
xfail_jit_python_scalar_arg("size"),
],
),
KernelInfo( KernelInfo(
F.resize_bounding_box, F.resize_bounding_box,
sample_inputs_fn=sample_inputs_resize_bounding_box, sample_inputs_fn=sample_inputs_resize_bounding_box,
......
...@@ -190,14 +190,13 @@ def resize_image_tensor( ...@@ -190,14 +190,13 @@ def resize_image_tensor(
if interpolation == InterpolationMode.NEAREST or interpolation == InterpolationMode.NEAREST_EXACT: if interpolation == InterpolationMode.NEAREST or interpolation == InterpolationMode.NEAREST_EXACT:
# uint8 dtype can be included for cpu and cuda input if nearest mode # uint8 dtype can be included for cpu and cuda input if nearest mode
acceptable_dtypes.append(torch.uint8) acceptable_dtypes.append(torch.uint8)
elif ( elif image.device.type == "cpu":
interpolation == InterpolationMode.BILINEAR # uint8 dtype support for bilinear and bicubic is limited to cpu and
and image.device.type == "cpu" # according to our benchmarks, non-AVX CPUs should still prefer u8->f32->interpolate->u8 path for bilinear
and "AVX2" in torch.backends.cpu.get_cpu_capability() if (interpolation == InterpolationMode.BILINEAR and "AVX2" in torch.backends.cpu.get_cpu_capability()) or (
): interpolation == InterpolationMode.BICUBIC
# uint8 dtype support for bilinear mode is limited to cpu and ):
# according to our benchmarks non-AVX CPUs should prefer u8->f32->interpolate->u8 path acceptable_dtypes.append(torch.uint8)
acceptable_dtypes.append(torch.uint8)
strides = image.stride() strides = image.stride()
if image.is_contiguous(memory_format=torch.channels_last) and image.shape[0] == 1 and numel != strides[0]: if image.is_contiguous(memory_format=torch.channels_last) and image.shape[0] == 1 and numel != strides[0]:
...@@ -227,6 +226,7 @@ def resize_image_tensor( ...@@ -227,6 +226,7 @@ def resize_image_tensor(
if need_cast: if need_cast:
if interpolation == InterpolationMode.BICUBIC and dtype == torch.uint8: if interpolation == InterpolationMode.BICUBIC and dtype == torch.uint8:
# This path is hit on non-AVX archs, or on GPU.
image = image.clamp_(min=0, max=255) image = image.clamp_(min=0, max=255)
if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64): if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
image = image.round_() image = image.round_()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment