OpenDAS / vision · Commits · d84aaae1

Unverified commit d84aaae1, authored Sep 09, 2023 by Philip Meier, committed by GitHub on Sep 09, 2023.

port tests for F.perspective and transforms.RandomPerspective (#7943)

parent 7253af58
Showing 8 changed files with 252 additions and 266 deletions:

  test/test_transforms_v2.py                           +0    -20
  test/test_transforms_v2_consistency.py               +0    -16
  test/test_transforms_v2_functional.py                +0    -79
  test/test_transforms_v2_refactored.py                +248  -0
  test/transforms_v2_dispatcher_infos.py               +0    -13
  test/transforms_v2_kernel_infos.py                   +1    -135
  torchvision/transforms/v2/_geometry.py               +2    -2
  torchvision/transforms/v2/functional/_geometry.py    +1    -1
test/test_transforms_v2.py

@@ -418,26 +418,6 @@ class TestRandomZoomOut:
         assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h
 
-
-class TestRandomPerspective:
-    def test_assertions(self):
-        with pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"):
-            transforms.RandomPerspective(distortion_scale=-1.0)
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomPerspective(0.5, fill="abc")
-
-    def test__get_params(self):
-        dscale = 0.5
-        transform = transforms.RandomPerspective(dscale)
-        image = make_image((24, 32))
-
-        params = transform._get_params([image])
-
-        assert "coefficients" in params
-        assert len(params["coefficients"]) == 8
-
 class TestElasticTransform:
     def test_assertions(self):
...
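The removed class exercised transforms.RandomPerspective through the public API. For readers following along outside the test harness, a minimal standalone sketch of the same behavior (assuming only torch and torchvision are installed) looks like:

import torch
from torchvision.transforms import v2 as transforms

# distortion_scale must lie in [0, 1]; p=1.0 forces the warp to be applied.
perspective = transforms.RandomPerspective(distortion_scale=0.5, p=1.0)

image = torch.randint(0, 256, (3, 24, 32), dtype=torch.uint8)
warped = perspective(image)
assert warped.shape == image.shape  # the spatial size is preserved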
test/test_transforms_v2_consistency.py

@@ -6,7 +6,6 @@ import re
 from pathlib import Path
 
 import numpy as np
-import PIL.Image
 import pytest
 import torch
...
@@ -246,20 +245,6 @@ CONSISTENCY_CONFIGS = [
         ],
         closeness_kwargs={"atol": 1e-5, "rtol": 1e-5},
     ),
-    ConsistencyConfig(
-        v2_transforms.RandomPerspective,
-        legacy_transforms.RandomPerspective,
-        [
-            ArgsKwargs(p=0),
-            ArgsKwargs(p=1),
-            ArgsKwargs(p=1, distortion_scale=0.3),
-            ArgsKwargs(p=1, distortion_scale=0.2, interpolation=v2_transforms.InterpolationMode.NEAREST),
-            ArgsKwargs(p=1, distortion_scale=0.2, interpolation=PIL.Image.NEAREST),
-            ArgsKwargs(p=1, distortion_scale=0.1, fill=1),
-            ArgsKwargs(p=1, distortion_scale=0.4, fill=(1, 2, 3)),
-        ],
-        closeness_kwargs={"atol": None, "rtol": None},
-    ),
     ConsistencyConfig(
         v2_transforms.PILToTensor,
         legacy_transforms.PILToTensor,
...
@@ -478,7 +463,6 @@ get_params_parametrization = pytest.mark.parametrize(
     )
     for transform_cls, get_params_args_kwargs in [
         (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)),
-        (v2_transforms.RandomPerspective, ArgsKwargs(23, 17, 0.5)),
         (v2_transforms.AutoAugment, ArgsKwargs(5)),
     ]
 ],
...
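The dropped ConsistencyConfig compared the v2 transform against its v1 counterpart. A rough sketch of the kind of check those configs drove, assuming (as the consistency harness did) that both implementations draw identical random parameters under a shared seed:

import torch
from torchvision import transforms as legacy_transforms
from torchvision.transforms import v2 as v2_transforms

image = torch.randint(0, 256, (3, 24, 32), dtype=torch.uint8)

# Same seed before each call so both transforms sample the same warp.
torch.manual_seed(0)
expected = legacy_transforms.RandomPerspective(p=1)(image)
torch.manual_seed(0)
actual = v2_transforms.RandomPerspective(p=1)(image)

torch.testing.assert_close(actual, expected, rtol=0, atol=1)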
test/test_transforms_v2_functional.py

@@ -9,10 +9,8 @@ import torch
 from common_utils import assert_close, cache, cpu_and_cuda, needs_cuda, set_rng_seed
 from torch.utils._pytree import tree_map
 from torchvision import tv_tensors
-from torchvision.transforms.functional import _get_perspective_coeffs
 from torchvision.transforms.v2 import functional as F
 from torchvision.transforms.v2._utils import is_pure_tensor
-from torchvision.transforms.v2.functional._meta import clamp_bounding_boxes, convert_bounding_box_format
 from transforms_v2_dispatcher_infos import DISPATCHER_INFOS
 from transforms_v2_kernel_infos import KERNEL_INFOS
 from transforms_v2_legacy_utils import (
...
@@ -523,83 +521,6 @@ class TestClampBoundingBoxes:
 # `transforms_v2_kernel_infos.py`
 
-@pytest.mark.parametrize("device", cpu_and_cuda())
-@pytest.mark.parametrize(
-    "startpoints, endpoints",
-    [
-        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
-        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
-        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
-    ],
-)
-def test_correctness_perspective_bounding_boxes(device, startpoints, endpoints):
-    def _compute_expected_bbox(bbox, format_, canvas_size_, pcoeffs_):
-        m1 = np.array(
-            [
-                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
-                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
-            ]
-        )
-        m2 = np.array(
-            [
-                [pcoeffs_[6], pcoeffs_[7], 1.0],
-                [pcoeffs_[6], pcoeffs_[7], 1.0],
-            ]
-        )
-
-        bbox_xyxy = convert_bounding_box_format(bbox, old_format=format_, new_format=tv_tensors.BoundingBoxFormat.XYXY)
-        points = np.array(
-            [
-                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
-                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
-                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
-                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
-            ]
-        )
-        numer = np.matmul(points, m1.T)
-        denom = np.matmul(points, m2.T)
-        transformed_points = numer / denom
-        out_bbox = np.array(
-            [
-                np.min(transformed_points[:, 0]),
-                np.min(transformed_points[:, 1]),
-                np.max(transformed_points[:, 0]),
-                np.max(transformed_points[:, 1]),
-            ]
-        )
-        out_bbox = torch.from_numpy(out_bbox)
-        out_bbox = convert_bounding_box_format(
-            out_bbox, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format_
-        )
-        return clamp_bounding_boxes(out_bbox, format=format_, canvas_size=canvas_size_).to(bbox)
-
-    canvas_size = (32, 38)
-
-    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
-    inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)
-
-    for bboxes in make_multiple_bounding_boxes(spatial_size=canvas_size, extra_dims=((4,),)):
-        bboxes = bboxes.to(device)
-
-        output_bboxes = F.perspective_bounding_boxes(
-            bboxes.as_subclass(torch.Tensor),
-            format=bboxes.format,
-            canvas_size=bboxes.canvas_size,
-            startpoints=None,
-            endpoints=None,
-            coefficients=pcoeffs,
-        )
-
-        expected_bboxes = torch.stack(
-            [
-                _compute_expected_bbox(b, bboxes.format, bboxes.canvas_size, inv_pcoeffs)
-                for b in bboxes.reshape(-1, 4).unbind()
-            ]
-        ).reshape(bboxes.shape)
-
-        torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=0, atol=1)
-
-
 @pytest.mark.parametrize(
     "inpt",
     [
...
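The _compute_expected_bbox helper above applies the standard PIL-style perspective mapping: eight coefficients (a, b, c, d, e, f, g, h) send a point (x, y) to ((a*x + b*y + c) / (g*x + h*y + 1), (d*x + e*y + f) / (g*x + h*y + 1)). A self-contained illustration of that formula, with a hypothetical helper name:

def warp_point(x, y, c):
    # c = (a, b, c, d, e, f, g, h), the perspective coefficients
    denom = c[6] * x + c[7] * y + 1.0
    return (
        (c[0] * x + c[1] * y + c[2]) / denom,
        (c[3] * x + c[4] * y + c[5]) / denom,
    )

# The identity coefficients leave every point unchanged.
identity = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
assert warp_point(3.0, 2.0, identity) == (3.0, 2.0)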
test/test_transforms_v2_refactored.py

@@ -45,6 +45,7 @@ from torchvision import tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
 from torchvision.transforms.functional import pil_modes_mapping
 from torchvision.transforms.v2 import functional as F
+from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs
 from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal
...
@@ -3430,6 +3431,9 @@ class TestPad:
         with pytest.raises(ValueError, match="Non-scalar fill value is not supported"):
             check_kernel(F.pad_mask, make_segmentation_mask(), padding=[1], fill=fill)
 
+    def test_kernel_video(self):
+        check_kernel(F.pad_video, make_video(), padding=[1])
+
     @pytest.mark.parametrize(
         "make_input",
         [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
...
@@ -3633,3 +3637,247 @@ class TestCenterCrop:
         expected = self._reference_center_crop_bounding_boxes(bounding_boxes, output_size)
 
         assert_equal(actual, expected)
+
+
+class TestPerspective:
+    COEFFICIENTS = [
+        [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+        [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+    ]
+    START_END_POINTS = [
+        ([[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]),
+        ([[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]),
+        ([[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]),
+    ]
+    MINIMAL_KWARGS = dict(startpoints=None, endpoints=None, coefficients=COEFFICIENTS[0])
+
+    @param_value_parametrization(
+        coefficients=COEFFICIENTS,
+        start_end_points=START_END_POINTS,
+        fill=EXHAUSTIVE_TYPE_FILLS,
+    )
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image(self, param, value, dtype, device):
+        if param == "start_end_points":
+            kwargs = dict(zip(["startpoints", "endpoints"], value))
+        else:
+            kwargs = {"startpoints": None, "endpoints": None, param: value}
+        if param == "fill":
+            kwargs["coefficients"] = self.COEFFICIENTS[0]
+
+        check_kernel(
+            F.perspective_image,
+            make_image(dtype=dtype, device=device),
+            **kwargs,
+            check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))),
+        )
+
+    def test_kernel_image_error(self):
+        image = make_image_tensor()
+
+        with pytest.raises(ValueError, match="startpoints/endpoints or the coefficients must have non `None` values"):
+            F.perspective_image(image, startpoints=None, endpoints=None)
+
+        with pytest.raises(
+            ValueError, match="startpoints/endpoints and the coefficients shouldn't be defined concurrently"
+        ):
+            startpoints, endpoints = self.START_END_POINTS[0]
+            coefficients = self.COEFFICIENTS[0]
+            F.perspective_image(image, startpoints=startpoints, endpoints=endpoints, coefficients=coefficients)
+
+        with pytest.raises(ValueError, match="coefficients should have 8 float values"):
+            F.perspective_image(image, startpoints=None, endpoints=None, coefficients=list(range(7)))
+
+    @param_value_parametrization(
+        coefficients=COEFFICIENTS,
+        start_end_points=START_END_POINTS,
+    )
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    def test_kernel_bounding_boxes(self, param, value, format):
+        if param == "start_end_points":
+            kwargs = dict(zip(["startpoints", "endpoints"], value))
+        else:
+            kwargs = {"startpoints": None, "endpoints": None, param: value}
+
+        bounding_boxes = make_bounding_boxes(format=format)
+
+        check_kernel(
+            F.perspective_bounding_boxes,
+            bounding_boxes,
+            format=bounding_boxes.format,
+            canvas_size=bounding_boxes.canvas_size,
+            **kwargs,
+        )
+
+    def test_kernel_bounding_boxes_error(self):
+        bounding_boxes = make_bounding_boxes()
+        format, canvas_size = bounding_boxes.format, bounding_boxes.canvas_size
+        bounding_boxes = bounding_boxes.as_subclass(torch.Tensor)
+
+        with pytest.raises(RuntimeError, match="Denominator is zero"):
+            F.perspective_bounding_boxes(
+                bounding_boxes,
+                format=format,
+                canvas_size=canvas_size,
+                startpoints=None,
+                endpoints=None,
+                coefficients=[0.0] * 8,
+            )
+
+    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
+    def test_kernel_mask(self, make_mask):
+        check_kernel(F.perspective_mask, make_mask(), **self.MINIMAL_KWARGS)
+
+    def test_kernel_video(self):
+        check_kernel(F.perspective_video, make_video(), **self.MINIMAL_KWARGS)
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
+    )
+    def test_functional(self, make_input):
+        check_functional(F.perspective, make_input(), **self.MINIMAL_KWARGS)
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.perspective_image, torch.Tensor),
+            (F._perspective_image_pil, PIL.Image.Image),
+            (F.perspective_image, tv_tensors.Image),
+            (F.perspective_bounding_boxes, tv_tensors.BoundingBoxes),
+            (F.perspective_mask, tv_tensors.Mask),
+            (F.perspective_video, tv_tensors.Video),
+        ],
+    )
+    def test_functional_signature(self, kernel, input_type):
+        check_functional_kernel_signature_match(F.perspective, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize("distortion_scale", [0.5, 0.0, 1.0])
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
+    )
+    def test_transform(self, distortion_scale, make_input):
+        check_transform(transforms.RandomPerspective(distortion_scale=distortion_scale, p=1), make_input())
+
+    @pytest.mark.parametrize("distortion_scale", [-1, 2])
+    def test_transform_error(self, distortion_scale):
+        with pytest.raises(ValueError, match="distortion_scale value should be between 0 and 1"):
+            transforms.RandomPerspective(distortion_scale=distortion_scale)
+
+    @pytest.mark.parametrize("coefficients", COEFFICIENTS)
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    def test_image_functional_correctness(self, coefficients, interpolation, fill):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        actual = F.perspective(
+            image, startpoints=None, endpoints=None, coefficients=coefficients, interpolation=interpolation, fill=fill
+        )
+        expected = F.to_image(
+            F.perspective(
+                F.to_pil_image(image),
+                startpoints=None,
+                endpoints=None,
+                coefficients=coefficients,
+                interpolation=interpolation,
+                fill=fill,
+            )
+        )
+
+        if interpolation is transforms.InterpolationMode.BILINEAR:
+            abs_diff = (actual.float() - expected.float()).abs()
+            assert (abs_diff > 1).float().mean() < 7e-2
+            mae = abs_diff.mean()
+            assert mae < 3
+        else:
+            assert_equal(actual, expected)
+
+    def _reference_perspective_bounding_boxes(self, bounding_boxes, *, startpoints, endpoints):
+        format = bounding_boxes.format
+        canvas_size = bounding_boxes.canvas_size
+        dtype = bounding_boxes.dtype
+        device = bounding_boxes.device
+
+        coefficients = _get_perspective_coeffs(endpoints, startpoints)
+
+        def perspective_bounding_boxes(bounding_boxes):
+            m1 = np.array(
+                [
+                    [coefficients[0], coefficients[1], coefficients[2]],
+                    [coefficients[3], coefficients[4], coefficients[5]],
+                ]
+            )
+            m2 = np.array(
+                [
+                    [coefficients[6], coefficients[7], 1.0],
+                    [coefficients[6], coefficients[7], 1.0],
+                ]
+            )
+
+            # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
+            input_xyxy = F.convert_bounding_box_format(
+                bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True),
+                old_format=format,
+                new_format=tv_tensors.BoundingBoxFormat.XYXY,
+                inplace=True,
+            )
+            x1, y1, x2, y2 = input_xyxy.squeeze(0).tolist()
+
+            points = np.array(
+                [
+                    [x1, y1, 1.0],
+                    [x2, y1, 1.0],
+                    [x1, y2, 1.0],
+                    [x2, y2, 1.0],
+                ]
+            )
+
+            numerator = points @ m1.T
+            denominator = points @ m2.T
+            transformed_points = numerator / denominator
+
+            output_xyxy = torch.Tensor(
+                [
+                    float(np.min(transformed_points[:, 0])),
+                    float(np.min(transformed_points[:, 1])),
+                    float(np.max(transformed_points[:, 0])),
+                    float(np.max(transformed_points[:, 1])),
+                ]
+            )
+
+            output = F.convert_bounding_box_format(
+                output_xyxy, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format
+            )
+
+            # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
+            return F.clamp_bounding_boxes(
+                output,
+                format=format,
+                canvas_size=canvas_size,
+            ).to(dtype=dtype, device=device)
+
+        return tv_tensors.BoundingBoxes(
+            torch.cat(
+                [perspective_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 4).unbind()], dim=0
+            ).reshape(bounding_boxes.shape),
+            format=format,
+            canvas_size=canvas_size,
+        )
+
+    @pytest.mark.parametrize(("startpoints", "endpoints"), START_END_POINTS)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
+        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
+
+        actual = F.perspective(bounding_boxes, startpoints=startpoints, endpoints=endpoints)
+        expected = self._reference_perspective_bounding_boxes(
+            bounding_boxes, startpoints=startpoints, endpoints=endpoints
+        )
+
+        assert_close(actual, expected, rtol=0, atol=1)
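The new TestPerspective class covers the low-level kernels, the F.perspective functional, and the RandomPerspective transform. For orientation, a minimal sketch of the functional entry point it exercises, reusing START_END_POINTS[0] (image and box shapes here are arbitrary):

import torch
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F

startpoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
endpoints = [[3, 2], [32, 3], [30, 24], [2, 25]]

image = tv_tensors.Image(torch.randint(0, 256, (3, 26, 34), dtype=torch.uint8))
boxes = tv_tensors.BoundingBoxes([[1, 1, 10, 10]], format="XYXY", canvas_size=(26, 34))

# F.perspective dispatches to perspective_image, perspective_bounding_boxes,
# etc. based on the input type. The warp can be given either as start/end
# points or as the 8 coefficients, but not both at once.
warped_image = F.perspective(image, startpoints=startpoints, endpoints=endpoints)
warped_boxes = F.perspective(boxes, startpoints=startpoints, endpoints=endpoints)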
test/transforms_v2_dispatcher_infos.py

@@ -111,19 +111,6 @@ multi_crop_skips.append(skip_dispatch_tv_tensor)
 DISPATCHER_INFOS = [
-    DispatcherInfo(
-        F.perspective,
-        kernels={
-            tv_tensors.Image: F.perspective_image,
-            tv_tensors.Video: F.perspective_video,
-            tv_tensors.BoundingBoxes: F.perspective_bounding_boxes,
-            tv_tensors.Mask: F.perspective_mask,
-        },
-        pil_kernel_info=PILKernelInfo(F._perspective_image_pil),
-        test_marks=[
-            xfail_jit_python_scalar_arg("fill"),
-        ],
-    ),
     DispatcherInfo(
         F.elastic,
         kernels={
...
test/transforms_v2_kernel_infos.py

@@ -5,23 +5,18 @@ import PIL.Image
 import pytest
 import torch.testing
 import torchvision.transforms.v2.functional as F
-from torchvision import tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from transforms_v2_legacy_utils import (  # noqa: F401
+from transforms_v2_legacy_utils import (
     ArgsKwargs,
-    combinations_grid,
     DEFAULT_PORTRAIT_SPATIAL_SIZE,
     get_num_channels,
     ImageLoader,
     InfoBase,
-    make_bounding_box_loader,
     make_bounding_box_loaders,
-    make_detection_mask_loader,
     make_image_loader,
     make_image_loaders,
     make_image_loaders_for_interpolation,
     make_mask_loaders,
-    make_video_loader,
     make_video_loaders,
     mark_framework_limitation,
     TestMark,
...
@@ -182,135 +177,6 @@ def float32_vs_uint8_fill_adapter(other_args, kwargs):
     return other_args, dict(kwargs, fill=fill)
 
-_PERSPECTIVE_COEFFS = [
-    [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
-    [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
-]
-_STARTPOINTS = [[0, 1], [2, 3], [4, 5], [6, 7]]
-_ENDPOINTS = [[9, 8], [7, 6], [5, 4], [3, 2]]
-
-
-def sample_inputs_perspective_image_tensor():
-    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
-        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
-            yield ArgsKwargs(
-                image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0]
-            )
-
-    yield ArgsKwargs(make_image_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)
-
-
-def reference_inputs_perspective_image_tensor():
-    for image_loader, coefficients, interpolation in itertools.product(
-        make_image_loaders_for_interpolation(),
-        _PERSPECTIVE_COEFFS,
-        [
-            F.InterpolationMode.NEAREST,
-            F.InterpolationMode.BILINEAR,
-        ],
-    ):
-        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
-            # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
-            if isinstance(fill, (list, tuple)):
-                continue
-
-            yield ArgsKwargs(
-                image_loader,
-                startpoints=None,
-                endpoints=None,
-                interpolation=interpolation,
-                fill=fill,
-                coefficients=coefficients,
-            )
-
-
-def sample_inputs_perspective_bounding_boxes():
-    for bounding_boxes_loader in make_bounding_box_loaders():
-        yield ArgsKwargs(
-            bounding_boxes_loader,
-            format=bounding_boxes_loader.format,
-            canvas_size=bounding_boxes_loader.canvas_size,
-            startpoints=None,
-            endpoints=None,
-            coefficients=_PERSPECTIVE_COEFFS[0],
-        )
-
-    format = tv_tensors.BoundingBoxFormat.XYXY
-    loader = make_bounding_box_loader(format=format)
-    yield ArgsKwargs(
-        loader, format=format, canvas_size=loader.canvas_size, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS
-    )
-
-
-def sample_inputs_perspective_mask():
-    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
-        yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0])
-
-    yield ArgsKwargs(make_detection_mask_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)
-
-
-def reference_inputs_perspective_mask():
-    for mask_loader, perspective_coeffs in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
-    ):
-        yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=perspective_coeffs)
-
-
-def sample_inputs_perspective_video():
-    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
-        yield ArgsKwargs(video_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0])
-
-    yield ArgsKwargs(make_video_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)
-
-
-KERNEL_INFOS.extend(
-    [
-        KernelInfo(
-            F.perspective_image,
-            sample_inputs_fn=sample_inputs_perspective_image_tensor,
-            reference_fn=pil_reference_wrapper(F._perspective_image_pil),
-            reference_inputs_fn=reference_inputs_perspective_image_tensor,
-            float32_vs_uint8=float32_vs_uint8_fill_adapter,
-            closeness_kwargs={
-                **pil_reference_pixel_difference(2, mae=True),
-                **cuda_vs_cpu_pixel_difference(),
-                **float32_vs_uint8_pixel_difference(),
-                **scripted_vs_eager_float64_tolerances("cpu", atol=1e-5, rtol=1e-5),
-                **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
-            },
-            test_marks=[xfail_jit_python_scalar_arg("fill")],
-        ),
-        KernelInfo(
-            F.perspective_bounding_boxes,
-            sample_inputs_fn=sample_inputs_perspective_bounding_boxes,
-            closeness_kwargs={
-                **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
-                **scripted_vs_eager_float64_tolerances("cuda", atol=1e-6, rtol=1e-6),
-            },
-        ),
-        KernelInfo(
-            F.perspective_mask,
-            sample_inputs_fn=sample_inputs_perspective_mask,
-            reference_fn=pil_reference_wrapper(F._perspective_image_pil),
-            reference_inputs_fn=reference_inputs_perspective_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
-            },
-        ),
-        KernelInfo(
-            F.perspective_video,
-            sample_inputs_fn=sample_inputs_perspective_video,
-            closeness_kwargs={
-                **cuda_vs_cpu_pixel_difference(),
-                **scripted_vs_eager_float64_tolerances("cpu", atol=1e-5, rtol=1e-5),
-                **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
-            },
-        ),
-    ]
-)
-
-
 def _get_elastic_displacement(canvas_size):
     return torch.rand(1, *canvas_size, 2)
...
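The removed sample-input generators fed fixed coefficient lists and point pairs into the kernels. The two parameterizations are related by _get_perspective_coeffs, a private torchvision helper whose location and signature may change between releases; a small sketch of the relation (note the removed functional test computed inv_pcoeffs by swapping the two arguments):

from torchvision.transforms.functional import _get_perspective_coeffs

startpoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
endpoints = [[3, 2], [32, 3], [30, 24], [2, 25]]

# Least-squares solve for the 8 coefficients (a, b, c, d, e, f, g, h) of the
# projective map between the two quadrilaterals.
coeffs = _get_perspective_coeffs(startpoints, endpoints)
assert len(coeffs) == 8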
torchvision/transforms/v2/_geometry.py

@@ -1008,8 +1008,8 @@ class RandomPerspective(_RandomApplyTransform):
         return self._call_kernel(
             F.perspective,
             inpt,
-            None,
-            None,
+            startpoints=None,
+            endpoints=None,
             fill=fill,
             interpolation=self.interpolation,
             **params,
...
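This change only replaces two positional None arguments with keyword arguments; behavior is identical. A quick sketch of the equivalence at the call site (image and coefficients here are arbitrary):

import torch
from torchvision.transforms.v2 import functional as F

img = torch.randint(0, 256, (3, 26, 34), dtype=torch.uint8)
coeffs = [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018]

# Equivalent calls; the keyword form makes explicit which of perspective's
# parameters the two None values bind to (startpoints and endpoints).
positional = F.perspective(img, None, None, coefficients=coeffs)
keyword = F.perspective(img, startpoints=None, endpoints=None, coefficients=coeffs)
assert torch.equal(positional, keyword)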
torchvision/transforms/v2/functional/_geometry.py

@@ -1552,7 +1552,7 @@ def _perspective_image_pil(
     image: PIL.Image.Image,
     startpoints: Optional[List[List[int]]],
     endpoints: Optional[List[List[int]]],
-    interpolation: Union[InterpolationMode, int] = InterpolationMode.BICUBIC,
+    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
     fill: _FillTypeJIT = None,
     coefficients: Optional[List[float]] = None,
 ) -> PIL.Image.Image:
...
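This one-line change switches the PIL helper's default interpolation from BICUBIC to BILINEAR, presumably to align it with the tensor kernel's default. Code that depends on a specific resampling mode should pass it explicitly rather than rely on a backend default; a sketch with arbitrary inputs:

import torch
from torchvision.transforms import InterpolationMode
from torchvision.transforms.v2 import functional as F

img = torch.randint(0, 256, (3, 26, 34), dtype=torch.uint8)
coeffs = [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018]

out = F.perspective(
    F.to_pil_image(img),
    startpoints=None,
    endpoints=None,
    coefficients=coeffs,
    interpolation=InterpolationMode.BILINEAR,  # explicit, backend-independent
)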