Unverified commit 312c3d32, authored Aug 01, 2023 by Philip Meier, committed via GitHub on Aug 01, 2023

remove spatial_size (#7734)

parent bdf16222

Changes: 29. Showing 20 changed files with 284 additions and 360 deletions.
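The commit renames the `spatial_size` metadata to `canvas_size` throughout the datapoints and transforms v2 APIs, and replaces the `F.get_spatial_size` query with `F.get_size`. The following sketch is added here for illustration only; it is not part of the commit and assumes a torchvision build that already contains this change:

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    image = datapoints.Image(torch.rand(3, 480, 640))

    # before this commit the metadata keyword was spatial_size=image.shape[-2:];
    # after it, the same metadata is called canvas_size
    boxes = datapoints.BoundingBoxes(
        [[17, 16, 344, 495]],
        format=datapoints.BoundingBoxFormat.XYXY,
        canvas_size=image.shape[-2:],
    )

    # the size query is renamed accordingly: F.get_spatial_size -> F.get_size
    assert F.get_size(boxes) == F.get_size(image) == [480, 640]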
gallery/plot_datapoints.py                      +3   -3
gallery/plot_transforms_v2.py                   +1   -1
test/common_utils.py                            +26  -60
test/test_datapoints.py                         +2   -2
test/test_prototype_transforms.py               +17  -17
test/test_transforms_v2.py                      +71  -87
test/test_transforms_v2_consistency.py          +5   -5
test/test_transforms_v2_functional.py           +40  -40
test/test_transforms_v2_refactored.py           +29  -29
test/test_transforms_v2_utils.py                +4   -4
test/transforms_v2_kernel_infos.py              +32  -32
torchvision/datapoints/_bounding_box.py         +31  -31
torchvision/datapoints/_datapoint.py            +1   -1
torchvision/datapoints/_dataset_wrapper.py      +10  -10
torchvision/datapoints/_image.py                +1   -9
torchvision/datapoints/_mask.py                 +1   -5
torchvision/datapoints/_video.py                +1   -13
torchvision/prototype/transforms/_augment.py    +2   -2
torchvision/prototype/transforms/_geometry.py   +5   -7
torchvision/transforms/v2/_augment.py           +2   -2
gallery/plot_datapoints.py

@@ -80,7 +80,7 @@ print(image.shape, image.dtype)
 # corresponding image alongside the actual values:
 bounding_box = datapoints.BoundingBoxes(
-    [17, 16, 344, 495], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=image.shape[-2:]
+    [17, 16, 344, 495], format=datapoints.BoundingBoxFormat.XYXY, canvas_size=image.shape[-2:]
 )
 print(bounding_box)

@@ -108,7 +108,7 @@ class PennFudanDataset(torch.utils.data.Dataset):
         target["boxes"] = datapoints.BoundingBoxes(
             boxes,
             format=datapoints.BoundingBoxFormat.XYXY,
-            spatial_size=F.get_spatial_size(img),
+            canvas_size=F.get_size(img),
         )
         target["labels"] = labels
         target["masks"] = datapoints.Mask(masks)

@@ -129,7 +129,7 @@ class WrapPennFudanDataset:
         target["boxes"] = datapoints.BoundingBoxes(
             target["boxes"],
             format=datapoints.BoundingBoxFormat.XYXY,
-            spatial_size=F.get_spatial_size(img),
+            canvas_size=F.get_size(img),
         )
         target["masks"] = datapoints.Mask(target["masks"])
         return img, target
gallery/plot_transforms_v2.py

@@ -30,7 +30,7 @@ def load_data():
     masks = datapoints.Mask(merged_masks == labels.view(-1, 1, 1))
     bounding_boxes = datapoints.BoundingBoxes(
-        masks_to_boxes(masks), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=image.shape[-2:]
+        masks_to_boxes(masks), format=datapoints.BoundingBoxFormat.XYXY, canvas_size=image.shape[-2:]
     )
     return path, image, bounding_boxes, masks, labels
test/common_utils.py

@@ -412,7 +412,7 @@ DEFAULT_SPATIAL_SIZES = (
 )


-def _parse_spatial_size(size, *, name="size"):
+def _parse_canvas_size(size, *, name="size"):
     if size == "random":
         raise ValueError("This should never happen")
     elif isinstance(size, int) and size > 0:

@@ -467,12 +467,13 @@ class TensorLoader:
 @dataclasses.dataclass
 class ImageLoader(TensorLoader):
     spatial_size: Tuple[int, int] = dataclasses.field(init=False)
     num_channels: int = dataclasses.field(init=False)
     memory_format: torch.memory_format = torch.contiguous_format
+    canvas_size: Tuple[int, int] = dataclasses.field(init=False)

     def __post_init__(self):
-        self.spatial_size = self.shape[-2:]
+        self.spatial_size = self.canvas_size = self.shape[-2:]
         self.num_channels = self.shape[-3]

     def load(self, device):

@@ -538,7 +539,7 @@ def make_image_loader(
 ):
     if not constant_alpha:
         raise ValueError("This should never happen")
-    size = _parse_spatial_size(size)
+    size = _parse_canvas_size(size)
     num_channels = get_num_channels(color_space)

     def fn(shape, dtype, device, memory_format):

@@ -578,7 +579,7 @@ make_images = from_loaders(make_image_loaders)
 def make_image_loader_for_interpolation(
     size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format
 ):
-    size = _parse_spatial_size(size)
+    size = _parse_canvas_size(size)
     num_channels = get_num_channels(color_space)

     def fn(shape, dtype, device, memory_format):
@@ -623,43 +624,20 @@ def make_image_loaders_for_interpolation(
 class BoundingBoxesLoader(TensorLoader):
     format: datapoints.BoundingBoxFormat
     spatial_size: Tuple[int, int]
+    canvas_size: Tuple[int, int] = dataclasses.field(init=False)
+
+    def __post_init__(self):
+        self.canvas_size = self.spatial_size


 def make_bounding_box(
-    size=None,
+    canvas_size=DEFAULT_SIZE,
     *,
     format=datapoints.BoundingBoxFormat.XYXY,
-    spatial_size=None,
     batch_dims=(),
     dtype=None,
     device="cpu",
 ):
-    """
-    size: Size of the actual bounding box, i.e.
-        - (box[3] - box[1], box[2] - box[0]) for XYXY
-        - (H, W) for XYWH and CXCYWH
-    spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on
-        returned datapoints.BoundingBoxes
-
-    To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker
-    functions, e.g.
-
-    .. code::
-
-        image = make_image=(size=size)
-        bounding_boxes = make_bounding_box(spatial_size=size)
-        assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image)
-
-    For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all
-    other maker functions, e.g.
-
-    .. code::
-
-        image = make_image=()
-        bounding_boxes = make_bounding_box()
-        assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image)
-    """
-
     def sample_position(values, max_value):
         # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high.
         # However, if we have batch_dims, we need tensors as limits.

@@ -668,28 +646,16 @@ def make_bounding_box(
     if isinstance(format, str):
         format = datapoints.BoundingBoxFormat[format]

-    if spatial_size is None:
-        if size is None:
-            spatial_size = DEFAULT_SIZE
-        else:
-            height, width = size
-            height_margin, width_margin = torch.randint(10, (2,)).tolist()
-            spatial_size = (height + height_margin, width + width_margin)
-
     dtype = dtype or torch.float32

     if any(dim == 0 for dim in batch_dims):
         return datapoints.BoundingBoxes(
-            torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
+            torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, canvas_size=canvas_size
         )

-    if size is None:
-        h, w = [torch.randint(1, s, batch_dims) for s in spatial_size]
-    else:
-        h, w = [torch.full(batch_dims, s, dtype=torch.int) for s in size]
-    y = sample_position(h, spatial_size[0])
-    x = sample_position(w, spatial_size[1])
+    h, w = [torch.randint(1, c, batch_dims) for c in canvas_size]
+    y = sample_position(h, canvas_size[0])
+    x = sample_position(w, canvas_size[1])

     if format is datapoints.BoundingBoxFormat.XYWH:
         parts = (x, y, w, h)
@@ -706,15 +672,15 @@ def make_bounding_box(
         raise ValueError(f"Format {format} is not supported")

     return datapoints.BoundingBoxes(
-        torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
+        torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size
     )


-def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32):
+def make_bounding_box_loader(*, extra_dims=(), format, canvas_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32):
     if isinstance(format, str):
         format = datapoints.BoundingBoxFormat[format]

-    spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")
+    canvas_size = _parse_canvas_size(canvas_size, name="canvas_size")

     def fn(shape, dtype, device):
         *batch_dims, num_coordinates = shape

@@ -722,21 +688,21 @@ def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORT
             raise pytest.UsageError()

         return make_bounding_box(
-            format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device
+            format=format, canvas_size=canvas_size, batch_dims=batch_dims, dtype=dtype, device=device
         )

-    return BoundingBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
+    return BoundingBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=canvas_size)


 def make_bounding_box_loaders(
     *,
     extra_dims=DEFAULT_EXTRA_DIMS,
     formats=tuple(datapoints.BoundingBoxFormat),
-    spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
+    canvas_size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
     dtypes=(torch.float32, torch.float64, torch.int64),
 ):
     for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
-        yield make_bounding_box_loader(**params, spatial_size=spatial_size)
+        yield make_bounding_box_loader(**params, canvas_size=canvas_size)


 make_bounding_boxes = from_loaders(make_bounding_box_loaders)
@@ -761,7 +727,7 @@ def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtyp
 def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8):
     # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
-    size = _parse_spatial_size(size)
+    size = _parse_canvas_size(size)

     def fn(shape, dtype, device):
         *batch_dims, num_objects, height, width = shape

@@ -802,7 +768,7 @@ def make_segmentation_mask_loader(
     size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8
 ):
     # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
-    spatial_size = _parse_spatial_size(size)
+    canvas_size = _parse_canvas_size(size)

     def fn(shape, dtype, device):
         *batch_dims, height, width = shape

@@ -810,7 +776,7 @@ def make_segmentation_mask_loader(
             (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device
         )

-    return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype)
+    return MaskLoader(fn, shape=(*extra_dims, *canvas_size), dtype=dtype)


 def make_segmentation_mask_loaders(

@@ -860,7 +826,7 @@ def make_video_loader(
     extra_dims=(),
     dtype=torch.uint8,
 ):
-    size = _parse_spatial_size(size)
+    size = _parse_canvas_size(size)

     def fn(shape, dtype, device, memory_format):
         *batch_dims, num_frames, _, height, width = shape
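Illustrative note (not part of the diff): the removed make_bounding_box docstring above described how to build a joint sample whose parts agree on a common size. With the new helper signatures shown in these hunks, the same recipe reads roughly as follows; this is a sketch that assumes the test helpers behave exactly as defined above:

    from common_utils import make_bounding_box, make_detection_mask, make_image
    from torchvision.transforms.v2 import functional as F

    canvas_size = (32, 24)
    image = make_image(canvas_size)
    bounding_boxes = make_bounding_box(format="XYXY", canvas_size=canvas_size, batch_dims=(6,))
    masks = make_detection_mask(canvas_size, num_objects=6)

    # all parts of the sample now agree on the same canvas
    assert F.get_size(image) == F.get_size(bounding_boxes) == F.get_size(masks) == list(canvas_size)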
test/test_datapoints.py

@@ -27,7 +27,7 @@ def test_mask_instance(data):
     "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH]
 )
 def test_bbox_instance(data, format):
-    bboxes = datapoints.BoundingBoxes(data, format=format, spatial_size=(32, 32))
+    bboxes = datapoints.BoundingBoxes(data, format=format, canvas_size=(32, 32))
     assert isinstance(bboxes, torch.Tensor)
     assert bboxes.ndim == 2 and bboxes.shape[1] == 4
     if isinstance(format, str):

@@ -164,7 +164,7 @@ def test_wrap_like():
     [
         datapoints.Image(torch.rand(3, 16, 16)),
         datapoints.Video(torch.rand(2, 3, 16, 16)),
-        datapoints.BoundingBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)),
+        datapoints.BoundingBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, canvas_size=(10, 10)),
         datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)),
     ],
 )
test/test_prototype_transforms.py

@@ -164,7 +164,7 @@ class TestSimpleCopyPaste:
         labels = torch.nn.functional.one_hot(labels, num_classes=5)
         target = {
             "boxes": BoundingBoxes(
-                torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", spatial_size=(32, 32)
+                torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", canvas_size=(32, 32)
             ),
             "masks": Mask(masks),
             "labels": label_type(labels),

@@ -179,7 +179,7 @@ class TestSimpleCopyPaste:
         paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5)
         paste_target = {
             "boxes": BoundingBoxes(
-                torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", spatial_size=(32, 32)
+                torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", canvas_size=(32, 32)
             ),
             "masks": Mask(paste_masks),
             "labels": label_type(paste_labels),

@@ -210,13 +210,13 @@ class TestFixedSizeCrop:
     def test__get_params(self, mocker):
         crop_size = (7, 7)
         batch_shape = (10,)
-        spatial_size = (11, 5)
+        canvas_size = (11, 5)

         transform = transforms.FixedSizeCrop(size=crop_size)

         flat_inputs = [
-            make_image(size=spatial_size, color_space="RGB"),
-            make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape),
+            make_image(size=canvas_size, color_space="RGB"),
+            make_bounding_box(format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=batch_shape),
         ]
         params = transform._get_params(flat_inputs)
@@ -295,7 +295,7 @@ class TestFixedSizeCrop:
     def test__transform_culling(self, mocker):
         batch_size = 10
-        spatial_size = (10, 10)
+        canvas_size = (10, 10)

         is_valid = torch.randint(0, 2, (batch_size,), dtype=torch.bool)
         mocker.patch(

@@ -304,17 +304,17 @@ class TestFixedSizeCrop:
                 needs_crop=True,
                 top=0,
                 left=0,
-                height=spatial_size[0],
-                width=spatial_size[1],
+                height=canvas_size[0],
+                width=canvas_size[1],
                 is_valid=is_valid,
                 needs_pad=False,
             ),
         )

         bounding_boxes = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
+            format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=(batch_size,)
         )
-        masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,))
+        masks = make_detection_mask(size=canvas_size, batch_dims=(batch_size,))
         labels = make_label(extra_dims=(batch_size,))

         transform = transforms.FixedSizeCrop((-1, -1))

@@ -334,7 +334,7 @@ class TestFixedSizeCrop:
     def test__transform_bounding_boxes_clamping(self, mocker):
         batch_size = 3
-        spatial_size = (10, 10)
+        canvas_size = (10, 10)

         mocker.patch(
             "torchvision.prototype.transforms._geometry.FixedSizeCrop._get_params",

@@ -342,15 +342,15 @@ class TestFixedSizeCrop:
                 needs_crop=True,
                 top=0,
                 left=0,
-                height=spatial_size[0],
-                width=spatial_size[1],
+                height=canvas_size[0],
+                width=canvas_size[1],
                 is_valid=torch.full((batch_size,), fill_value=True),
                 needs_pad=False,
             ),
         )

         bounding_boxes = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
+            format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=(batch_size,)
         )

         mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_boxes")
@@ -496,7 +496,7 @@ def test_fixed_sized_crop_against_detection_reference():
         pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
         target = {
-            "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
+            "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
             "labels": make_label(extra_dims=(num_objects,), categories=80),
             "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
         }

@@ -505,7 +505,7 @@ def test_fixed_sized_crop_against_detection_reference():
         tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
         target = {
-            "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
+            "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
             "labels": make_label(extra_dims=(num_objects,), categories=80),
             "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
         }

@@ -514,7 +514,7 @@ def test_fixed_sized_crop_against_detection_reference():
         datapoint_image = make_image(size=size, color_space="RGB")
         target = {
-            "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
+            "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
             "labels": make_label(extra_dims=(num_objects,), categories=80),
             "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
         }
test/test_transforms_v2.py

@@ -174,20 +174,20 @@ class TestSmoke:
     )
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_common(self, transform, adapter, container_type, image_or_video, device):
-        spatial_size = F.get_spatial_size(image_or_video)
+        canvas_size = F.get_size(image_or_video)
         input = dict(
             image_or_video=image_or_video,
-            image_datapoint=make_image(size=spatial_size),
-            video_datapoint=make_video(size=spatial_size),
-            image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])),
+            image_datapoint=make_image(size=canvas_size),
+            video_datapoint=make_video(size=canvas_size),
+            image_pil=next(make_pil_images(sizes=[canvas_size], color_spaces=["RGB"])),
             bounding_boxes_xyxy=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,)
+                format=datapoints.BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=(3,)
             ),
             bounding_boxes_xywh=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,)
+                format=datapoints.BoundingBoxFormat.XYWH, canvas_size=canvas_size, batch_dims=(4,)
             ),
             bounding_boxes_cxcywh=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,)
+                format=datapoints.BoundingBoxFormat.CXCYWH, canvas_size=canvas_size, batch_dims=(5,)
             ),
             bounding_boxes_degenerate_xyxy=datapoints.BoundingBoxes(
                 [

@@ -199,7 +199,7 @@ class TestSmoke:
                     [2, 2, 1, 1],  # x1 > x2, y1 > y2
                 ],
                 format=datapoints.BoundingBoxFormat.XYXY,
-                spatial_size=spatial_size,
+                canvas_size=canvas_size,
             ),
             bounding_boxes_degenerate_xywh=datapoints.BoundingBoxes(
                 [

@@ -211,7 +211,7 @@ class TestSmoke:
                     [0, 0, -1, -1],  # negative height and width
                 ],
                 format=datapoints.BoundingBoxFormat.XYWH,
-                spatial_size=spatial_size,
+                canvas_size=canvas_size,
             ),
             bounding_boxes_degenerate_cxcywh=datapoints.BoundingBoxes(
                 [

@@ -223,10 +223,10 @@ class TestSmoke:
                     [0, 0, -1, -1],  # negative height and width
                 ],
                 format=datapoints.BoundingBoxFormat.CXCYWH,
-                spatial_size=spatial_size,
+                canvas_size=canvas_size,
             ),
-            detection_mask=make_detection_mask(size=spatial_size),
-            segmentation_mask=make_segmentation_mask(size=spatial_size),
+            detection_mask=make_detection_mask(size=canvas_size),
+            segmentation_mask=make_segmentation_mask(size=canvas_size),
             int=0,
             float=0.0,
             bool=True,
@@ -271,7 +271,7 @@ class TestSmoke:
         # TODO: we should test that against all degenerate boxes above
         for format in list(datapoints.BoundingBoxFormat):
             sample = dict(
-                boxes=datapoints.BoundingBoxes([[0, 0, 0, 0]], format=format, spatial_size=(224, 244)),
+                boxes=datapoints.BoundingBoxes([[0, 0, 0, 0]], format=format, canvas_size=(224, 244)),
                 labels=torch.tensor([3]),
             )
             assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4)

@@ -473,11 +473,11 @@ class TestRandomZoomOut:
     @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
     @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
-    def test__get_params(self, fill, side_range, mocker):
+    def test__get_params(self, fill, side_range):
         transform = transforms.RandomZoomOut(fill=fill, side_range=side_range)

-        image = mocker.MagicMock(spec=datapoints.Image)
-        h, w = image.spatial_size = (24, 32)
+        h, w = size = (24, 32)
+        image = make_image(size)

         params = transform._get_params([image])

@@ -490,9 +490,7 @@ class TestRandomZoomOut:
     @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
     @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
     def test__transform(self, fill, side_range, mocker):
-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        inpt.num_channels = 3
-        inpt.spatial_size = (24, 32)
+        inpt = make_image((24, 32))

         transform = transforms.RandomZoomOut(fill=fill, side_range=side_range, p=1)

@@ -559,11 +557,9 @@ class TestRandomCrop:
     @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]])
     @pytest.mark.parametrize("size, pad_if_needed", [((10, 10), False), ((50, 25), True)])
-    def test__get_params(self, padding, pad_if_needed, size, mocker):
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
-        h, w = image.spatial_size
+    def test__get_params(self, padding, pad_if_needed, size):
+        h, w = size = (24, 32)
+        image = make_image(size)

         transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed)
         params = transform._get_params([image])

@@ -613,21 +609,16 @@ class TestRandomCrop:
             output_size, padding=padding, pad_if_needed=pad_if_needed, fill=fill, padding_mode=padding_mode
         )

-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        inpt.num_channels = 3
-        inpt.spatial_size = (32, 32)
+        h, w = size = (32, 32)
+        inpt = make_image(size)

-        expected = mocker.MagicMock(spec=datapoints.Image)
-        expected.num_channels = 3
         if isinstance(padding, int):
-            expected.spatial_size = (inpt.spatial_size[0] + padding, inpt.spatial_size[1] + padding)
+            new_size = (h + padding, w + padding)
         elif isinstance(padding, list):
-            expected.spatial_size = (
-                inpt.spatial_size[0] + sum(padding[0::2]),
-                inpt.spatial_size[1] + sum(padding[1::2]),
-            )
+            new_size = (h + sum(padding[0::2]), w + sum(padding[1::2]))
         else:
-            expected.spatial_size = inpt.spatial_size
+            new_size = size
+        expected = make_image(new_size)
         _ = mocker.patch("torchvision.transforms.v2.functional.pad", return_value=expected)
         fn_crop = mocker.patch("torchvision.transforms.v2.functional.crop")
@@ -703,7 +694,7 @@ class TestGaussianBlur:
         fn = mocker.patch("torchvision.transforms.v2.functional.gaussian_blur")
         inpt = mocker.MagicMock(spec=datapoints.Image)
         inpt.num_channels = 3
-        inpt.spatial_size = (24, 32)
+        inpt.canvas_size = (24, 32)

         # vfdev-5, Feature Request: let's store params as Transform attribute
         # This could be also helpful for users

@@ -749,16 +740,14 @@ class TestRandomPerspective:
         with pytest.raises(TypeError, match="Got inappropriate fill arg"):
             transforms.RandomPerspective(0.5, fill="abc")

-    def test__get_params(self, mocker):
+    def test__get_params(self):
         dscale = 0.5
         transform = transforms.RandomPerspective(dscale)
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
+        image = make_image((24, 32))

         params = transform._get_params([image])

         h, w = image.spatial_size
         assert "coefficients" in params
         assert len(params["coefficients"]) == 8

@@ -769,9 +758,9 @@ class TestRandomPerspective:
         transform = transforms.RandomPerspective(distortion_scale, fill=fill, interpolation=interpolation)

         fn = mocker.patch("torchvision.transforms.v2.functional.perspective")
-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        inpt.num_channels = 3
-        inpt.spatial_size = (24, 32)
+        inpt = make_image((24, 32))

         # vfdev-5, Feature Request: let's store params as Transform attribute
         # This could be also helpful for users
         # Otherwise, we can mock transform._get_params

@@ -809,17 +798,16 @@ class TestElasticTransform:
         with pytest.raises(TypeError, match="Got inappropriate fill arg"):
             transforms.ElasticTransform(1.0, 2.0, fill="abc")

-    def test__get_params(self, mocker):
+    def test__get_params(self):
         alpha = 2.0
         sigma = 3.0
         transform = transforms.ElasticTransform(alpha, sigma)
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
+        h, w = size = (24, 32)
+        image = make_image(size)

         params = transform._get_params([image])

-        h, w = image.spatial_size
         displacement = params["displacement"]
         assert displacement.shape == (1, h, w, 2)
         assert (-alpha / w <= displacement[0, ..., 0]).all() and (displacement[0, ..., 0] <= alpha / w).all()

@@ -845,7 +833,7 @@ class TestElasticTransform:
         fn = mocker.patch("torchvision.transforms.v2.functional.elastic")
         inpt = mocker.MagicMock(spec=datapoints.Image)
         inpt.num_channels = 3
-        inpt.spatial_size = (24, 32)
+        inpt.canvas_size = (24, 32)

         # Let's mock transform._get_params to control the output:
         transform._get_params = mocker.MagicMock()

@@ -856,7 +844,7 @@ class TestRandomErasing:
 class TestRandomErasing:
-    def test_assertions(self, mocker):
+    def test_assertions(self):
         with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"):
             transforms.RandomErasing(value={})

@@ -872,9 +860,7 @@ class TestRandomErasing:
         with pytest.raises(ValueError, match="Scale should be between 0 and 1"):
             transforms.RandomErasing(scale=[-1, 2])

-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
+        image = make_image((24, 32))

         transform = transforms.RandomErasing(value=[1, 2, 3, 4])

@@ -882,10 +868,9 @@ class TestRandomErasing:
             transform._get_params([image])

     @pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"])
-    def test__get_params(self, value, mocker):
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
+    def test__get_params(self, value):
+        image = make_image((24, 32))
+        num_channels, height, width = F.get_dimensions(image)

         transform = transforms.RandomErasing(value=value)
         params = transform._get_params([image])

@@ -895,14 +880,14 @@ class TestRandomErasing:
         i, j = params["i"], params["j"]

         assert isinstance(v, torch.Tensor)
         if value == "random":
-            assert v.shape == (image.num_channels, h, w)
+            assert v.shape == (num_channels, h, w)
         elif isinstance(value, (int, float)):
             assert v.shape == (1, 1, 1)
         elif isinstance(value, (list, tuple)):
-            assert v.shape == (image.num_channels, 1, 1)
+            assert v.shape == (num_channels, 1, 1)

-        assert 0 <= i <= image.spatial_size[0] - h
-        assert 0 <= j <= image.spatial_size[1] - w
+        assert 0 <= i <= height - h
+        assert 0 <= j <= width - w

     @pytest.mark.parametrize("p", [0, 1])
     def test__transform(self, mocker, p):
@@ -1061,14 +1046,13 @@ class TestRandomChoice:
 class TestRandomIoUCrop:
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
-    def test__get_params(self, device, options, mocker):
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
+    def test__get_params(self, device, options):
+        orig_h, orig_w = size = (24, 32)
+        image = make_image(size)
         bboxes = datapoints.BoundingBoxes(
             torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]),
             format="XYXY",
-            spatial_size=image.spatial_size,
+            canvas_size=size,
             device=device,
         )
         sample = [image, bboxes]

@@ -1087,8 +1071,6 @@ class TestRandomIoUCrop:
         assert len(params["is_within_crop_area"]) > 0
         assert params["is_within_crop_area"].dtype == torch.bool

-        orig_h = image.spatial_size[0]
-        orig_w = image.spatial_size[1]
         assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h)
         assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w)

@@ -1103,7 +1085,7 @@ class TestRandomIoUCrop:
     def test__transform_empty_params(self, mocker):
         transform = transforms.RandomIoUCrop(sampler_options=[2.0])
         image = datapoints.Image(torch.rand(1, 3, 4, 4))
-        bboxes = datapoints.BoundingBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4))
+        bboxes = datapoints.BoundingBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", canvas_size=(4, 4))
         label = torch.tensor([1])
         sample = [image, bboxes, label]
         # Let's mock transform._get_params to control the output:

@@ -1122,9 +1104,10 @@ class TestRandomIoUCrop:
     def test__transform(self, mocker):
         transform = transforms.RandomIoUCrop()

-        image = datapoints.Image(torch.rand(3, 32, 24))
-        bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), batch_dims=(6,))
-        masks = make_detection_mask((32, 24), num_objects=6)
+        size = (32, 24)
+        image = make_image(size)
+        bboxes = make_bounding_box(format="XYXY", canvas_size=size, batch_dims=(6,))
+        masks = make_detection_mask(size, num_objects=6)

         sample = [image, bboxes, masks]

@@ -1155,13 +1138,14 @@ class TestScaleJitter:
 class TestScaleJitter:
-    def test__get_params(self, mocker):
-        spatial_size = (24, 32)
+    def test__get_params(self):
+        canvas_size = (24, 32)
         target_size = (16, 12)
         scale_range = (0.5, 1.5)

         transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range)

-        sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size)
+        sample = make_image(canvas_size)

         n_samples = 5
         for _ in range(n_samples):

@@ -1174,11 +1158,11 @@ class TestScaleJitter:
             assert isinstance(size, tuple) and len(size) == 2
             height, width = size

-            r_min = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[0]
-            r_max = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[1]
+            r_min = min(target_size[1] / canvas_size[0], target_size[0] / canvas_size[1]) * scale_range[0]
+            r_max = min(target_size[1] / canvas_size[0], target_size[0] / canvas_size[1]) * scale_range[1]

-            assert int(spatial_size[0] * r_min) <= height <= int(spatial_size[0] * r_max)
-            assert int(spatial_size[1] * r_min) <= width <= int(spatial_size[1] * r_max)
+            assert int(canvas_size[0] * r_min) <= height <= int(canvas_size[0] * r_max)
+            assert int(canvas_size[1] * r_min) <= width <= int(canvas_size[1] * r_max)

     def test__transform(self, mocker):
         interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)

@@ -1206,12 +1190,12 @@ class TestScaleJitter:
 class TestRandomShortestSize:
     @pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)])
-    def test__get_params(self, min_size, max_size, mocker):
-        spatial_size = (3, 10)
+    def test__get_params(self, min_size, max_size):
+        canvas_size = (3, 10)

         transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size, antialias=True)

-        sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size)
+        sample = make_image(canvas_size)
         params = transform._get_params([sample])

         assert "size" in params
@@ -1523,7 +1507,7 @@ def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize):
     boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4))
     boxes[:, 2:] += boxes[:, :2]
     boxes = boxes.clamp(min=0, max=min(H, W))
-    boxes = datapoints.BoundingBoxes(boxes, format="XYXY", spatial_size=(H, W))
+    boxes = datapoints.BoundingBoxes(boxes, format="XYXY", canvas_size=(H, W))

     masks = datapoints.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8))

@@ -1597,7 +1581,7 @@ def test_sanitize_bounding_boxes(min_size, labels_getter, sample_type):
     boxes = datapoints.BoundingBoxes(
         boxes,
         format=datapoints.BoundingBoxFormat.XYXY,
-        spatial_size=(H, W),
+        canvas_size=(H, W),
     )

     masks = datapoints.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W)))

@@ -1651,7 +1635,7 @@ def test_sanitize_bounding_boxes_errors():
     good_bbox = datapoints.BoundingBoxes(
         [[0, 0, 10, 10]],
         format=datapoints.BoundingBoxFormat.XYXY,
-        spatial_size=(20, 20),
+        canvas_size=(20, 20),
     )

     with pytest.raises(ValueError, match="min_size must be >= 1"):

@@ -1678,7 +1662,7 @@ def test_sanitize_bounding_boxes_errors():
             [[0, 0, 10, 10]],
         ],
         format=datapoints.BoundingBoxFormat.XYXY,
-        spatial_size=(20, 20),
+        canvas_size=(20, 20),
     )
     different_sizes = {"bbox": bad_bbox, "labels": torch.arange(bad_bbox.shape[0])}
     transforms.SanitizeBoundingBoxes()(different_sizes)
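Side note (illustration only, not part of the commit): besides the rename, many hunks in test_transforms_v2.py above drop mocker.MagicMock(spec=datapoints.Image) stand-ins, which only faked a .spatial_size attribute, in favour of real inputs built with the make_image test helper, for example:

    from common_utils import make_image
    from torchvision import datapoints

    h, w = size = (24, 32)
    image = make_image(size)                 # a real datapoints.Image instead of a MagicMock
    assert isinstance(image, datapoints.Image)
    assert tuple(image.shape[-2:]) == (h, w)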
test/test_transforms_v2_consistency.py

@@ -31,7 +31,7 @@ from torchvision._utils import sequence_to_str
 from torchvision.transforms import functional as legacy_F
 from torchvision.transforms.v2 import functional as prototype_F
 from torchvision.transforms.v2.functional import to_image_pil
-from torchvision.transforms.v2.utils import query_spatial_size
+from torchvision.transforms.v2.utils import query_size

 DEFAULT_MAKE_IMAGES_KWARGS = dict(color_spaces=["RGB"], extra_dims=[(4,)])

@@ -1090,7 +1090,7 @@ class TestRefDetTransforms:
         pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
         target = {
-            "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
+            "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
             "labels": make_label(extra_dims=(num_objects,), categories=80),
         }

         if with_mask:

@@ -1100,7 +1100,7 @@ class TestRefDetTransforms:
         tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32))
         target = {
-            "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
+            "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
             "labels": make_label(extra_dims=(num_objects,), categories=80),
         }

         if with_mask:

@@ -1110,7 +1110,7 @@ class TestRefDetTransforms:
         datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32)
         target = {
-            "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
+            "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
             "labels": make_label(extra_dims=(num_objects,), categories=80),
         }

         if with_mask:

@@ -1172,7 +1172,7 @@ class PadIfSmaller(v2_transforms.Transform):
         self.fill = v2_transforms._geometry._setup_fill_arg(fill)

     def _get_params(self, sample):
-        height, width = query_spatial_size(sample)
+        height, width = query_size(sample)
         padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)]
         needs_padding = any(padding)
         return dict(padding=padding, needs_padding=needs_padding)
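Illustrative sketch (not part of the commit) of the renamed utility used by PadIfSmaller above: torchvision.transforms.v2.utils.query_size replaces query_spatial_size for looking up the height and width of a sample inside a custom transform. Assuming a build that contains this change:

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2.utils import query_size

    sample = [
        datapoints.Image(torch.rand(3, 10, 12)),
        datapoints.BoundingBoxes([[0, 0, 5, 5]], format="XYXY", canvas_size=(10, 12)),
    ]
    height, width = query_size(sample)  # -> (10, 12), read from the datapoints' metadata
    padding = [0, 0, max(16 - width, 0), max(16 - height, 0)]  # same pattern as PadIfSmaller._get_params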
test/test_transforms_v2_functional.py

@@ -351,7 +351,7 @@ class TestDispatchers:
             F.get_image_size,
             F.get_num_channels,
             F.get_num_frames,
-            F.get_spatial_size,
+            F.get_size,
             F.rgb_to_grayscale,
             F.uniform_temporal_subsample,
         ],

@@ -568,27 +568,27 @@ class TestClampBoundingBoxes:
         [
             dict(),
             dict(format=datapoints.BoundingBoxFormat.XYXY),
-            dict(spatial_size=(1, 1)),
+            dict(canvas_size=(1, 1)),
         ],
     )
     def test_simple_tensor_insufficient_metadata(self, metadata):
         simple_tensor = next(make_bounding_boxes()).as_subclass(torch.Tensor)

-        with pytest.raises(ValueError, match=re.escape("`format` and `spatial_size` has to be passed")):
+        with pytest.raises(ValueError, match=re.escape("`format` and `canvas_size` has to be passed")):
             F.clamp_bounding_boxes(simple_tensor, **metadata)

     @pytest.mark.parametrize(
         "metadata",
         [
             dict(format=datapoints.BoundingBoxFormat.XYXY),
-            dict(spatial_size=(1, 1)),
-            dict(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(1, 1)),
+            dict(canvas_size=(1, 1)),
+            dict(format=datapoints.BoundingBoxFormat.XYXY, canvas_size=(1, 1)),
         ],
     )
     def test_datapoint_explicit_metadata(self, metadata):
         datapoint = next(make_bounding_boxes())

-        with pytest.raises(ValueError, match=re.escape("`format` and `spatial_size` must not be passed")):
+        with pytest.raises(ValueError, match=re.escape("`format` and `canvas_size` must not be passed")):
             F.clamp_bounding_boxes(datapoint, **metadata)

@@ -673,7 +673,7 @@ def test_correctness_crop_bounding_boxes(device, format, top, left, height, widt
     # expected_bboxes.append(out_box)

     format = datapoints.BoundingBoxFormat.XYXY
-    spatial_size = (64, 76)
+    canvas_size = (64, 76)
     in_boxes = [
         [10.0, 15.0, 25.0, 35.0],
         [50.0, 5.0, 70.0, 22.0],

@@ -684,23 +684,23 @@ def test_correctness_crop_bounding_boxes(device, format, top, left, height, widt
         in_boxes = convert_format_bounding_boxes(in_boxes, datapoints.BoundingBoxFormat.XYXY, format)

     expected_bboxes = clamp_bounding_boxes(
-        datapoints.BoundingBoxes(expected_bboxes, format="XYXY", spatial_size=spatial_size)
+        datapoints.BoundingBoxes(expected_bboxes, format="XYXY", canvas_size=canvas_size)
     ).tolist()

-    output_boxes, output_spatial_size = F.crop_bounding_boxes(
+    output_boxes, output_canvas_size = F.crop_bounding_boxes(
         in_boxes,
         format,
         top,
         left,
-        spatial_size[0],
-        spatial_size[1],
+        canvas_size[0],
+        canvas_size[1],
     )

     if format != datapoints.BoundingBoxFormat.XYXY:
         output_boxes = convert_format_bounding_boxes(output_boxes, format, datapoints.BoundingBoxFormat.XYXY)

     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
-    torch.testing.assert_close(output_spatial_size, spatial_size)
+    torch.testing.assert_close(output_canvas_size, canvas_size)


 @pytest.mark.parametrize("device", cpu_and_cuda())
@@ -737,7 +737,7 @@ def test_correctness_resized_crop_bounding_boxes(device, format, top, left, heig
         return bbox

     format = datapoints.BoundingBoxFormat.XYXY
-    spatial_size = (100, 100)
+    canvas_size = (100, 100)
     in_boxes = [
         [10.0, 10.0, 20.0, 20.0],
         [5.0, 10.0, 15.0, 20.0],

@@ -748,18 +748,18 @@ def test_correctness_resized_crop_bounding_boxes(device, format, top, left, heig
     expected_bboxes = torch.tensor(expected_bboxes, device=device)

     in_boxes = datapoints.BoundingBoxes(
-        in_boxes, format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, device=device
+        in_boxes, format=datapoints.BoundingBoxFormat.XYXY, canvas_size=canvas_size, device=device
     )
     if format != datapoints.BoundingBoxFormat.XYXY:
         in_boxes = convert_format_bounding_boxes(in_boxes, datapoints.BoundingBoxFormat.XYXY, format)

-    output_boxes, output_spatial_size = F.resized_crop_bounding_boxes(in_boxes, format, top, left, height, width, size)
+    output_boxes, output_canvas_size = F.resized_crop_bounding_boxes(in_boxes, format, top, left, height, width, size)

     if format != datapoints.BoundingBoxFormat.XYXY:
         output_boxes = convert_format_bounding_boxes(output_boxes, format, datapoints.BoundingBoxFormat.XYXY)

     torch.testing.assert_close(output_boxes, expected_bboxes)
-    torch.testing.assert_close(output_spatial_size, size)
+    torch.testing.assert_close(output_canvas_size, size)


 def _parse_padding(padding):

@@ -798,28 +798,28 @@ def test_correctness_pad_bounding_boxes(device, padding):
         bbox = bbox.to(dtype)
         return bbox

-    def _compute_expected_spatial_size(bbox, padding_):
+    def _compute_expected_canvas_size(bbox, padding_):
         pad_left, pad_up, pad_right, pad_down = _parse_padding(padding_)
-        height, width = bbox.spatial_size
+        height, width = bbox.canvas_size
         return height + pad_up + pad_down, width + pad_left + pad_right

     for bboxes in make_bounding_boxes():
         bboxes = bboxes.to(device)
         bboxes_format = bboxes.format
-        bboxes_spatial_size = bboxes.spatial_size
+        bboxes_canvas_size = bboxes.canvas_size

-        output_boxes, output_spatial_size = F.pad_bounding_boxes(
-            bboxes, format=bboxes_format, spatial_size=bboxes_spatial_size, padding=padding
+        output_boxes, output_canvas_size = F.pad_bounding_boxes(
+            bboxes, format=bboxes_format, canvas_size=bboxes_canvas_size, padding=padding
         )

-        torch.testing.assert_close(output_spatial_size, _compute_expected_spatial_size(bboxes, padding))
+        torch.testing.assert_close(output_canvas_size, _compute_expected_canvas_size(bboxes, padding))

         if bboxes.ndim < 2 or bboxes.shape[0] == 0:
             bboxes = [bboxes]

         expected_bboxes = []
         for bbox in bboxes:
-            bbox = datapoints.BoundingBoxes(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
+            bbox = datapoints.BoundingBoxes(bbox, format=bboxes_format, canvas_size=bboxes_canvas_size)
             expected_bboxes.append(_compute_expected_bbox(bbox, padding))

         if len(expected_bboxes) > 1:

@@ -887,24 +887,24 @@ def test_correctness_perspective_bounding_boxes(device, startpoints, endpoints):
         out_bbox = datapoints.BoundingBoxes(
             out_bbox,
             format=datapoints.BoundingBoxFormat.XYXY,
-            spatial_size=bbox.spatial_size,
+            canvas_size=bbox.canvas_size,
             dtype=bbox.dtype,
             device=bbox.device,
         )
         return clamp_bounding_boxes(convert_format_bounding_boxes(out_bbox, new_format=bbox.format))

-    spatial_size = (32, 38)
+    canvas_size = (32, 38)

     pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
     inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)

-    for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)):
+    for bboxes in make_bounding_boxes(canvas_size=canvas_size, extra_dims=((4,),)):
         bboxes = bboxes.to(device)

         output_bboxes = F.perspective_bounding_boxes(
             bboxes.as_subclass(torch.Tensor),
             format=bboxes.format,
-            spatial_size=bboxes.spatial_size,
+            canvas_size=bboxes.canvas_size,
             startpoints=None,
             endpoints=None,
             coefficients=pcoeffs,

@@ -915,7 +915,7 @@ def test_correctness_perspective_bounding_boxes(device, startpoints, endpoints):
         expected_bboxes = []
         for bbox in bboxes:
-            bbox = datapoints.BoundingBoxes(bbox, format=bboxes.format, spatial_size=bboxes.spatial_size)
+            bbox = datapoints.BoundingBoxes(bbox, format=bboxes.format, canvas_size=bboxes.canvas_size)
             expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs))
         if len(expected_bboxes) > 1:
             expected_bboxes = torch.stack(expected_bboxes)
@@ -932,15 +932,15 @@ def test_correctness_perspective_bounding_boxes(device, startpoints, endpoints):
 def test_correctness_center_crop_bounding_boxes(device, output_size):
     def _compute_expected_bbox(bbox, output_size_):
         format_ = bbox.format
-        spatial_size_ = bbox.spatial_size
+        canvas_size_ = bbox.canvas_size
         dtype = bbox.dtype
         bbox = convert_format_bounding_boxes(bbox.float(), format_, datapoints.BoundingBoxFormat.XYWH)

         if len(output_size_) == 1:
             output_size_.append(output_size_[-1])

-        cy = int(round((spatial_size_[0] - output_size_[0]) * 0.5))
-        cx = int(round((spatial_size_[1] - output_size_[1]) * 0.5))
+        cy = int(round((canvas_size_[0] - output_size_[0]) * 0.5))
+        cx = int(round((canvas_size_[1] - output_size_[1]) * 0.5))
         out_bbox = [
             bbox[0].item() - cx,
             bbox[1].item() - cy,

@@ -949,16 +949,16 @@ def test_correctness_center_crop_bounding_boxes(device, output_size):
         ]
         out_bbox = torch.tensor(out_bbox)
         out_bbox = convert_format_bounding_boxes(out_bbox, datapoints.BoundingBoxFormat.XYWH, format_)
-        out_bbox = clamp_bounding_boxes(out_bbox, format=format_, spatial_size=output_size)
+        out_bbox = clamp_bounding_boxes(out_bbox, format=format_, canvas_size=output_size)
         return out_bbox.to(dtype=dtype, device=bbox.device)

     for bboxes in make_bounding_boxes(extra_dims=((4,),)):
         bboxes = bboxes.to(device)
         bboxes_format = bboxes.format
-        bboxes_spatial_size = bboxes.spatial_size
+        bboxes_canvas_size = bboxes.canvas_size

-        output_boxes, output_spatial_size = F.center_crop_bounding_boxes(
-            bboxes, bboxes_format, bboxes_spatial_size, output_size
+        output_boxes, output_canvas_size = F.center_crop_bounding_boxes(
+            bboxes, bboxes_format, bboxes_canvas_size, output_size
         )

         if bboxes.ndim < 2:

@@ -966,7 +966,7 @@ def test_correctness_center_crop_bounding_boxes(device, output_size):
         expected_bboxes = []
         for bbox in bboxes:
-            bbox = datapoints.BoundingBoxes(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
+            bbox = datapoints.BoundingBoxes(bbox, format=bboxes_format, canvas_size=bboxes_canvas_size)
             expected_bboxes.append(_compute_expected_bbox(bbox, output_size))

         if len(expected_bboxes) > 1:

@@ -975,7 +975,7 @@ def test_correctness_center_crop_bounding_boxes(device, output_size):
             expected_bboxes = expected_bboxes[0]

         torch.testing.assert_close(output_boxes, expected_bboxes, atol=1, rtol=0)
-        torch.testing.assert_close(output_spatial_size, output_size)
+        torch.testing.assert_close(output_canvas_size, output_size)


 @pytest.mark.parametrize("device", cpu_and_cuda())

@@ -1003,11 +1003,11 @@ def test_correctness_center_crop_mask(device, output_size):
 # Copied from test/test_functional_tensor.py
 @pytest.mark.parametrize("device", cpu_and_cuda())
-@pytest.mark.parametrize("spatial_size", ("small", "large"))
+@pytest.mark.parametrize("canvas_size", ("small", "large"))
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)])
 @pytest.mark.parametrize("sigma", [[0.5, 0.5], (0.5, 0.5), (0.8, 0.8), (1.7, 1.7)])
-def test_correctness_gaussian_blur_image_tensor(device, spatial_size, dt, ksize, sigma):
+def test_correctness_gaussian_blur_image_tensor(device, canvas_size, dt, ksize, sigma):
     fn = F.gaussian_blur_image_tensor

     # true_cv2_results = {

@@ -1027,7 +1027,7 @@ def test_correctness_gaussian_blur_image_tensor(device, spatial_size, dt, ksize,
     p = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "gaussian_blur_opencv_results.pt")
     true_cv2_results = torch.load(p)

-    if spatial_size == "small":
+    if canvas_size == "small":
         tensor = (
             torch.from_numpy(np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))).permute(2, 0, 1).to(device)
         )
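Illustrative note (not part of the commit): at the kernel level the metadata argument and the returned metadata are renamed the same way, as exercised by the tests above. A minimal sketch, assuming a torchvision build that contains this change:

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    boxes = torch.tensor([[10.0, 15.0, 25.0, 35.0]])
    format = datapoints.BoundingBoxFormat.XYXY
    canvas_size = (64, 76)

    # plain-tensor kernels now take canvas_size= instead of spatial_size= ...
    clamped = F.clamp_bounding_boxes(boxes, format=format, canvas_size=canvas_size)

    # ... and kernels that change the canvas return the new canvas size alongside the boxes
    padded, output_canvas_size = F.pad_bounding_boxes(
        boxes, format=format, canvas_size=canvas_size, padding=[1, 1, 1, 1]
    )
    assert output_canvas_size == (66, 78)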
test/test_transforms_v2_refactored.py

@@ -392,7 +392,7 @@ def assert_warns_antialias_default_value():
     yield


-def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_size, affine_matrix):
+def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, canvas_size, affine_matrix):
     def transform(bbox):
         # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
         in_dtype = bbox.dtype

@@ -426,7 +426,7 @@ def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_si
             out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True
         )
         # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
-        out_bbox = F.clamp_bounding_boxes(out_bbox, format=format, spatial_size=spatial_size)
+        out_bbox = F.clamp_bounding_boxes(out_bbox, format=format, canvas_size=canvas_size)
         out_bbox = out_bbox.to(dtype=in_dtype)
         return out_bbox

@@ -514,14 +514,14 @@ class TestResize:
         bounding_boxes = make_bounding_box(
             format=format,
-            spatial_size=self.INPUT_SIZE,
+            canvas_size=self.INPUT_SIZE,
             dtype=dtype,
             device=device,
         )
         check_kernel(
             F.resize_bounding_boxes,
             bounding_boxes,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             size=size,
             **max_size_kwarg,
             check_scripted_vs_eager=not isinstance(size, int),

@@ -588,8 +588,8 @@ class TestResize:
         check_transform(transforms.Resize, make_input(self.INPUT_SIZE, device=device), size=size, antialias=True)

     def _check_output_size(self, input, output, *, size, max_size):
-        assert tuple(F.get_spatial_size(output)) == self._compute_output_size(
-            input_size=F.get_spatial_size(input), size=size, max_size=max_size
+        assert tuple(F.get_size(output)) == self._compute_output_size(
+            input_size=F.get_size(input), size=size, max_size=max_size
         )

     @pytest.mark.parametrize("size", OUTPUT_SIZES)

@@ -613,9 +613,9 @@ class TestResize:
         torch.testing.assert_close(actual, expected, atol=1, rtol=0)

     def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=None):
-        old_height, old_width = bounding_boxes.spatial_size
+        old_height, old_width = bounding_boxes.canvas_size
         new_height, new_width = self._compute_output_size(
-            input_size=bounding_boxes.spatial_size, size=size, max_size=max_size
+            input_size=bounding_boxes.canvas_size, size=size, max_size=max_size
         )

         if (old_height, old_width) == (new_height, new_width):

@@ -632,10 +632,10 @@ class TestResize:
         expected_bboxes = reference_affine_bounding_boxes_helper(
             bounding_boxes,
             format=bounding_boxes.format,
-            spatial_size=(new_height, new_width),
+            canvas_size=(new_height, new_width),
             affine_matrix=affine_matrix,
         )

-        return datapoints.BoundingBoxes.wrap_like(bounding_boxes, expected_bboxes, spatial_size=(new_height, new_width))
+        return datapoints.BoundingBoxes.wrap_like(bounding_boxes, expected_bboxes, canvas_size=(new_height, new_width))

     @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
     @pytest.mark.parametrize("size", OUTPUT_SIZES)

@@ -645,7 +645,7 @@ class TestResize:
         if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
             return

-        bounding_boxes = make_bounding_box(format=format, spatial_size=self.INPUT_SIZE)
+        bounding_boxes = make_bounding_box(format=format, canvas_size=self.INPUT_SIZE)

         actual = fn(bounding_boxes, size=size, **max_size_kwarg)
         expected = self._reference_resize_bounding_boxes(bounding_boxes, size=size, **max_size_kwarg)

@@ -762,7 +762,7 @@ class TestResize:
     def test_noop(self, size, make_input):
         input = make_input(self.INPUT_SIZE)

-        output = F.resize(input, size=F.get_spatial_size(input), antialias=True)
+        output = F.resize(input, size=F.get_size(input), antialias=True)

         # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there
         # is a good reason to break this, feel free to downgrade to an equality check.

@@ -792,11 +792,11 @@ class TestResize:
         input = make_input(self.INPUT_SIZE)

-        size = min(F.get_spatial_size(input))
+        size = min(F.get_size(input))
         max_size = size + 1
         output = F.resize(input, size=size, max_size=max_size, antialias=True)

-        assert max(F.get_spatial_size(output)) == max_size
+        assert max(F.get_size(output)) == max_size


 class TestHorizontalFlip:
@@ -814,7 +814,7 @@ class TestHorizontalFlip:
             F.horizontal_flip_bounding_boxes,
             bounding_boxes,
             format=format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
         )

     @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])

@@ -874,7 +874,7 @@ class TestHorizontalFlip:
     def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes):
         affine_matrix = np.array(
             [
-                [-1, 0, bounding_boxes.spatial_size[1]],
+                [-1, 0, bounding_boxes.canvas_size[1]],
                 [0, 1, 0],
             ],
             dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",

@@ -883,7 +883,7 @@ class TestHorizontalFlip:
         expected_bboxes = reference_affine_bounding_boxes_helper(
             bounding_boxes,
             format=bounding_boxes.format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             affine_matrix=affine_matrix,
         )

@@ -995,7 +995,7 @@ class TestAffine:
             F.affine_bounding_boxes,
             bounding_boxes,
             format=format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             **{param: value},
             check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))),
         )

@@ -1133,7 +1133,7 @@ class TestAffine:
     def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate, scale, shear, center):
         if center is None:
-            center = [s * 0.5 for s in bounding_boxes.spatial_size[::-1]]
+            center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]]

         affine_matrix = self._compute_affine_matrix(
             angle=angle, translate=translate, scale=scale, shear=shear, center=center

@@ -1143,7 +1143,7 @@ class TestAffine:
         expected_bboxes = reference_affine_bounding_boxes_helper(
             bounding_boxes,
             format=bounding_boxes.format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             affine_matrix=affine_matrix,
         )

@@ -1202,7 +1202,7 @@ class TestAffine:
     @pytest.mark.parametrize("seed", list(range(10)))
     def test_transform_get_params_bounds(self, degrees, translate, scale, shear, seed):
         image = make_image()
-        height, width = F.get_spatial_size(image)
+        height, width = F.get_size(image)

         transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear)

@@ -1293,7 +1293,7 @@ class TestVerticalFlip:
             F.vertical_flip_bounding_boxes,
             bounding_boxes,
             format=format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
         )

     @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])

@@ -1352,7 +1352,7 @@ class TestVerticalFlip:
         affine_matrix = np.array(
             [
                 [1, 0, 0],
-                [0, -1, bounding_boxes.spatial_size[0]],
+                [0, -1, bounding_boxes.canvas_size[0]],
             ],
             dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",
         )

@@ -1360,7 +1360,7 @@ class TestVerticalFlip:
         expected_bboxes = reference_affine_bounding_boxes_helper(
             bounding_boxes,
             format=bounding_boxes.format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             affine_matrix=affine_matrix,
         )

@@ -1449,7 +1449,7 @@ class TestRotate:
             F.rotate_bounding_boxes,
             bounding_boxes,
             format=format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             **kwargs,
         )

@@ -1555,7 +1555,7 @@ class TestRotate:
             raise ValueError("This reference currently does not support expand=True")

         if center is None:
-            center = [s * 0.5 for s in bounding_boxes.spatial_size[::-1]]
+            center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]]

         a = np.cos(angle * np.pi / 180.0)
         b = np.sin(angle * np.pi / 180.0)

@@ -1572,7 +1572,7 @@ class TestRotate:
         expected_bboxes = reference_affine_bounding_boxes_helper(
             bounding_boxes,
             format=bounding_boxes.format,
-            spatial_size=bounding_boxes.spatial_size,
+            canvas_size=bounding_boxes.canvas_size,
             affine_matrix=affine_matrix,
         )

@@ -1834,7 +1834,7 @@ class TestToDtype:
         mask_dtype = torch.bool
         sample = {
             "inpt": make_input(size=(H, W), dtype=inpt_dtype),
-            "bbox": make_bounding_box(size=(H, W), dtype=bbox_dtype),
+            "bbox": make_bounding_box(canvas_size=(H, W), dtype=bbox_dtype),
             "mask": make_detection_mask(size=(H, W), dtype=mask_dtype),
         }

@@ -1988,7 +1988,7 @@ class TestCutMixMixUp:
         for input_with_bad_type in (
             F.to_pil_image(imgs[0]),
             datapoints.Mask(torch.rand(12, 12)),
-            datapoints.BoundingBoxes(torch.rand(2, 4), format="XYXY", spatial_size=12),
+            datapoints.BoundingBoxes(torch.rand(2, 4), format="XYXY", canvas_size=12),
         ):
             with pytest.raises(ValueError, match="does not support PIL images, "):
                 cutmix_mixup(input_with_bad_type)
test/test_transforms_v2_utils.py
View file @ 312c3d32
...

@@ -4,16 +4,16 @@ import pytest
import torch

import torchvision.transforms.v2.utils
-from common_utils import make_bounding_box, make_detection_mask, make_image
+from common_utils import DEFAULT_SIZE, make_bounding_box, make_detection_mask, make_image
from torchvision import datapoints
from torchvision.transforms.v2.functional import to_image_pil
from torchvision.transforms.v2.utils import has_all, has_any


-IMAGE = make_image(color_space="RGB")
-BOUNDING_BOX = make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size)
-MASK = make_detection_mask(size=IMAGE.spatial_size)
+IMAGE = make_image(DEFAULT_SIZE, color_space="RGB")
+BOUNDING_BOX = make_bounding_box(DEFAULT_SIZE, format=datapoints.BoundingBoxFormat.XYXY)
+MASK = make_detection_mask(DEFAULT_SIZE)


@pytest.mark.parametrize(
...
test/transforms_v2_kernel_infos.py
View file @ 312c3d32
...

@@ -184,8 +184,8 @@ def float32_vs_uint8_fill_adapter(other_args, kwargs):
    return other_args, dict(kwargs, fill=fill)


-def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_size, affine_matrix):
-   def transform(bbox, affine_matrix_, format_, spatial_size_):
+def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, canvas_size, affine_matrix):
+   def transform(bbox, affine_matrix_, format_, canvas_size_):
        # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
        in_dtype = bbox.dtype
        if not torch.is_floating_point(bbox):
...

@@ -218,14 +218,14 @@ def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_si
            out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
        )
        # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
-       out_bbox = F.clamp_bounding_boxes(out_bbox, format=format_, spatial_size=spatial_size_)
+       out_bbox = F.clamp_bounding_boxes(out_bbox, format=format_, canvas_size=canvas_size_)
        out_bbox = out_bbox.to(dtype=in_dtype)
        return out_bbox

    if bounding_boxes.ndim < 2:
        bounding_boxes = [bounding_boxes]

-   expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_boxes]
+   expected_bboxes = [transform(bbox, affine_matrix, format, canvas_size) for bbox in bounding_boxes]
    if len(expected_bboxes) > 1:
        expected_bboxes = torch.stack(expected_bboxes)
    else:
...

@@ -321,11 +321,11 @@ def reference_crop_bounding_boxes(bounding_boxes, *, format, top, left, height,
        dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",
    )

-   spatial_size = (height, width)
+   canvas_size = (height, width)
    expected_bboxes = reference_affine_bounding_boxes_helper(
-       bounding_boxes, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
+       bounding_boxes, format=format, canvas_size=canvas_size, affine_matrix=affine_matrix
    )
-   return expected_bboxes, spatial_size
+   return expected_bboxes, canvas_size


def reference_inputs_crop_bounding_boxes():
...

@@ -507,7 +507,7 @@ def sample_inputs_pad_bounding_boxes():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
-           spatial_size=bounding_boxes_loader.spatial_size,
+           canvas_size=bounding_boxes_loader.canvas_size,
            padding=padding,
            padding_mode="constant",
        )
...

@@ -530,7 +530,7 @@ def sample_inputs_pad_video():
        yield ArgsKwargs(video_loader, padding=[1])


-def reference_pad_bounding_boxes(bounding_boxes, *, format, spatial_size, padding, padding_mode):
+def reference_pad_bounding_boxes(bounding_boxes, *, format, canvas_size, padding, padding_mode):
    left, right, top, bottom = _parse_pad_padding(padding)
...

@@ -542,11 +542,11 @@ def reference_pad_bounding_boxes(bounding_boxes, *, format, spatial_size, paddin
        dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",
    )

-   height = spatial_size[0] + top + bottom
-   width = spatial_size[1] + left + right
+   height = canvas_size[0] + top + bottom
+   width = canvas_size[1] + left + right

    expected_bboxes = reference_affine_bounding_boxes_helper(
-       bounding_boxes, format=format, spatial_size=(height, width), affine_matrix=affine_matrix
+       bounding_boxes, format=format, canvas_size=(height, width), affine_matrix=affine_matrix
    )
    return expected_bboxes, (height, width)
...
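The padding reference above also grows the canvas: the new height adds the top and bottom padding, the new width adds the left and right padding. A small worked example (the numbers are illustrative only, not taken from the tests):

# Suppose the original canvas is (height, width) = (480, 640) and
# _parse_pad_padding(padding) yields left=10, right=10, top=20, bottom=20.
height = 480 + 20 + 20  # canvas_size[0] + top + bottom -> 520
width = 640 + 10 + 10   # canvas_size[1] + left + right -> 660
# The reference then returns the transformed boxes together with the new canvas (520, 660).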
@@ -558,7 +558,7 @@ def reference_inputs_pad_bounding_boxes():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
-           spatial_size=bounding_boxes_loader.spatial_size,
+           canvas_size=bounding_boxes_loader.canvas_size,
            padding=padding,
            padding_mode="constant",
        )
...

@@ -660,7 +660,7 @@ def sample_inputs_perspective_bounding_boxes():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
-           spatial_size=bounding_boxes_loader.spatial_size,
+           canvas_size=bounding_boxes_loader.canvas_size,
            startpoints=None,
            endpoints=None,
            coefficients=_PERSPECTIVE_COEFFS[0],
...

@@ -669,7 +669,7 @@ def sample_inputs_perspective_bounding_boxes():
    format = datapoints.BoundingBoxFormat.XYXY
    loader = make_bounding_box_loader(format=format)
    yield ArgsKwargs(
-       loader, format=format, spatial_size=loader.spatial_size, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS
+       loader, format=format, canvas_size=loader.canvas_size, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS
    )
...

@@ -742,13 +742,13 @@ KERNEL_INFOS.extend(
)


-def _get_elastic_displacement(spatial_size):
-   return torch.rand(1, *spatial_size, 2)
+def _get_elastic_displacement(canvas_size):
+   return torch.rand(1, *canvas_size, 2)


def sample_inputs_elastic_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
-       displacement = _get_elastic_displacement(image_loader.spatial_size)
+       displacement = _get_elastic_displacement(image_loader.canvas_size)
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(image_loader, displacement=displacement, fill=fill)
...

@@ -762,18 +762,18 @@ def reference_inputs_elastic_image_tensor():
            F.InterpolationMode.BICUBIC,
        ],
    ):
-       displacement = _get_elastic_displacement(image_loader.spatial_size)
+       displacement = _get_elastic_displacement(image_loader.canvas_size)
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill)


def sample_inputs_elastic_bounding_boxes():
    for bounding_boxes_loader in make_bounding_box_loaders():
-       displacement = _get_elastic_displacement(bounding_boxes_loader.spatial_size)
+       displacement = _get_elastic_displacement(bounding_boxes_loader.canvas_size)
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
-           spatial_size=bounding_boxes_loader.spatial_size,
+           canvas_size=bounding_boxes_loader.canvas_size,
            displacement=displacement,
        )
...

@@ -850,7 +850,7 @@ def sample_inputs_center_crop_bounding_boxes():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
-           spatial_size=bounding_boxes_loader.spatial_size,
+           canvas_size=bounding_boxes_loader.canvas_size,
            output_size=output_size,
        )
...

@@ -975,7 +975,7 @@ def reference_inputs_equalize_image_tensor():
        image.mul_(torch.iinfo(dtype).max).round_()
        return image.to(dtype=dtype, device=device, memory_format=memory_format, copy=True)

-   spatial_size = (256, 256)
+   canvas_size = (256, 256)
    for dtype, color_space, fn in itertools.product(
        [torch.uint8],
        ["GRAY", "RGB"],
...

@@ -1005,7 +1005,7 @@ def reference_inputs_equalize_image_tensor():
            ],
        ],
    ):
-       image_loader = ImageLoader(fn, shape=(get_num_channels(color_space), *spatial_size), dtype=dtype)
+       image_loader = ImageLoader(fn, shape=(get_num_channels(color_space), *canvas_size), dtype=dtype)
        yield ArgsKwargs(image_loader)
...

@@ -1487,7 +1487,7 @@ def sample_inputs_clamp_bounding_boxes():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
-           spatial_size=bounding_boxes_loader.spatial_size,
+           canvas_size=bounding_boxes_loader.canvas_size,
        )
...

@@ -1502,7 +1502,7 @@ KERNEL_INFOS.append(
_FIVE_TEN_CROP_SIZES = [7, (6,), [5], (6, 5), [7, 6]]


-def _get_five_ten_crop_spatial_size(size):
+def _get_five_ten_crop_canvas_size(size):
    if isinstance(size, int):
        crop_height = crop_width = size
    elif len(size) == 1:
...

@@ -1515,7 +1515,7 @@ def _get_five_ten_crop_spatial_size(size):
def sample_inputs_five_crop_image_tensor():
    for size in _FIVE_TEN_CROP_SIZES:
        for image_loader in make_image_loaders(
-           sizes=[_get_five_ten_crop_spatial_size(size)],
+           sizes=[_get_five_ten_crop_canvas_size(size)],
            color_spaces=["RGB"],
            dtypes=[torch.float32],
        ):
...

@@ -1525,21 +1525,21 @@ def sample_inputs_five_crop_image_tensor():
def reference_inputs_five_crop_image_tensor():
    for size in _FIVE_TEN_CROP_SIZES:
        for image_loader in make_image_loaders(
-           sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()], dtypes=[torch.uint8]
+           sizes=[_get_five_ten_crop_canvas_size(size)], extra_dims=[()], dtypes=[torch.uint8]
        ):
            yield ArgsKwargs(image_loader, size=size)


def sample_inputs_five_crop_video():
    size = _FIVE_TEN_CROP_SIZES[0]
-   for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_spatial_size(size)]):
+   for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_canvas_size(size)]):
        yield ArgsKwargs(video_loader, size=size)


def sample_inputs_ten_crop_image_tensor():
    for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
        for image_loader in make_image_loaders(
-           sizes=[_get_five_ten_crop_spatial_size(size)],
+           sizes=[_get_five_ten_crop_canvas_size(size)],
            color_spaces=["RGB"],
            dtypes=[torch.float32],
        ):
...

@@ -1549,14 +1549,14 @@ def sample_inputs_ten_crop_image_tensor():
def reference_inputs_ten_crop_image_tensor():
    for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
        for image_loader in make_image_loaders(
-           sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()], dtypes=[torch.uint8]
+           sizes=[_get_five_ten_crop_canvas_size(size)], extra_dims=[()], dtypes=[torch.uint8]
        ):
            yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip)


def sample_inputs_ten_crop_video():
    size = _FIVE_TEN_CROP_SIZES[0]
-   for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_spatial_size(size)]):
+   for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_canvas_size(size)]):
        yield ArgsKwargs(video_loader, size=size)
...
torchvision/datapoints/_bounding_box.py
View file @ 312c3d32
...

@@ -30,7 +30,7 @@ class BoundingBoxes(Datapoint):
    Args:
        data: Any data that can be turned into a tensor with :func:`torch.as_tensor`.
        format (BoundingBoxFormat, str): Format of the bounding box.
-       spatial_size (two-tuple of ints): Height and width of the corresponding image or video.
+       canvas_size (two-tuple of ints): Height and width of the corresponding image or video.
        dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from
            ``data``.
        device (torch.device, optional): Desired device of the bounding box. If omitted and ``data`` is a
...
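The docstring change above is the user-facing summary of this commit: `BoundingBoxes` now takes `canvas_size` instead of `spatial_size`, with unchanged semantics (height and width of the corresponding image or video). A minimal illustrative construction, using made-up coordinates and a hypothetical 480x640 canvas:

import torch
from torchvision import datapoints

# Two boxes in XYXY format; canvas_size is (height, width) of the image they belong to.
boxes = datapoints.BoundingBoxes(
    torch.tensor([[10.0, 20.0, 110.0, 220.0], [50.0, 60.0, 70.0, 80.0]]),
    format=datapoints.BoundingBoxFormat.XYXY,
    canvas_size=(480, 640),
)
print(boxes.format, boxes.canvas_size)  # BoundingBoxFormat.XYXY (480, 640)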
@@ -40,13 +40,13 @@ class BoundingBoxes(Datapoint):
    """

    format: BoundingBoxFormat
-   spatial_size: Tuple[int, int]
+   canvas_size: Tuple[int, int]

    @classmethod
-   def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat, spatial_size: Tuple[int, int]) -> BoundingBoxes:
+   def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat, canvas_size: Tuple[int, int]) -> BoundingBoxes:
        bounding_boxes = tensor.as_subclass(cls)
        bounding_boxes.format = format
-       bounding_boxes.spatial_size = spatial_size
+       bounding_boxes.canvas_size = canvas_size
        return bounding_boxes

    def __new__(
...

@@ -54,7 +54,7 @@ class BoundingBoxes(Datapoint):
        data: Any,
        *,
        format: Union[BoundingBoxFormat, str],
-       spatial_size: Tuple[int, int],
+       canvas_size: Tuple[int, int],
        dtype: Optional[torch.dtype] = None,
        device: Optional[Union[torch.device, str, int]] = None,
        requires_grad: Optional[bool] = None,
...

@@ -64,7 +64,7 @@ class BoundingBoxes(Datapoint):
        if isinstance(format, str):
            format = BoundingBoxFormat[format.upper()]
-       return cls._wrap(tensor, format=format, spatial_size=spatial_size)
+       return cls._wrap(tensor, format=format, canvas_size=canvas_size)

    @classmethod
    def wrap_like(
...

@@ -73,7 +73,7 @@ class BoundingBoxes(Datapoint):
        tensor: torch.Tensor,
        *,
        format: Optional[BoundingBoxFormat] = None,
-       spatial_size: Optional[Tuple[int, int]] = None,
+       canvas_size: Optional[Tuple[int, int]] = None,
    ) -> BoundingBoxes:
        """Wrap a :class:`torch.Tensor` as :class:`BoundingBoxes` from a reference.
...

@@ -82,7 +82,7 @@ class BoundingBoxes(Datapoint):
            tensor (Tensor): Tensor to be wrapped as :class:`BoundingBoxes`
            format (BoundingBoxFormat, str, optional): Format of the bounding box. If omitted, it is taken from the
                reference.
-           spatial_size (two-tuple of ints, optional): Height and width of the corresponding image or video. If
+           canvas_size (two-tuple of ints, optional): Height and width of the corresponding image or video. If
                omitted, it is taken from the reference.
        """
...
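`wrap_like` keeps the metadata of a reference box unless it is explicitly overridden, so kernels that change the canvas only need to pass the new `canvas_size`. A hedged usage sketch (the tensor, sizes, and the halving step below are made up for illustration):

import torch
from torchvision import datapoints

reference = datapoints.BoundingBoxes(
    torch.tensor([[10.0, 20.0, 110.0, 220.0]]),
    format=datapoints.BoundingBoxFormat.XYXY,
    canvas_size=(480, 640),
)

# A plain tensor produced by some kernel that halved the canvas, e.g. after a resize.
halved = reference.as_subclass(torch.Tensor) / 2

# format is taken from `reference`; only the canvas is overridden.
resized = datapoints.BoundingBoxes.wrap_like(reference, halved, canvas_size=(240, 320))
print(resized.canvas_size)  # (240, 320)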
@@ -92,21 +92,21 @@ class BoundingBoxes(Datapoint):
        return cls._wrap(
            tensor,
            format=format if format is not None else other.format,
-           spatial_size=spatial_size if spatial_size is not None else other.spatial_size,
+           canvas_size=canvas_size if canvas_size is not None else other.canvas_size,
        )

    def __repr__(self, *, tensor_contents: Any = None) -> str:  # type: ignore[override]
-       return self._make_repr(format=self.format, spatial_size=self.spatial_size)
+       return self._make_repr(format=self.format, canvas_size=self.canvas_size)

    def horizontal_flip(self) -> BoundingBoxes:
        output = self._F.horizontal_flip_bounding_boxes(
-           self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size
+           self.as_subclass(torch.Tensor), format=self.format, canvas_size=self.canvas_size
        )
        return BoundingBoxes.wrap_like(self, output)

    def vertical_flip(self) -> BoundingBoxes:
        output = self._F.vertical_flip_bounding_boxes(
-           self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size
+           self.as_subclass(torch.Tensor), format=self.format, canvas_size=self.canvas_size
        )
        return BoundingBoxes.wrap_like(self, output)
...

@@ -117,25 +117,25 @@ class BoundingBoxes(Datapoint):
        max_size: Optional[int] = None,
        antialias: Optional[Union[str, bool]] = "warn",
    ) -> BoundingBoxes:
-       output, spatial_size = self._F.resize_bounding_boxes(
+       output, canvas_size = self._F.resize_bounding_boxes(
            self.as_subclass(torch.Tensor),
-           spatial_size=self.spatial_size,
+           canvas_size=self.canvas_size,
            size=size,
            max_size=max_size,
        )
-       return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size)
+       return BoundingBoxes.wrap_like(self, output, canvas_size=canvas_size)

    def crop(self, top: int, left: int, height: int, width: int) -> BoundingBoxes:
-       output, spatial_size = self._F.crop_bounding_boxes(
+       output, canvas_size = self._F.crop_bounding_boxes(
            self.as_subclass(torch.Tensor), self.format, top=top, left=left, height=height, width=width
        )
-       return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size)
+       return BoundingBoxes.wrap_like(self, output, canvas_size=canvas_size)

    def center_crop(self, output_size: List[int]) -> BoundingBoxes:
-       output, spatial_size = self._F.center_crop_bounding_boxes(
-           self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, output_size=output_size
+       output, canvas_size = self._F.center_crop_bounding_boxes(
+           self.as_subclass(torch.Tensor), format=self.format, canvas_size=self.canvas_size, output_size=output_size
        )
-       return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size)
+       return BoundingBoxes.wrap_like(self, output, canvas_size=canvas_size)

    def resized_crop(
        self,
...

@@ -147,10 +147,10 @@ class BoundingBoxes(Datapoint):
        interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
        antialias: Optional[Union[str, bool]] = "warn",
    ) -> BoundingBoxes:
-       output, spatial_size = self._F.resized_crop_bounding_boxes(
+       output, canvas_size = self._F.resized_crop_bounding_boxes(
            self.as_subclass(torch.Tensor), self.format, top, left, height, width, size=size
        )
-       return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size)
+       return BoundingBoxes.wrap_like(self, output, canvas_size=canvas_size)

    def pad(
        self,
...

@@ -158,14 +158,14 @@ class BoundingBoxes(Datapoint):
        fill: Optional[Union[int, float, List[float]]] = None,
        padding_mode: str = "constant",
    ) -> BoundingBoxes:
-       output, spatial_size = self._F.pad_bounding_boxes(
+       output, canvas_size = self._F.pad_bounding_boxes(
            self.as_subclass(torch.Tensor),
            format=self.format,
-           spatial_size=self.spatial_size,
+           canvas_size=self.canvas_size,
            padding=padding,
            padding_mode=padding_mode,
        )
-       return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size)
+       return BoundingBoxes.wrap_like(self, output, canvas_size=canvas_size)

    def rotate(
        self,
...

@@ -175,15 +175,15 @@ class BoundingBoxes(Datapoint):
        center: Optional[List[float]] = None,
        fill: _FillTypeJIT = None,
    ) -> BoundingBoxes:
-       output, spatial_size = self._F.rotate_bounding_boxes(
+       output, canvas_size = self._F.rotate_bounding_boxes(
            self.as_subclass(torch.Tensor),
            format=self.format,
-           spatial_size=self.spatial_size,
+           canvas_size=self.canvas_size,
            angle=angle,
            expand=expand,
            center=center,
        )
-       return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size)
+       return BoundingBoxes.wrap_like(self, output, canvas_size=canvas_size)

    def affine(
        self,
...

@@ -198,7 +198,7 @@ class BoundingBoxes(Datapoint):
        output = self._F.affine_bounding_boxes(
            self.as_subclass(torch.Tensor),
            self.format,
-           self.spatial_size,
+           self.canvas_size,
            angle,
            translate=translate,
            scale=scale,
...

@@ -218,7 +218,7 @@ class BoundingBoxes(Datapoint):
        output = self._F.perspective_bounding_boxes(
            self.as_subclass(torch.Tensor),
            format=self.format,
-           spatial_size=self.spatial_size,
+           canvas_size=self.canvas_size,
            startpoints=startpoints,
            endpoints=endpoints,
            coefficients=coefficients,
...

@@ -232,6 +232,6 @@ class BoundingBoxes(Datapoint):
        fill: _FillTypeJIT = None,
    ) -> BoundingBoxes:
        output = self._F.elastic_bounding_boxes(
-           self.as_subclass(torch.Tensor), self.format, self.spatial_size, displacement=displacement
+           self.as_subclass(torch.Tensor), self.format, self.canvas_size, displacement=displacement
        )
        return BoundingBoxes.wrap_like(self, output)
torchvision/datapoints/_datapoint.py
View file @ 312c3d32
...

@@ -138,7 +138,7 @@ class Datapoint(torch.Tensor):
        # *not* happen for `deepcopy(Tensor)`. A side-effect from detaching is that the `Tensor.requires_grad`
        # attribute is cleared, so we need to refill it before we return.
        # Note: We don't explicitly handle deep-copying of the metadata here. The only metadata we currently have is
-       # `BoundingBoxes.format` and `BoundingBoxes.spatial_size`, which are immutable and thus implicitly deep-copied by
+       # `BoundingBoxes.format` and `BoundingBoxes.canvas_size`, which are immutable and thus implicitly deep-copied by
        # `BoundingBoxes.clone()`.
        return self.detach().clone().requires_grad_(self.requires_grad)  # type: ignore[return-value]
...
torchvision/datapoints/_dataset_wrapper.py
View file @ 312c3d32
...

@@ -341,13 +341,13 @@ def coco_dectection_wrapper_factory(dataset, target_keys):
        default={"image_id", "boxes", "labels"},
    )

-   def segmentation_to_mask(segmentation, *, spatial_size):
+   def segmentation_to_mask(segmentation, *, canvas_size):
        from pycocotools import mask

        segmentation = (
-           mask.frPyObjects(segmentation, *spatial_size)
+           mask.frPyObjects(segmentation, *canvas_size)
            if isinstance(segmentation, dict)
-           else mask.merge(mask.frPyObjects(segmentation, *spatial_size))
+           else mask.merge(mask.frPyObjects(segmentation, *canvas_size))
        )
        return torch.from_numpy(mask.decode(segmentation))
...
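For context, the helper above turns a COCO-style segmentation (an uncompressed RLE dict or a list of polygons) into a dense mask on the given canvas. A conceptual sketch only, since the helper is local to the wrapper factory; the polygon and canvas below are made up:

# A hypothetical COCO polygon annotation on a 480x640 image; canvas_size is (height, width).
segmentation = [[10.0, 10.0, 100.0, 10.0, 100.0, 80.0, 10.0, 80.0]]
mask = segmentation_to_mask(segmentation, canvas_size=(480, 640))
print(mask.shape)  # torch.Size([480, 640]); nonzero inside the polygon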
@@ -359,7 +359,7 @@ def coco_dectection_wrapper_factory(dataset, target_keys):
        if not target:
            return image, dict(image_id=image_id)

-       spatial_size = tuple(F.get_spatial_size(image))
+       canvas_size = tuple(F.get_size(image))

        batched_target = list_of_dicts_to_dict_of_lists(target)
        target = {}
...

@@ -372,7 +372,7 @@ def coco_dectection_wrapper_factory(dataset, target_keys):
                datapoints.BoundingBoxes(
                    batched_target["bbox"],
                    format=datapoints.BoundingBoxFormat.XYWH,
-                   spatial_size=spatial_size,
+                   canvas_size=canvas_size,
                ),
                new_format=datapoints.BoundingBoxFormat.XYXY,
            )
...

@@ -381,7 +381,7 @@ def coco_dectection_wrapper_factory(dataset, target_keys):
            target["masks"] = datapoints.Mask(
                torch.stack(
                    [
-                       segmentation_to_mask(segmentation, spatial_size=spatial_size)
+                       segmentation_to_mask(segmentation, canvas_size=canvas_size)
                        for segmentation in batched_target["segmentation"]
                    ]
                ),
...

@@ -456,7 +456,7 @@ def voc_detection_wrapper_factory(dataset, target_keys):
                for bndbox in batched_instances["bndbox"]
            ],
            format=datapoints.BoundingBoxFormat.XYXY,
-           spatial_size=(image.height, image.width),
+           canvas_size=(image.height, image.width),
        )

    if "labels" in target_keys:
...

@@ -493,7 +493,7 @@ def celeba_wrapper_factory(dataset, target_keys):
                datapoints.BoundingBoxes(
                    item,
                    format=datapoints.BoundingBoxFormat.XYWH,
-                   spatial_size=(image.height, image.width),
+                   canvas_size=(image.height, image.width),
                ),
                new_format=datapoints.BoundingBoxFormat.XYXY,
            ),
...

@@ -543,7 +543,7 @@ def kitti_wrapper_factory(dataset, target_keys):
        target["boxes"] = datapoints.BoundingBoxes(
            batched_target["bbox"],
            format=datapoints.BoundingBoxFormat.XYXY,
-           spatial_size=(image.height, image.width),
+           canvas_size=(image.height, image.width),
        )

    if "labels" in target_keys:
...

@@ -638,7 +638,7 @@ def widerface_wrapper(dataset, target_keys):
    if "bbox" in target_keys:
        target["bbox"] = F.convert_format_bounding_boxes(
            datapoints.BoundingBoxes(
-               target["bbox"], format=datapoints.BoundingBoxFormat.XYWH, spatial_size=(image.height, image.width)
+               target["bbox"], format=datapoints.BoundingBoxFormat.XYWH, canvas_size=(image.height, image.width)
            ),
            new_format=datapoints.BoundingBoxFormat.XYXY,
        )
...
torchvision/datapoints/_image.py
View file @ 312c3d32

from __future__ import annotations

-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Union

import PIL.Image
import torch
...

@@ -56,14 +56,6 @@ class Image(Datapoint):
    def __repr__(self, *, tensor_contents: Any = None) -> str:  # type: ignore[override]
        return self._make_repr()

-   @property
-   def spatial_size(self) -> Tuple[int, int]:
-       return tuple(self.shape[-2:])  # type: ignore[return-value]
-
    @property
    def num_channels(self) -> int:
        return self.shape[-3]

    def horizontal_flip(self) -> Image:
        output = self._F.horizontal_flip_image_tensor(self.as_subclass(torch.Tensor))
        return Image.wrap_like(self, output)
...
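With the `spatial_size` property removed from `Image` (and, below, from `Mask` and `Video`), the height and width of these datapoints are read from the tensor shape or via `F.get_size`. A small sketch under that assumption, with a hypothetical image:

import torch
from torchvision import datapoints
from torchvision.transforms.v2 import functional as F

image = datapoints.Image(torch.rand(3, 480, 640))

# Both of these yield the height/width formerly exposed as `image.spatial_size`.
print(tuple(image.shape[-2:]))  # (480, 640)
print(F.get_size(image))        # [480, 640]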
torchvision/datapoints/_mask.py
View file @ 312c3d32

from __future__ import annotations

-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Union

import PIL.Image
import torch
...

@@ -51,10 +51,6 @@ class Mask(Datapoint):
    ) -> Mask:
        return cls._wrap(tensor)

-   @property
-   def spatial_size(self) -> Tuple[int, int]:
-       return tuple(self.shape[-2:])  # type: ignore[return-value]
-
    def horizontal_flip(self) -> Mask:
        output = self._F.horizontal_flip_mask(self.as_subclass(torch.Tensor))
        return Mask.wrap_like(self, output)
...
torchvision/datapoints/_video.py
View file @ 312c3d32

from __future__ import annotations

-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Union

import torch
from torchvision.transforms.functional import InterpolationMode
...

@@ -46,18 +46,6 @@ class Video(Datapoint):
    def __repr__(self, *, tensor_contents: Any = None) -> str:  # type: ignore[override]
        return self._make_repr()

-   @property
-   def spatial_size(self) -> Tuple[int, int]:
-       return tuple(self.shape[-2:])  # type: ignore[return-value]
-
    @property
    def num_channels(self) -> int:
        return self.shape[-3]

    @property
    def num_frames(self) -> int:
        return self.shape[-4]

    def horizontal_flip(self) -> Video:
        output = self._F.horizontal_flip_video(self.as_subclass(torch.Tensor))
        return Video.wrap_like(self, output)
...
torchvision/prototype/transforms/_augment.py
View file @ 312c3d32
...

@@ -11,7 +11,7 @@ from torchvision.transforms.v2 import functional as F, InterpolationMode, Transf
from torchvision.transforms.v2._transform import _RandomApplyTransform
from torchvision.transforms.v2.functional._geometry import _check_interpolation
-from torchvision.transforms.v2.utils import has_any, is_simple_tensor, query_spatial_size
+from torchvision.transforms.v2.utils import has_any, is_simple_tensor, query_size


class _BaseMixupCutmix(_RandomApplyTransform):
...

@@ -64,7 +64,7 @@ class RandomCutmix(_BaseMixupCutmix):
    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        lam = float(self._dist.sample(()))  # type: ignore[arg-type]

-       H, W = query_spatial_size(flat_inputs)
+       H, W = query_size(flat_inputs)

        r_x = torch.randint(W, ())
        r_y = torch.randint(H, ())
...
torchvision/prototype/transforms/_geometry.py
View file @ 312c3d32
...

@@ -7,7 +7,7 @@ from torchvision import datapoints
from torchvision.prototype.datapoints import Label, OneHotLabel
from torchvision.transforms.v2 import functional as F, Transform
from torchvision.transforms.v2._utils import _setup_fill_arg, _setup_size
-from torchvision.transforms.v2.utils import has_any, is_simple_tensor, query_bounding_boxes, query_spatial_size
+from torchvision.transforms.v2.utils import has_any, is_simple_tensor, query_bounding_boxes, query_size


class FixedSizeCrop(Transform):
...

@@ -46,7 +46,7 @@ class FixedSizeCrop(Transform):
        )

    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
-       height, width = query_spatial_size(flat_inputs)
+       height, width = query_size(flat_inputs)
        new_height = min(height, self.crop_height)
        new_width = min(width, self.crop_width)
...

@@ -67,7 +67,7 @@ class FixedSizeCrop(Transform):
        if needs_crop and bounding_boxes is not None:
            format = bounding_boxes.format
-           bounding_boxes, spatial_size = F.crop_bounding_boxes(
+           bounding_boxes, canvas_size = F.crop_bounding_boxes(
                bounding_boxes.as_subclass(torch.Tensor),
                format=format,
                top=top,
...

@@ -75,7 +75,7 @@ class FixedSizeCrop(Transform):
                height=new_height,
                width=new_width,
            )
-           bounding_boxes = F.clamp_bounding_boxes(bounding_boxes, format=format, spatial_size=spatial_size)
+           bounding_boxes = F.clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size)
            height_and_width = F.convert_format_bounding_boxes(
                bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYWH
            )[..., 2:]
...

@@ -115,9 +115,7 @@ class FixedSizeCrop(Transform):
        elif isinstance(inpt, datapoints.BoundingBoxes):
            inpt = datapoints.BoundingBoxes.wrap_like(
                inpt,
-               F.clamp_bounding_boxes(
-                   inpt[params["is_valid"]], format=inpt.format, spatial_size=inpt.spatial_size
-               ),
+               F.clamp_bounding_boxes(inpt[params["is_valid"]], format=inpt.format, canvas_size=inpt.canvas_size),
            )

        if params["needs_pad"]:
...
torchvision/transforms/v2/_augment.py
View file @ 312c3d32
...

@@ -12,7 +12,7 @@ from torchvision.transforms.v2 import functional as F
from ._transform import _RandomApplyTransform, Transform
from ._utils import _parse_labels_getter
-from .utils import has_any, is_simple_tensor, query_chw, query_spatial_size
+from .utils import has_any, is_simple_tensor, query_chw, query_size


class RandomErasing(_RandomApplyTransform):
...

@@ -284,7 +284,7 @@ class Cutmix(_BaseMixupCutmix):
    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        lam = float(self._dist.sample(()))  # type: ignore[arg-type]

-       H, W = query_spatial_size(flat_inputs)
+       H, W = query_size(flat_inputs)

        r_x = torch.randint(W, size=(1,))
        r_y = torch.randint(H, size=(1,))
...
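The hunk above only shows the start of `Cutmix._get_params`: it samples a mixing coefficient `lam` and a random box center `(r_x, r_y)` on the `H` x `W` canvas returned by `query_size`. As a hedged sketch of the standard CutMix recipe (an illustration of the technique, not necessarily the exact code that follows in this file), the cut box is typically derived like this:

import math
import torch

# Hypothetical values standing in for what _get_params samples.
lam, H, W = 0.7, 480, 640
r_x, r_y = torch.randint(W, size=(1,)), torch.randint(H, size=(1,))

# Standard CutMix: the cut area is proportional to (1 - lam).
r = 0.5 * math.sqrt(1.0 - lam)
r_w_half, r_h_half = int(r * W), int(r * H)

x1 = int(torch.clamp(r_x - r_w_half, min=0))
y1 = int(torch.clamp(r_y - r_h_half, min=0))
x2 = int(torch.clamp(r_x + r_w_half, max=W))
y2 = int(torch.clamp(r_y + r_h_half, max=H))
box = (x1, y1, x2, y2)  # region pasted from the shuffled batch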