Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
GroundingDINO_mmcv
Commits
b12850fe
Commit
b12850fe
authored
May 29, 2024
by
dengjb
Browse files
update codes
parent
6515fb96
Pipeline
#1046
failed with stages
in 0 seconds
Changes
364
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2155 additions
and
0 deletions
+2155
-0
mmdet/datasets/transforms/frame_sampling.py
mmdet/datasets/transforms/frame_sampling.py
+177
-0
mmdet/datasets/transforms/geometric.py
mmdet/datasets/transforms/geometric.py
+754
-0
mmdet/datasets/transforms/instaboost.py
mmdet/datasets/transforms/instaboost.py
+150
-0
mmdet/datasets/transforms/loading.py
mmdet/datasets/transforms/loading.py
+1074
-0
No files found.
Too many changes to show.
To preserve performance only
364 of 364+
files are displayed.
Plain diff
Email patch
mmdet/datasets/transforms/frame_sampling.py
0 → 100644
View file @
b12850fe
# Copyright (c) OpenMMLab. All rights reserved.
import
random
from
collections
import
defaultdict
from
typing
import
Dict
,
List
,
Optional
,
Union
from
mmcv.transforms
import
BaseTransform
from
mmdet.registry
import
TRANSFORMS
@TRANSFORMS.register_module()
class BaseFrameSample(BaseTransform):
    """Directly get the key frame, no reference frames.

    Args:
        collect_video_keys (list[str]): The keys of video info to be
            collected. Defaults to ``['video_id', 'video_length']``.
    """

    def __init__(self,
                 collect_video_keys: List[str] = ['video_id', 'video_length']):
        # Keep a private copy: the default list object is shared by every
        # call that omits the argument, so storing it by reference would let
        # one instance's edits leak into all the others.
        self.collect_video_keys = list(collect_video_keys)

    def prepare_data(self, video_infos: dict,
                     sampled_inds: List[int]) -> Dict[str, List]:
        """Prepare data for the subsequent pipeline.

        Each sampled frame record is extended with the video-level keys
        listed in ``self.collect_video_keys`` and the per-frame dicts are
        then collated into a single dict of lists.

        Args:
            video_infos (dict): The whole video information. Must contain
                ``'images'`` (indexable by frame index) and every key in
                ``self.collect_video_keys``.
            sampled_inds (list[int]): The sampled frame indices.

        Returns:
            dict: The processed data information, mapping each key to a list
            with one entry per sampled frame.
        """
        frames_anns = video_infos['images']
        final_data_info = defaultdict(list)
        num_sampled = len(sampled_inds)
        for index in sampled_inds:
            # Work on a shallow copy so the caller's frame records are not
            # modified when video-level keys are injected below.
            data = dict(frames_anns[index])
            # copy the info in video-level into img-level
            for key in self.collect_video_keys:
                if key == 'video_length':
                    # Keep the real length and expose the sampled length
                    # under the original key.
                    data['ori_video_length'] = video_infos[key]
                    data['video_length'] = num_sampled
                else:
                    data[key] = video_infos[key]
            # Collate data_list (list of dict to dict of list)
            for key, value in data.items():
                final_data_info[key].append(value)

        return final_data_info

    def transform(self, video_infos: dict) -> Optional[Dict[str, List]]:
        """Transform the video information.

        Uses ``video_infos['key_frame_id']`` when present, otherwise draws a
        random frame index in ``[0, video_length)``.

        Args:
            video_infos (dict): The whole video information.

        Returns:
            dict: The data information of the key frames.
        """
        if 'key_frame_id' in video_infos:
            key_frame_id = video_infos['key_frame_id']
            assert isinstance(key_frame_id, int)
        else:
            # A range is already a sequence; no need to build a list first.
            key_frame_id = random.sample(
                range(video_infos['video_length']), 1)[0]
        results = self.prepare_data(video_infos, [key_frame_id])

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(collect_video_keys={self.collect_video_keys})'
        return repr_str
@TRANSFORMS.register_module()
class UniformRefFrameSample(BaseFrameSample):
    """Uniformly sample reference frames.

    Args:
        num_ref_imgs (int): Number of reference frames to be sampled.
            Defaults to 1.
        frame_range (int | list[int]): Range of frames to be sampled around
            key frame. If int, the range is [-frame_range, frame_range].
            Defaults to 10.
        filter_key_img (bool): Whether to filter the key frame when
            sampling reference frames. Defaults to True.
        collect_video_keys (list[str]): The keys of video info to be
            collected. Defaults to ``['video_id', 'video_length']``.
    """

    def __init__(self,
                 num_ref_imgs: int = 1,
                 frame_range: Union[int, List[int]] = 10,
                 filter_key_img: bool = True,
                 collect_video_keys: List[str] = ['video_id', 'video_length']):
        self.num_ref_imgs = num_ref_imgs
        self.filter_key_img = filter_key_img
        if isinstance(frame_range, int):
            assert frame_range >= 0, 'frame_range can not be a negative value.'
            # A scalar describes a symmetric window around the key frame.
            frame_range = [-frame_range, frame_range]
        elif isinstance(frame_range, list):
            assert len(frame_range) == 2, 'The length must be 2.'
            assert frame_range[0] <= 0 and frame_range[1] >= 0
            for i in frame_range:
                assert isinstance(i, int), 'Each element must be int.'
        else:
            raise TypeError('The type of frame_range must be int or list.')
        self.frame_range = frame_range
        # Hand the parent a copy so the shared default list is never stored
        # by reference on an instance.
        super().__init__(collect_video_keys=list(collect_video_keys))

    def sampling_frames(self, video_length: int, key_frame_id: int) -> tuple:
        """Sampling frames.

        Args:
            video_length (int): The length of the video.
            key_frame_id (int): The key frame id.

        Returns:
            tuple(list[int], list[bool]): The sorted sampled frame indices
            (key frame plus reference frames) and a same-length list of
            flags in which exactly one entry, the first occurrence of the
            key frame, is True.
        """
        if video_length > 1:
            left = max(0, key_frame_id + self.frame_range[0])
            right = min(key_frame_id + self.frame_range[1], video_length - 1)
            # Slicing the range gives the same ids as slicing a fully
            # materialised list, without allocating video_length items.
            valid_ids = list(range(video_length)[left:right + 1])
            if self.filter_key_img and key_frame_id in valid_ids:
                valid_ids.remove(key_frame_id)
            assert len(
                valid_ids
            ) > 0, 'After filtering key frame, there are no valid frames'
            if len(valid_ids) < self.num_ref_imgs:
                # Too few candidates: repeat them so random.sample has a
                # large enough population (duplicates become possible).
                valid_ids = valid_ids * self.num_ref_imgs
            ref_frame_ids = random.sample(valid_ids, self.num_ref_imgs)
        else:
            # Single-frame video: the key frame doubles as every reference.
            ref_frame_ids = [key_frame_id] * self.num_ref_imgs

        sampled_frames_ids = [key_frame_id] + ref_frame_ids
        sampled_frames_ids = sorted(sampled_frames_ids)

        key_frames_ind = sampled_frames_ids.index(key_frame_id)
        key_frame_flags = [False] * len(sampled_frames_ids)
        key_frame_flags[key_frames_ind] = True
        return sampled_frames_ids, key_frame_flags

    def transform(self, video_infos: dict) -> Optional[Dict[str, List]]:
        """Transform the video information.

        Uses ``video_infos['key_frame_id']`` when present, otherwise draws a
        random frame index in ``[0, video_length)``; then samples reference
        frames around it.

        Args:
            video_infos (dict): The whole video information.

        Returns:
            dict: The data information of the sampled frames, with an extra
            ``'key_frame_flags'`` entry marking the key frame.
        """
        if 'key_frame_id' in video_infos:
            key_frame_id = video_infos['key_frame_id']
            assert isinstance(key_frame_id, int)
        else:
            # A range is already a sequence; no need to build a list first.
            key_frame_id = random.sample(
                range(video_infos['video_length']), 1)[0]

        (sampled_frames_ids, key_frame_flags) = self.sampling_frames(
            video_infos['video_length'], key_frame_id=key_frame_id)
        results = self.prepare_data(video_infos, sampled_frames_ids)
        results['key_frame_flags'] = key_frame_flags

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(num_ref_imgs={self.num_ref_imgs}, '
        repr_str += f'frame_range={self.frame_range}, '
        repr_str += f'filter_key_img={self.filter_key_img}, '
        repr_str += f'collect_video_keys={self.collect_video_keys})'
        return repr_str
mmdet/datasets/transforms/geometric.py
0 → 100644
View file @
b12850fe
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Optional
,
Union
import
cv2
import
mmcv
import
numpy
as
np
from
mmcv.transforms
import
BaseTransform
from
mmcv.transforms.utils
import
cache_randomness
from
mmdet.registry
import
TRANSFORMS
from
mmdet.structures.bbox
import
autocast_box_type
from
.augment_wrappers
import
_MAX_LEVEL
,
level_to_mag
@TRANSFORMS.register_module()
class GeomTransform(BaseTransform):
    """Base class for geometric transformations. All geometric transformations
    need to inherit from this base class. ``GeomTransform`` unifies the class
    attributes and class functions of geometric transformations (ShearX,
    ShearY, Rotate, TranslateX, and TranslateY), and records the homography
    matrix.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): The probability for performing the geometric
            transformation and should be in range [0, 1]. Defaults to 1.0.
        level (int, optional): The level should be in range [0, _MAX_LEVEL].
            If level is None, it will generate from [0, _MAX_LEVEL] randomly.
            Defaults to None.
        min_mag (float): The minimum magnitude for geometric transformation.
            Defaults to 0.0.
        max_mag (float): The maximum magnitude for geometric transformation.
            Defaults to 1.0.
        reversal_prob (float): The probability that reverses the geometric
            transformation magnitude. Should be in range [0,1].
            Defaults to 0.5.
        img_border_value (int | float | tuple): The filled values for
            image border. If float, the same fill value will be used for
            all the three channels of image. If tuple, it should be 3 elements.
            Defaults to 128.
        mask_border_value (int): The fill value used for masks. Defaults to 0.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 1.0,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        assert 0 <= prob <= 1.0, f'The probability of the transformation ' \
                                 f'should be in range [0,1], got {prob}.'
        assert level is None or isinstance(level, int), \
            f'The level should be None or type int, got {type(level)}.'
        assert level is None or 0 <= level <= _MAX_LEVEL, \
            f'The level should be in range [0,{_MAX_LEVEL}], got {level}.'
        assert isinstance(min_mag, float), \
            f'min_mag should be type float, got {type(min_mag)}.'
        assert isinstance(max_mag, float), \
            f'max_mag should be type float, got {type(max_mag)}.'
        assert min_mag <= max_mag, \
            f'min_mag should smaller than max_mag, ' \
            f'got min_mag={min_mag} and max_mag={max_mag}'
        # Report the offending argument itself (the message previously
        # printed the type of max_mag).
        assert isinstance(reversal_prob, float), \
            f'reversal_prob should be type float, ' \
            f'got {type(reversal_prob)}.'
        # This is a range check, so report the value and the valid range
        # (the message previously talked about the type).
        assert 0 <= reversal_prob <= 1.0, \
            f'The reversal probability of the transformation magnitude ' \
            f'should be in range [0,1], got {reversal_prob}.'
        # Normalise the border value to a 3-tuple of floats.
        if isinstance(img_border_value, (float, int)):
            img_border_value = tuple([float(img_border_value)] * 3)
        elif isinstance(img_border_value, tuple):
            assert len(img_border_value) == 3, \
                f'img_border_value as tuple must have 3 elements, ' \
                f'got {len(img_border_value)}.'
            img_border_value = tuple([float(val) for val in img_border_value])
        else:
            raise ValueError(
                'img_border_value must be float or tuple with 3 elements.')
        assert np.all([
            0 <= val <= 255 for val in img_border_value
        ]), 'all elements of img_border_value should between range [0,255].' \
            f'got {img_border_value}.'
        self.prob = prob
        self.level = level
        self.min_mag = min_mag
        self.max_mag = max_mag
        self.reversal_prob = reversal_prob
        self.img_border_value = img_border_value
        self.mask_border_value = mask_border_value
        self.seg_ignore_label = seg_ignore_label
        self.interpolation = interpolation

    def _transform_img(self, results: dict, mag: float) -> None:
        """Transform the image. No-op here; subclasses override it."""
        pass

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Transform the masks. No-op here; subclasses override it."""
        pass

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Transform the segmentation map. No-op here; subclasses override
        it."""
        pass

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Get the homography matrix for the geometric transformation.

        The base implementation returns the 3x3 float32 identity.
        """
        return np.eye(3, dtype=np.float32)

    def _transform_bboxes(self, results: dict, mag: float) -> None:
        """Transform the bboxes.

        Projects ``results['gt_bboxes']`` in place with the homography
        stored on ``self`` and clips them to ``results['img_shape']``.
        """
        results['gt_bboxes'].project_(self.homography_matrix)
        results['gt_bboxes'].clip_(results['img_shape'])

    def _record_homography_matrix(self, results: dict) -> None:
        """Record the homography matrix for the geometric transformation.

        If ``results`` already holds a matrix, the current one is
        left-multiplied onto it; otherwise the current one is stored as is.
        """
        if results.get('homography_matrix', None) is None:
            results['homography_matrix'] = self.homography_matrix
        else:
            results['homography_matrix'] = self.homography_matrix @ results[
                'homography_matrix']

    @cache_randomness
    def _random_disable(self):
        """Randomly disable the transform.

        Returns True (skip) when a uniform draw exceeds ``self.prob``.
        """
        return np.random.rand() > self.prob

    @cache_randomness
    def _get_mag(self):
        """Get the magnitude of the transform."""
        mag = level_to_mag(self.level, self.min_mag, self.max_mag)
        # NOTE(review): the sign is flipped when the draw is *greater* than
        # reversal_prob, i.e. with probability 1 - reversal_prob. That only
        # matches the documented meaning of reversal_prob at the default
        # 0.5 -- confirm the intended semantics before changing.
        return -mag if np.random.rand() > self.reversal_prob else mag

    @autocast_box_type()
    def transform(self, results: dict) -> dict:
        """Transform function for images, bounding boxes, masks and semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Transformed results. Returned untouched when the transform
            is randomly disabled.
        """

        if self._random_disable():
            return results
        mag = self._get_mag()
        # The matrix is kept on the instance because _transform_bboxes and
        # _record_homography_matrix read it from there.
        self.homography_matrix = self._get_homography_matrix(results, mag)
        self._record_homography_matrix(results)
        self._transform_img(results, mag)
        if results.get('gt_bboxes', None) is not None:
            self._transform_bboxes(results, mag)
        if results.get('gt_masks', None) is not None:
            self._transform_masks(results, mag)
        if results.get('gt_seg_map', None) is not None:
            self._transform_seg(results, mag)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob}, '
        repr_str += f'level={self.level}, '
        repr_str += f'min_mag={self.min_mag}, '
        repr_str += f'max_mag={self.max_mag}, '
        repr_str += f'reversal_prob={self.reversal_prob}, '
        repr_str += f'img_border_value={self.img_border_value}, '
        repr_str += f'mask_border_value={self.mask_border_value}, '
        repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
        repr_str += f'interpolation={self.interpolation})'
        return repr_str
@TRANSFORMS.register_module()
class ShearX(GeomTransform):
    """Horizontal shear of the image, bboxes, masks and segmentation map.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): Probability of applying the shear, in [0, 1].
            Defaults to 1.0.
        level (int, optional): Level in [0, _MAX_LEVEL]; when None it is
            generated randomly from that range. Defaults to None.
        min_mag (float): Minimum horizontal shear angle, in [0, 90].
            Defaults to 0.0.
        max_mag (float): Maximum horizontal shear angle, in [0, 90].
            Defaults to 30.0.
        reversal_prob (float): Probability parameter controlling the sign
            reversal of the shear magnitude, in [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): Fill value for the image
            border. A scalar is used for all three channels; a tuple must
            have 3 elements. Defaults to 128.
        mask_border_value (int): Fill value used for masks. Defaults to 0.
        seg_ignore_label (int): Fill value used for the segmentation map.
            Must equal ``ignore_label`` in ``semantic_head`` of the
            corresponding config. Defaults to 255.
        interpolation (str): Interpolation method: "nearest", "bilinear",
            "bicubic", "area", "lanczos" for the 'cv2' backend; "nearest",
            "bilinear" for the 'pillow' backend. Defaults to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 30.0,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Angle bounds are validated here; everything else is validated by
        # the base class.
        assert 0. <= min_mag <= 90., \
            f'min_mag angle for ShearX should be ' \
            f'in range [0, 90], got {min_mag}.'
        assert 0. <= max_mag <= 90., \
            f'max_mag angle for ShearX should be ' \
            f'in range [0, 90], got {max_mag}.'
        base_kwargs = dict(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)
        super().__init__(**base_kwargs)

    @cache_randomness
    def _get_mag(self):
        """Return the signed shear factor: tan of the angle, in degrees,
        derived from the level."""
        angle = level_to_mag(self.level, self.min_mag, self.max_mag)
        factor = np.tan(angle * np.pi / 180)
        # Sign is flipped when the draw exceeds reversal_prob.
        if np.random.rand() > self.reversal_prob:
            return -factor
        return factor

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Build the 3x3 float32 matrix of a horizontal shear by ``mag``."""
        matrix = np.eye(3, dtype=np.float32)
        matrix[0, 1] = mag
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Apply the horizontal shear to ``results['img']``."""
        sheared = mmcv.imshear(
            results['img'],
            mag,
            direction='horizontal',
            border_value=self.img_border_value,
            interpolation=self.interpolation)
        results['img'] = sheared

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Apply the horizontal shear to ``results['gt_masks']``."""
        masks = results['gt_masks']
        results['gt_masks'] = masks.shear(
            results['img_shape'],
            mag,
            direction='horizontal',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Apply the horizontal shear to ``results['gt_seg_map']`` using
        nearest interpolation."""
        sheared = mmcv.imshear(
            results['gt_seg_map'],
            mag,
            direction='horizontal',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
        results['gt_seg_map'] = sheared
@TRANSFORMS.register_module()
class ShearY(GeomTransform):
    """Vertical shear of the image, bboxes, masks and segmentation map.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): Probability of applying the shear, in [0, 1].
            Defaults to 1.0.
        level (int, optional): Level in [0, _MAX_LEVEL]; when None it is
            generated randomly from that range. Defaults to None.
        min_mag (float): Minimum vertical shear angle, in [0, 90].
            Defaults to 0.0.
        max_mag (float): Maximum vertical shear angle, in [0, 90].
            Defaults to 30.0.
        reversal_prob (float): Probability parameter controlling the sign
            reversal of the shear magnitude, in [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): Fill value for the image
            border. A scalar is used for all three channels; a tuple must
            have 3 elements. Defaults to 128.
        mask_border_value (int): Fill value used for masks. Defaults to 0.
        seg_ignore_label (int): Fill value used for the segmentation map.
            Must equal ``ignore_label`` in ``semantic_head`` of the
            corresponding config. Defaults to 255.
        interpolation (str): Interpolation method: "nearest", "bilinear",
            "bicubic", "area", "lanczos" for the 'cv2' backend; "nearest",
            "bilinear" for the 'pillow' backend. Defaults to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 30.,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Angle bounds are validated here; everything else is validated by
        # the base class.
        assert 0. <= min_mag <= 90., \
            f'min_mag angle for ShearY should be ' \
            f'in range [0, 90], got {min_mag}.'
        assert 0. <= max_mag <= 90., \
            f'max_mag angle for ShearY should be ' \
            f'in range [0, 90], got {max_mag}.'
        base_kwargs = dict(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)
        super().__init__(**base_kwargs)

    @cache_randomness
    def _get_mag(self):
        """Return the signed shear factor: tan of the angle, in degrees,
        derived from the level."""
        angle = level_to_mag(self.level, self.min_mag, self.max_mag)
        factor = np.tan(angle * np.pi / 180)
        # Sign is flipped when the draw exceeds reversal_prob.
        if np.random.rand() > self.reversal_prob:
            return -factor
        return factor

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Build the 3x3 float32 matrix of a vertical shear by ``mag``."""
        matrix = np.eye(3, dtype=np.float32)
        matrix[1, 0] = mag
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Apply the vertical shear to ``results['img']``."""
        sheared = mmcv.imshear(
            results['img'],
            mag,
            direction='vertical',
            border_value=self.img_border_value,
            interpolation=self.interpolation)
        results['img'] = sheared

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Apply the vertical shear to ``results['gt_masks']``."""
        masks = results['gt_masks']
        results['gt_masks'] = masks.shear(
            results['img_shape'],
            mag,
            direction='vertical',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Apply the vertical shear to ``results['gt_seg_map']`` using
        nearest interpolation."""
        sheared = mmcv.imshear(
            results['gt_seg_map'],
            mag,
            direction='vertical',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
        results['gt_seg_map'] = sheared
@TRANSFORMS.register_module()
class Rotate(GeomTransform):
    """Rotation of the image, bboxes, masks and segmentation map.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): Probability of applying the rotation, in [0, 1].
            Defaults to 1.0.
        level (int, optional): Level in [0, _MAX_LEVEL]; when None it is
            generated randomly from that range. Defaults to None.
        min_mag (float): Minimum rotation angle, in [0, 180].
            Defaults to 0.0.
        max_mag (float): Maximum rotation angle, in [0, 180].
            Defaults to 30.0.
        reversal_prob (float): Probability parameter controlling the sign
            reversal of the rotation magnitude, in [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): Fill value for the image
            border. A scalar is used for all three channels; a tuple must
            have 3 elements. Defaults to 128.
        mask_border_value (int): Fill value used for masks. Defaults to 0.
        seg_ignore_label (int): Fill value used for the segmentation map.
            Must equal ``ignore_label`` in ``semantic_head`` of the
            corresponding config. Defaults to 255.
        interpolation (str): Interpolation method: "nearest", "bilinear",
            "bicubic", "area", "lanczos" for the 'cv2' backend; "nearest",
            "bilinear" for the 'pillow' backend. Defaults to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 30.0,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Angle bounds are validated here; everything else is validated by
        # the base class.
        assert 0. <= min_mag <= 180., \
            f'min_mag for Rotate should be in range [0,180], got {min_mag}.'
        assert 0. <= max_mag <= 180., \
            f'max_mag for Rotate should be in range [0,180], got {max_mag}.'
        base_kwargs = dict(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)
        super().__init__(**base_kwargs)

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Build the 3x3 float32 rotation matrix about the image centre.

        The 2x3 affine part comes from ``cv2.getRotationMatrix2D`` called
        with ``-mag`` and unit scale; a ``[0, 0, 1]`` row is appended.
        """
        shape = results['img_shape']
        centre_x = (shape[1] - 1) * 0.5
        centre_y = (shape[0] - 1) * 0.5
        affine = cv2.getRotationMatrix2D((centre_x, centre_y), -mag, 1.0)
        bottom_row = np.array([0, 0, 1]).reshape((1, 3))
        homography = np.concatenate([affine, bottom_row])
        return homography.astype(np.float32)

    def _transform_img(self, results: dict, mag: float) -> None:
        """Rotate ``results['img']`` by ``mag``."""
        rotated = mmcv.imrotate(
            results['img'],
            mag,
            border_value=self.img_border_value,
            interpolation=self.interpolation)
        results['img'] = rotated

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Rotate ``results['gt_masks']`` by ``mag``."""
        masks = results['gt_masks']
        results['gt_masks'] = masks.rotate(
            results['img_shape'],
            mag,
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Rotate ``results['gt_seg_map']`` by ``mag`` using nearest
        interpolation."""
        rotated = mmcv.imrotate(
            results['gt_seg_map'],
            mag,
            border_value=self.seg_ignore_label,
            interpolation='nearest')
        results['gt_seg_map'] = rotated
@TRANSFORMS.register_module()
class TranslateX(GeomTransform):
    """Horizontal translation of the image, bboxes, masks and segmentation
    map.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): Probability of applying the translation, in [0, 1].
            Defaults to 1.0.
        level (int, optional): Level in [0, _MAX_LEVEL]; when None it is
            generated randomly from that range. Defaults to None.
        min_mag (float): Minimum horizontal offset as a ratio of the image
            width, in [0, 1]. Defaults to 0.0.
        max_mag (float): Maximum horizontal offset as a ratio of the image
            width, in [0, 1]. Defaults to 0.1.
        reversal_prob (float): Probability parameter controlling the sign
            reversal of the translation magnitude, in [0, 1].
            Defaults to 0.5.
        img_border_value (int | float | tuple): Fill value for the image
            border. A scalar is used for all three channels; a tuple must
            have 3 elements. Defaults to 128.
        mask_border_value (int): Fill value used for masks. Defaults to 0.
        seg_ignore_label (int): Fill value used for the segmentation map.
            Must equal ``ignore_label`` in ``semantic_head`` of the
            corresponding config. Defaults to 255.
        interpolation (str): Interpolation method: "nearest", "bilinear",
            "bicubic", "area", "lanczos" for the 'cv2' backend; "nearest",
            "bilinear" for the 'pillow' backend. Defaults to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 0.1,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Ratio bounds are validated here; everything else is validated by
        # the base class.
        assert 0. <= min_mag <= 1., \
            f'min_mag ratio for TranslateX should be ' \
            f'in range [0, 1], got {min_mag}.'
        assert 0. <= max_mag <= 1., \
            f'max_mag ratio for TranslateX should be ' \
            f'in range [0, 1], got {max_mag}.'
        base_kwargs = dict(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)
        super().__init__(**base_kwargs)

    @staticmethod
    def _pixel_offset(results: dict, mag: float) -> int:
        """Convert the ratio ``mag`` into a whole-pixel offset along the
        second entry of ``results['img_shape']`` (truncating toward 0)."""
        return int(results['img_shape'][1] * mag)

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Build the 3x3 float32 matrix of a horizontal translation."""
        matrix = np.eye(3, dtype=np.float32)
        matrix[0, 2] = self._pixel_offset(results, mag)
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Shift ``results['img']`` horizontally."""
        offset = self._pixel_offset(results, mag)
        results['img'] = mmcv.imtranslate(
            results['img'],
            offset,
            direction='horizontal',
            border_value=self.img_border_value,
            interpolation=self.interpolation)

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Shift ``results['gt_masks']`` horizontally."""
        offset = self._pixel_offset(results, mag)
        masks = results['gt_masks']
        results['gt_masks'] = masks.translate(
            results['img_shape'],
            offset,
            direction='horizontal',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Shift ``results['gt_seg_map']`` horizontally using nearest
        interpolation."""
        offset = self._pixel_offset(results, mag)
        results['gt_seg_map'] = mmcv.imtranslate(
            results['gt_seg_map'],
            offset,
            direction='horizontal',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
@TRANSFORMS.register_module()
class TranslateY(GeomTransform):
    """Vertical translation of the image, bboxes, masks and segmentation
    map.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): Probability of applying the translation, in [0, 1].
            Defaults to 1.0.
        level (int, optional): Level in [0, _MAX_LEVEL]; when None it is
            generated randomly from that range. Defaults to None.
        min_mag (float): Minimum vertical offset as a ratio of the image
            height, in [0, 1]. Defaults to 0.0.
        max_mag (float): Maximum vertical offset as a ratio of the image
            height, in [0, 1]. Defaults to 0.1.
        reversal_prob (float): Probability parameter controlling the sign
            reversal of the translation magnitude, in [0, 1].
            Defaults to 0.5.
        img_border_value (int | float | tuple): Fill value for the image
            border. A scalar is used for all three channels; a tuple must
            have 3 elements. Defaults to 128.
        mask_border_value (int): Fill value used for masks. Defaults to 0.
        seg_ignore_label (int): Fill value used for the segmentation map.
            Must equal ``ignore_label`` in ``semantic_head`` of the
            corresponding config. Defaults to 255.
        interpolation (str): Interpolation method: "nearest", "bilinear",
            "bicubic", "area", "lanczos" for the 'cv2' backend; "nearest",
            "bilinear" for the 'pillow' backend. Defaults to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 0.1,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Ratio bounds are validated here; everything else is validated by
        # the base class.
        assert 0. <= min_mag <= 1., \
            f'min_mag ratio for TranslateY should be ' \
            f'in range [0,1], got {min_mag}.'
        assert 0. <= max_mag <= 1., \
            f'max_mag ratio for TranslateY should be ' \
            f'in range [0,1], got {max_mag}.'
        base_kwargs = dict(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)
        super().__init__(**base_kwargs)

    @staticmethod
    def _pixel_offset(results: dict, mag: float) -> int:
        """Convert the ratio ``mag`` into a whole-pixel offset along the
        first entry of ``results['img_shape']`` (truncating toward 0)."""
        return int(results['img_shape'][0] * mag)

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Build the 3x3 float32 matrix of a vertical translation."""
        matrix = np.eye(3, dtype=np.float32)
        matrix[1, 2] = self._pixel_offset(results, mag)
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Shift ``results['img']`` vertically."""
        offset = self._pixel_offset(results, mag)
        results['img'] = mmcv.imtranslate(
            results['img'],
            offset,
            direction='vertical',
            border_value=self.img_border_value,
            interpolation=self.interpolation)

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Shift ``results['gt_masks']`` vertically."""
        offset = self._pixel_offset(results, mag)
        masks = results['gt_masks']
        results['gt_masks'] = masks.translate(
            results['img_shape'],
            offset,
            direction='vertical',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Shift ``results['gt_seg_map']`` vertically using nearest
        interpolation."""
        offset = self._pixel_offset(results, mag)
        results['gt_seg_map'] = mmcv.imtranslate(
            results['gt_seg_map'],
            offset,
            direction='vertical',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
mmdet/datasets/transforms/instaboost.py
0 → 100644
View file @
b12850fe
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Tuple
import
numpy
as
np
from
mmcv.transforms
import
BaseTransform
from
mmdet.registry
import
TRANSFORMS
@TRANSFORMS.register_module()
class InstaBoost(BaseTransform):
    """Copy-paste style augmentation from `InstaBoost: Boosting Instance
    Segmentation Via Probability Map Guided Copy-Pasting
    <https://arxiv.org/abs/1908.07801>`_.

    The heavy lifting is delegated to the ``instaboostfast`` package; see
    https://github.com/GothicAi/Instaboost for implementation details.

    Required Keys:

    - img (np.uint8)
    - instances

    Modified Keys:

    - img (np.uint8)
    - instances

    Args:
        action_candidate (tuple): Action candidates. "normal",
            "horizontal", "vertical", "skip" are supported.
            Defaults to ('normal', 'horizontal', 'skip').
        action_prob (tuple): Probability of each action; must have the same
            length as ``action_candidate``. Defaults to (1, 0, 0).
        scale (tuple): (min scale, max scale). Defaults to (0.8, 1.2).
        dx (int): The maximum x-axis shift will be (instance width) / dx.
            Defaults to 15.
        dy (int): The maximum y-axis shift will be (instance height) / dy.
            Defaults to 15.
        theta (tuple): (min rotation degree, max rotation degree).
            Defaults to (-1, 1).
        color_prob (float): Probability of images for color augmentation.
            Defaults to 0.5.
        hflag (bool): Whether to use heatmap guided. Defaults to False.
        aug_ratio (float): Probability of applying this transformation.
            Defaults to 0.5.
    """

    def __init__(self,
                 action_candidate: tuple = ('normal', 'horizontal', 'skip'),
                 action_prob: tuple = (1, 0, 0),
                 scale: tuple = (0.8, 1.2),
                 dx: int = 15,
                 dy: int = 15,
                 theta: tuple = (-1, 1),
                 color_prob: float = 0.5,
                 hflag: bool = False,
                 aug_ratio: float = 0.5) -> None:
        import matplotlib
        import matplotlib.pyplot as plt

        # Remember the active backend before instaboostfast is imported.
        backend_before_import = plt.get_backend()

        try:
            import instaboostfast as instaboost
        except ImportError:
            raise ImportError(
                'Please run "pip install instaboostfast" '
                'to install instaboostfast first for instaboost augmentation.')

        # instaboost will modify the default backend
        # and cause visualization to fail, so put the previous one back.
        matplotlib.use(backend_before_import)

        config_args = (action_candidate, action_prob, scale, dx, dy, theta,
                       color_prob, hflag)
        self.cfg = instaboost.InstaBoostConfig(*config_args)
        self.aug_ratio = aug_ratio

    def _load_anns(self, results: dict) -> Tuple[list, list]:
        """Split ``results['instances']`` into instaboost inputs and
        ignored instances.

        Returns:
            tuple[list, list]: The first list holds dicts with
            ``category_id``, ``segmentation`` and an (x, y, w, h) ``bbox``
            for instances whose ``ignore_flag`` is 0. The second list holds
            the remaining instances untouched.
        """
        converted = []
        ignored = []
        for inst in results['instances']:
            category = inst['bbox_label']
            box = inst['bbox']
            segmentation = inst['mask']

            left, top, right, bottom = box
            # assert (right - left) >= 1 and (bottom - top) >= 1
            xywh = [left, top, right - left, bottom - top]

            if inst['ignore_flag'] != 0:
                # Ignore instances without data augmentation
                ignored.append(inst)
                continue

            converted.append(
                dict(category_id=category,
                     segmentation=segmentation,
                     bbox=xywh))
        return converted, ignored

    def _parse_anns(self, results: dict, anns: list, ignore_anns: list,
                    img: np.ndarray) -> dict:
        """Write instaboost output back into ``results`` in the original
        instance format.

        Annotations whose box has non-positive width or height are dropped;
        ``ignore_anns`` are appended unchanged after the converted ones.
        """
        restored = []
        for ann in anns:
            left, top, width, height = ann['bbox']
            # TODO: more essential bug need to be fixed in instaboost
            if width <= 0 or height <= 0:
                continue
            restored.append(
                dict(bbox=[left, top, left + width, top + height],
                     bbox_label=ann['category_id'],
                     mask=ann['segmentation'],
                     ignore_flag=0))
        restored.extend(ignore_anns)

        results['img'] = img
        results['instances'] = restored
        return results

    def transform(self, results) -> dict:
        """Apply InstaBoost with probability ``aug_ratio``.

        ``results`` is returned untouched when it has no (or an empty)
        ``instances`` entry. The image is cast back to its original dtype
        after processing.
        """
        image = results['img']
        source_dtype = image.dtype

        has_instances = ('instances' in results
                         and len(results['instances']) != 0)
        if not has_instances:
            return results

        anns, ignore_anns = self._load_anns(results)

        apply_aug = np.random.choice(
            [0, 1], p=[1 - self.aug_ratio, self.aug_ratio])
        if apply_aug:
            try:
                import instaboostfast as instaboost
            except ImportError:
                raise ImportError('Please run "pip install instaboostfast" '
                                  'to install instaboostfast first.')
            anns, image = instaboost.get_new_data(
                anns, image.astype(np.uint8), self.cfg, background=None)

        return self._parse_anns(results, anns, ignore_anns,
                                image.astype(source_dtype))

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}(aug_ratio={self.aug_ratio})'
mmdet/datasets/transforms/loading.py
0 → 100644
View file @
b12850fe
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Optional
,
Tuple
,
Union
import
mmcv
import
numpy
as
np
import
pycocotools.mask
as
maskUtils
import
torch
from
mmcv.transforms
import
BaseTransform
from
mmcv.transforms
import
LoadAnnotations
as
MMCV_LoadAnnotations
from
mmcv.transforms
import
LoadImageFromFile
from
mmengine.fileio
import
get
from
mmengine.structures
import
BaseDataElement
from
mmdet.registry
import
TRANSFORMS
from
mmdet.structures.bbox
import
get_box_type
from
mmdet.structures.bbox.box_type
import
autocast_box_type
from
mmdet.structures.mask
import
BitmapMasks
,
PolygonMasks
@TRANSFORMS.register_module()
class LoadImageFromNDArray(LoadImageFromFile):
    """Register an already-decoded image found in ``results['img']``.

    Counterpart of :obj:`LoadImageFromFile` for images that are already
    held in memory as :obj:`np.ndarray` (e.g. frames read from a webcam).

    Required Keys:

    - img

    Modified Keys:

    - img
    - img_path
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
    """

    def transform(self, results: dict) -> dict:
        """Fill in the image meta information.

        Args:
            results (dict): Result dict with the image stored in
                ``results['img']``.

        Returns:
            dict: The same dict with ``img_path`` set to None and
            ``img_shape`` / ``ori_shape`` set to the first two dimensions
            of the image.
        """
        image = results['img']
        if self.to_float32:
            image = image.astype(np.float32)

        # There is no file behind an in-memory image.
        results['img_path'] = None
        results['img'] = image
        spatial_shape = image.shape[:2]
        results['img_shape'] = spatial_shape
        results['ori_shape'] = spatial_shape
        return results
@TRANSFORMS.register_module()
class LoadMultiChannelImageFromFiles(BaseTransform):
    """Load multi-channel images from a list of separate channel files.

    Every path in ``results['img_path']`` is read and decoded on its own and
    the decoded arrays are stacked along a new last axis.

    Required Keys:

    - img_path

    Modified Keys:

    - img
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
        color_type (str): The flag argument for :func:``mmcv.imfrombytes``.
            Defaults to 'unchanged'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        file_client_args (dict, optional): Deprecated. Passing anything but
            None raises ``RuntimeError``; use ``backend_args`` instead.
            Defaults to None.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend in mmdet >= 3.0.0rc7. Defaults to None.

    Raises:
        RuntimeError: If ``file_client_args`` is not None.
    """

    def __init__(
        self,
        to_float32: bool = False,
        color_type: str = 'unchanged',
        imdecode_backend: str = 'cv2',
        file_client_args: Optional[dict] = None,
        backend_args: Optional[dict] = None,
    ) -> None:
        self.to_float32 = to_float32
        self.color_type = color_type
        self.imdecode_backend = imdecode_backend
        self.backend_args = backend_args
        if file_client_args is not None:
            # The trailing space after "refer to" keeps the URL from being
            # glued to the preceding word in the rendered message.
            raise RuntimeError(
                'The `file_client_args` is deprecated, '
                'please use `backend_args` instead, please refer to '
                'https://github.com/open-mmlab/mmdetection/blob/main/configs/_base_/datasets/coco_detection.py'  # noqa: E501
            )

    def transform(self, results: dict) -> dict:
        """Load the channel files and record the image meta information.

        Args:
            results (dict): Result dict from :obj:`mmdet.CustomDataset`.
                ``results['img_path']`` must be a list of file paths.

        Returns:
            dict: The dict contains loaded images and meta information.
        """
        assert isinstance(results['img_path'], list)
        channels = []
        for name in results['img_path']:
            img_bytes = get(name, backend_args=self.backend_args)
            channels.append(
                mmcv.imfrombytes(
                    img_bytes,
                    flag=self.color_type,
                    backend=self.imdecode_backend))
        # One decoded file per slice of the new last axis.
        img = np.stack(channels, axis=-1)
        if self.to_float32:
            img = img.astype(np.float32)

        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['ori_shape'] = img.shape[:2]
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'to_float32={self.to_float32}, '
                    f"color_type='{self.color_type}', "
                    f"imdecode_backend='{self.imdecode_backend}', "
                    f'backend_args={self.backend_args})')
        return repr_str
@TRANSFORMS.register_module()
class LoadAnnotations(MMCV_LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,

                # Used in instance/panoptic segmentation. The segmentation
                # mask of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                # one for each connected component of the object. Each
                # list[float] is one simple polygon in the format of
                # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                # coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask
                # in COCO's compressed RLE format. The dict should have keys
                # "size" and "counts". Can be loaded by pycocotools
                'mask': list[list[float]] or dict,
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth
            # file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of
            # bboxes in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
        }

    Required Keys:

    - height
    - width
    - instances

      - bbox (optional)
      - bbox_label
      - mask (optional)
      - ignore_flag

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Forwarded to the base class through ``**kwargs``.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Forwarded to the base class through ``**kwargs``.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Forwarded to the base class through ``**kwargs``.
            Defaults to False.
        poly2mask (bool): Whether to convert mask to bitmap.
            Defaults to True.
        box_type (str): The box type used to wrap the bboxes. If ``box_type``
            is None, gt_bboxes will keep being np.ndarray.
            Defaults to 'hbox'.
        reduce_zero_label (bool): Whether reduce all label value
            by 1. Usually used for datasets where 0 is background label.
            Defaults to False.
        ignore_index (int): The label index to be ignored.
            Valid only if reduce_zero_label is true. Defaults to 255.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Forwarded to the base class through ``**kwargs``.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Forwarded to the base class through
            ``**kwargs``. Defaults to None.
    """

    def __init__(
            self,
            with_mask: bool = False,
            poly2mask: bool = True,
            box_type: str = 'hbox',
            # use for semseg
            reduce_zero_label: bool = False,
            ignore_index: int = 255,
            **kwargs) -> None:
        super().__init__(**kwargs)
        self.with_mask = with_mask
        self.poly2mask = poly2mask
        self.box_type = box_type
        self.reduce_zero_label = reduce_zero_label
        self.ignore_index = ignore_index

    def _load_bboxes(self, results: dict) -> None:
        """Load bounding boxes and ignore flags into ``results``.

        Writes ``gt_bboxes`` (a float32 (N, 4) array when ``box_type`` is
        None, otherwise an instance of the configured box class) and
        ``gt_ignore_flags`` (a bool array).

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        # Single pass so each instance is read in (bbox, ignore_flag) order.
        pairs = [(instance['bbox'], instance['ignore_flag'])
                 for instance in results.get('instances', [])]
        boxes = [pair[0] for pair in pairs]
        flags = [pair[1] for pair in pairs]

        if self.box_type is None:
            results['gt_bboxes'] = np.array(
                boxes, dtype=np.float32).reshape((-1, 4))
        else:
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(boxes, dtype=torch.float32)
        results['gt_ignore_flags'] = np.array(flags, dtype=bool)

    def _load_labels(self, results: dict) -> None:
        """Load per-instance labels into ``results['gt_bboxes_labels']``.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        labels = [
            instance['bbox_label']
            for instance in results.get('instances', [])
        ]
        # TODO: Inconsistent with mmcv, consider how to deal with it later.
        results['gt_bboxes_labels'] = np.array(labels, dtype=np.int64)

    def _poly2mask(self, mask_ann: Union[list, dict], img_h: int,
                   img_w: int) -> np.ndarray:
        """Decode a polygon / RLE mask annotation into a bitmap.

        Args:
            mask_ann (list | dict): Polygon mask annotation input.
            img_h (int): The height of output mask.
            img_w (int): The width of output mask.

        Returns:
            np.ndarray: The decode bitmap mask of shape (img_h, img_w).
        """
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rle = maskUtils.merge(
                maskUtils.frPyObjects(mask_ann, img_h, img_w))
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # already an RLE that can be decoded directly
            rle = mask_ann
        return maskUtils.decode(rle)

    def _process_masks(self, results: dict) -> list:
        """Collect instance masks, replacing invalid ones with a fake mask.

        An instance whose mask is invalid gets ``ignore_flag = 1`` (written
        back into the instance dict) and a placeholder ``[np.zeros(6)]``
        mask. ``results['gt_ignore_flags']`` is rebuilt from the updated
        flags.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            list: Processed gt_masks.
        """
        masks = []
        flags = []
        for instance in results.get('instances', []):
            mask = instance['mask']
            # If the annotation of segmentation mask is invalid,
            # ignore the whole instance.
            if isinstance(mask, list):
                # Keep polygons with an even number of coordinates and at
                # least three points.
                mask = [
                    np.array(polygon) for polygon in mask
                    if len(polygon) % 2 == 0 and len(polygon) >= 6
                ]
                invalid = len(mask) == 0
            elif not self.poly2mask:
                # `PolygonMasks` requires a polygon of format
                # List[np.array], other formats are invalid.
                invalid = True
            elif isinstance(mask, dict):
                # if gt_mask is a dict, it should include `counts` and
                # `size`, so that `BitmapMasks` can uncompressed RLE
                well_formed = (
                    mask.get('counts') is not None
                    and mask.get('size') is not None
                    and isinstance(mask['counts'], (list, str)))
                invalid = not well_formed
            else:
                invalid = False

            if invalid:
                # ignore this instance and set gt_mask to a fake mask
                instance['ignore_flag'] = 1
                mask = [np.zeros(6)]

            masks.append(mask)
            # re-process gt_ignore_flags
            flags.append(instance['ignore_flag'])
        results['gt_ignore_flags'] = np.array(flags, dtype=bool)
        return masks

    def _load_masks(self, results: dict) -> None:
        """Load mask annotations into ``results['gt_masks']``.

        Masks are wrapped in ``BitmapMasks`` when ``poly2mask`` is set and
        in ``PolygonMasks`` otherwise, sized by ``results['ori_shape']``.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        h, w = results['ori_shape']
        processed = self._process_masks(results)
        if self.poly2mask:
            bitmaps = [self._poly2mask(mask, h, w) for mask in processed]
            wrapped = BitmapMasks(bitmaps, h, w)
        else:
            # fake polygon masks will be ignored in `PackDetInputs`
            wrapped = PolygonMasks(list(processed), h, w)
        results['gt_masks'] = wrapped

    def _load_seg_map(self, results: dict) -> None:
        """Load the semantic segmentation map into ``results``.

        Does nothing when ``results`` has no ``seg_map_path``. Otherwise
        writes ``gt_seg_map`` and ``ignore_index``.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.
        """
        if results.get('seg_map_path', None) is None:
            return

        raw = get(results['seg_map_path'], backend_args=self.backend_args)
        seg = mmcv.imfrombytes(
            raw, flag='unchanged', backend=self.imdecode_backend).squeeze()

        if self.reduce_zero_label:
            # avoid using underflow conversion
            seg[seg == 0] = self.ignore_index
            seg = seg - 1
            seg[seg == self.ignore_index - 1] = self.ignore_index

        # modify if custom classes
        if results.get('label_map', None) is not None:
            # Add deep copy to solve bug of repeatedly
            # replace `gt_semantic_seg`, which is reported in
            # https://github.com/open-mmlab/mmsegmentation/pull/1445/
            reference = seg.copy()
            for old_id, new_id in results['label_map'].items():
                seg[reference == old_id] = new_id

        results['gt_seg_map'] = seg
        results['ignore_index'] = self.ignore_index

    def transform(self, results: dict) -> dict:
        """Run every enabled loader on ``results``.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label and
            semantic segmentation.
        """
        loaders = (
            ('with_bbox', self._load_bboxes),
            ('with_label', self._load_labels),
            ('with_mask', self._load_masks),
            ('with_seg', self._load_seg_map),
        )
        for flag_name, loader in loaders:
            if getattr(self, flag_name):
                loader(results)
        return results

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}('
                f'with_bbox={self.with_bbox}, '
                f'with_label={self.with_label}, '
                f'with_mask={self.with_mask}, '
                f'with_seg={self.with_seg}, '
                f'poly2mask={self.poly2mask}, '
                f"imdecode_backend='{self.imdecode_backend}', "
                f'backend_args={self.backend_args})')
@TRANSFORMS.register_module()
class LoadPanopticAnnotations(LoadAnnotations):
    """Load multiple types of panoptic annotations.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,
                },
                ...
            ]
            'segments_info':
            [
                {
                # id = cls_id + instance_id * INSTANCE_OFFSET
                'id': int,

                # Contiguous category id defined in dataset.
                'category': int

                # Thing flag.
                'is_thing': bool
                },
                ...
            ]

            # Filename of semantic or panoptic segmentation ground truth
            # file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of
            # bboxes in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
        }

    Required Keys:

    - height
    - width
    - instances

      - bbox
      - bbox_label
      - ignore_flag

    - segments_info

      - id
      - category
      - is_thing

    - seg_map_path

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to True.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to True.
        box_type (str): The box mode used to wrap the bboxes.
            Defaults to 'hbox'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend in mmdet >= 3.0.0rc7. Defaults to None.

    Raises:
        ImportError: If ``panopticapi`` is not installed.
    """

    def __init__(self,
                 with_bbox: bool = True,
                 with_label: bool = True,
                 with_mask: bool = True,
                 with_seg: bool = True,
                 box_type: str = 'hbox',
                 imdecode_backend: str = 'cv2',
                 backend_args: dict = None) -> None:
        # Imported lazily so panopticapi is only needed when this transform
        # is actually used.
        try:
            from panopticapi import utils
        except ImportError:
            raise ImportError(
                'panopticapi is not installed, please install it by: '
                'pip install git+https://github.com/cocodataset/'
                'panopticapi.git.')
        self.rgb2id = utils.rgb2id

        parent_kwargs = dict(
            with_bbox=with_bbox,
            with_label=with_label,
            with_mask=with_mask,
            with_seg=with_seg,
            with_keypoints=False,
            box_type=box_type,
            imdecode_backend=imdecode_backend,
            backend_args=backend_args)
        super().__init__(**parent_kwargs)

    def _load_masks_and_semantic_segs(self, results: dict) -> None:
        """Private function to load mask and semantic segmentation
        annotations.

        In gt_semantic_seg, the foreground label is from ``0`` to
        ``num_things - 1``, the background label is from ``num_things`` to
        ``num_things + num_stuff - 1``, 255 means the ignored label
        (``VOID``).

        Args:
            results (dict): Result dict from :obj:``mmdet.CustomDataset``.
        """
        # seg_map_path is None, when inference on the dataset without gts.
        if results.get('seg_map_path', None) is None:
            return

        raw = get(results['seg_map_path'], backend_args=self.backend_args)
        rgb = mmcv.imfrombytes(
            raw, flag='color', channel_order='rgb').squeeze()
        segment_ids = self.rgb2id(rgb)

        thing_masks = []
        semantic = np.zeros_like(segment_ids) + 255  # 255 as ignore
        for segment in results['segments_info']:
            region = (segment_ids == segment['id'])
            semantic = np.where(region, segment['category'], semantic)

            # The legal thing masks
            if segment.get('is_thing'):
                thing_masks.append(region.astype(np.uint8))

        if self.with_mask:
            h, w = results['ori_shape']
            results['gt_masks'] = BitmapMasks(thing_masks, h, w)

        if self.with_seg:
            results['gt_seg_map'] = semantic

    def transform(self, results: dict) -> dict:
        """Function to load multiple types panoptic annotations.

        Args:
            results (dict): Result dict from :obj:``mmdet.CustomDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label, mask and
            semantic segmentation annotations.
        """
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)

        # The tasks completed by '_load_masks' and '_load_semantic_segs'
        # in LoadAnnotations are merged to one function.
        needs_panoptic_png = self.with_mask or self.with_seg
        if needs_panoptic_png:
            self._load_masks_and_semantic_segs(results)

        return results
@TRANSFORMS.register_module()
class LoadProposals(BaseTransform):
    """Load proposal pipeline.

    Required Keys:

    - proposals

    Modified Keys:

    - proposals

    Added Keys:

    - proposals_scores

    Args:
        num_max_proposals (int, optional): Maximum number of proposals to
            load. If not specified, all proposals will be loaded.
    """

    def __init__(self, num_max_proposals: Optional[int] = None) -> None:
        self.num_max_proposals = num_max_proposals

    def transform(self, results: dict) -> dict:
        """Convert ``results['proposals']`` into float32 boxes and scores.

        ``results['proposals']`` is replaced by an (n, 4) float32 array and
        ``results['proposals_scores']`` is set to an (n, ) float32 array
        (all zeros when the input carries no ``scores``).

        Args:
            results (dict): Result dict from :obj:`mmdet.CustomDataset`.

        Returns:
            dict: The dict contains loaded proposal annotations.
        """
        proposals = results['proposals']
        # the type of proposals should be `dict` or `InstanceData`
        assert isinstance(proposals, dict) \
            or isinstance(proposals, BaseDataElement)
        bboxes = proposals['bboxes'].astype(np.float32)
        # An image without proposals may be stored as a flat empty array;
        # give it the (0, 4) layout so the shape check below does not fail
        # with an IndexError on the missing second dimension.
        if bboxes.ndim == 1 and bboxes.size == 0:
            bboxes = bboxes.reshape(0, 4)
        assert bboxes.shape[1] == 4, \
            f'Proposals should have shapes (n, 4), but found {bboxes.shape}'

        if 'scores' in proposals:
            scores = proposals['scores'].astype(np.float32)
            assert bboxes.shape[0] == scores.shape[0]
        else:
            scores = np.zeros(bboxes.shape[0], dtype=np.float32)

        if self.num_max_proposals is not None:
            # proposals should sort by scores during dumping the proposals
            bboxes = bboxes[:self.num_max_proposals]
            scores = scores[:self.num_max_proposals]

        if len(bboxes) == 0:
            # Normalise every empty case to canonical empty arrays.
            bboxes = np.zeros((0, 4), dtype=np.float32)
            scores = np.zeros(0, dtype=np.float32)

        results['proposals'] = bboxes
        results['proposals_scores'] = scores
        return results

    def __repr__(self):
        return self.__class__.__name__ + \
               f'(num_max_proposals={self.num_max_proposals})'
@TRANSFORMS.register_module()
class FilterAnnotations(BaseTransform):
    """Filter invalid annotations.

    Required Keys:

    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_bboxes_labels (np.int64) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_ignore_flags (bool) (optional)

    Modified Keys:

    - gt_bboxes (optional)
    - gt_bboxes_labels (optional)
    - gt_masks (optional)
    - gt_ignore_flags (optional)

    Args:
        min_gt_bbox_wh (tuple[float]): Minimum width and height of ground
            truth boxes; a box is kept only when both its width and height
            are strictly greater. Defaults to (1, 1).
        min_gt_mask_area (int): Minimum foreground area of ground truth
            masks; a mask is kept when its area is at least this value.
            Defaults to 1.
        by_box (bool): Filter instances with bounding boxes not meeting the
            min_gt_bbox_wh threshold. Defaults to True.
        by_mask (bool): Filter instances with masks not meeting
            min_gt_mask_area threshold. Defaults to False.
        keep_empty (bool): If True, ``transform`` returns None when no
            instance passes the filters; if False, the result dict with
            emptied annotations is returned instead. Defaults to True.
    """

    def __init__(self,
                 min_gt_bbox_wh: Tuple[int, int] = (1, 1),
                 min_gt_mask_area: int = 1,
                 by_box: bool = True,
                 by_mask: bool = False,
                 keep_empty: bool = True) -> None:
        # TODO: add more filter options
        assert by_box or by_mask
        self.by_box = by_box
        self.by_mask = by_mask
        self.min_gt_bbox_wh = min_gt_bbox_wh
        self.min_gt_mask_area = min_gt_mask_area
        self.keep_empty = keep_empty

    @autocast_box_type()
    def transform(self, results: dict) -> Union[dict, None]:
        """Drop the instances that fail the enabled criteria.

        Args:
            results (dict): Result dict; must contain ``gt_bboxes``.

        Returns:
            dict | None: Updated result dict, or None when nothing is left
            and ``keep_empty`` is set. A dict that already has zero boxes
            is returned unchanged.
        """
        assert 'gt_bboxes' in results
        boxes = results['gt_bboxes']
        if boxes.shape[0] == 0:
            return results

        criteria = []
        if self.by_box:
            min_w, min_h = self.min_gt_bbox_wh[0], self.min_gt_bbox_wh[1]
            large_enough = (boxes.widths > min_w) & (boxes.heights > min_h)
            criteria.append(large_enough.numpy())
        if self.by_mask:
            assert 'gt_masks' in results
            masks = results['gt_masks']
            criteria.append(masks.areas >= self.min_gt_mask_area)

        # An instance survives only if it satisfies every criterion.
        keep = criteria[0]
        for extra in criteria[1:]:
            keep = keep & extra

        if not keep.any() and self.keep_empty:
            return None

        for key in ('gt_bboxes', 'gt_bboxes_labels', 'gt_masks',
                    'gt_ignore_flags'):
            if key in results:
                results[key] = results[key][keep]

        return results

    def __repr__(self):
        return (self.__class__.__name__
                + f'(min_gt_bbox_wh={self.min_gt_bbox_wh}, '
                f'keep_empty={self.keep_empty})')
@TRANSFORMS.register_module()
class LoadEmptyAnnotations(BaseTransform):
    """Load Empty Annotations for unlabeled images.

    Added Keys:

    - gt_bboxes (np.float32)
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to load the pseudo bbox annotation.
            Defaults to True.
        with_label (bool): Whether to load the pseudo label annotation.
            Defaults to True.
        with_mask (bool): Whether to load the pseudo mask annotation.
            Defaults to False.
        with_seg (bool): Whether to load the pseudo semantic segmentation
            annotation. Defaults to False.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in
            ``semantic_head`` of the corresponding config. Defaults to 255.
    """

    def __init__(self,
                 with_bbox: bool = True,
                 with_label: bool = True,
                 with_mask: bool = False,
                 with_seg: bool = False,
                 seg_ignore_label: int = 255) -> None:
        self.with_bbox = with_bbox
        self.with_label = with_label
        self.with_mask = with_mask
        self.with_seg = with_seg
        self.seg_ignore_label = seg_ignore_label

    def transform(self, results: dict) -> dict:
        """Write zero-instance annotations for every enabled kind.

        Args:
            results (dict): Result dict. ``img_shape`` is read when masks
                or a segmentation map are requested.

        Returns:
            dict: Updated result dict.
        """
        if self.with_bbox:
            results['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
            results['gt_ignore_flags'] = np.zeros(0, dtype=bool)

        if self.with_label:
            results['gt_bboxes_labels'] = np.zeros(0, dtype=np.int64)

        if self.with_mask:
            # TODO: support PolygonMasks
            h, w = results['img_shape']
            no_masks = np.zeros((0, h, w), dtype=np.uint8)
            results['gt_masks'] = BitmapMasks(no_masks, h, w)

        if self.with_seg:
            h, w = results['img_shape']
            # Every pixel carries the ignore label.
            blank = np.ones((h, w), dtype=np.uint8)
            results['gt_seg_map'] = self.seg_ignore_label * blank

        return results

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}('
                f'with_bbox={self.with_bbox}, '
                f'with_label={self.with_label}, '
                f'with_mask={self.with_mask}, '
                f'with_seg={self.with_seg}, '
                f'seg_ignore_label={self.seg_ignore_label})')
@TRANSFORMS.register_module()
class InferencerLoader(BaseTransform):
    """Load an image from a path, an array or a prepared dict.

    Two loaders are built from the same keyword arguments: a
    ``LoadImageFromFile`` for inputs that only carry a path and a
    ``mmdet.LoadImageFromNDArray`` for inputs that already carry the image
    under the ``img`` key.

    Accepted inputs:

    - str: treated as ``img_path``
    - np.ndarray: treated as ``img``
    - dict: used as is

    Args:
        **kwargs: Keyword arguments forwarded to both wrapped loaders
            (e.g. ``to_float32``).
    """

    def __init__(self, **kwargs) -> None:
        super().__init__()
        file_cfg = dict(type='LoadImageFromFile', **kwargs)
        self.from_file = TRANSFORMS.build(file_cfg)
        ndarray_cfg = dict(type='mmdet.LoadImageFromNDArray', **kwargs)
        self.from_ndarray = TRANSFORMS.build(ndarray_cfg)

    def transform(self, results: Union[str, np.ndarray, dict]) -> dict:
        """Dispatch the input to the matching loader.

        Args:
            results (str, np.ndarray or dict): The result.

        Returns:
            dict: The dict contains loaded image and meta information.

        Raises:
            NotImplementedError: If ``results`` is none of the accepted
                input types.
        """
        if isinstance(results, str):
            # A bare path never carries an image, so it goes to the file
            # loader.
            return self.from_file(dict(img_path=results))
        if isinstance(results, np.ndarray):
            return self.from_ndarray(dict(img=results))
        if not isinstance(results, dict):
            raise NotImplementedError

        if 'img' in results:
            return self.from_ndarray(results)
        return self.from_file(results)
@TRANSFORMS.register_module()
class LoadTrackAnnotations(LoadAnnotations):
    """Load ``instances`` and ``seg_map`` annotations for tracking tasks.

    On top of what :obj:`LoadAnnotations` loads, this transform reads the
    ``instance_id`` of every instance and stores them as
    ``gt_instances_ids``. The expected annotation format is:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,

                # Used in tracking.
                # Id of instances.
                'instance_id': 100,

                # Used in instance/panoptic segmentation. The segmentation
                # mask of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                # one for each connected component of the object. Each
                # list[float] is one simple polygon in the format of
                # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                # coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask
                # in COCO's compressed RLE format. The dict should have keys
                # "size" and "counts". Can be loaded by pycocotools
                'mask': list[list[float]] or dict,
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth
            # file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of
            # bboxes in an image
            'gt_bboxes': np.ndarray(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In int type, one id per entry of ``instances``.
            'gt_instances_ids': np.ndarray
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
        }

    Required Keys:

    - height (optional)
    - width (optional)
    - instances

      - bbox (optional)
      - bbox_label
      - instance_id
      - mask (optional)
      - ignore_flag (optional)

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (np.float32)
    - gt_bboxes_labels (np.int32)
    - gt_instances_ids (np.int32)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)
    """

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)

    def _load_bboxes(self, results: dict) -> None:
        """Collect box and ignore-flag annotations into ``results``.

        Writes ``results['gt_bboxes']`` (float32, reshaped to ``(-1, 4)``)
        and ``results['gt_ignore_flags']`` (bool) in place.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.
        """
        instances = results['instances']
        # TODO: use bbox_type
        # The datasets which are only format in evaluation don't have
        # groundtruth boxes, hence the membership checks.
        boxes = [inst['bbox'] for inst in instances if 'bbox' in inst]
        ignore_flags = [
            inst['ignore_flag'] for inst in instances
            if 'ignore_flag' in inst
        ]
        # TODO: check this case
        if len(boxes) != len(ignore_flags):
            # Some annotations carry no ignore flags; fall back to all-False
            # so both arrays keep the same length.
            ignore_flags = [False] * len(boxes)

        results['gt_bboxes'] = np.array(
            boxes, dtype=np.float32).reshape(-1, 4)
        results['gt_ignore_flags'] = np.array(ignore_flags, dtype=bool)

    def _load_instances_ids(self, results: dict) -> None:
        """Collect the ``instance_id`` of every instance into ``results``.

        Writes ``results['gt_instances_ids']`` (int32) in place.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.
        """
        ids = [inst['instance_id'] for inst in results['instances']]
        results['gt_instances_ids'] = np.array(ids, dtype=np.int32)

    def transform(self, results: dict) -> dict:
        """Run the parent loading logic, then add the instance ids.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict returned by the parent ``transform`` with
            ``gt_instances_ids`` added.
        """
        loaded = super().transform(results)
        self._load_instances_ids(loaded)
        return loaded

    def __repr__(self) -> str:
        """Return the class name followed by its configuration values."""
        fields = (
            f'with_bbox={self.with_bbox}',
            f'with_label={self.with_label}',
            f'with_mask={self.with_mask}',
            f'with_seg={self.with_seg}',
            f'poly2mask={self.poly2mask}',
            f"imdecode_backend='{self.imdecode_backend}'",
            f'file_client_args={self.file_client_args}',
        )
        return f"{self.__class__.__name__}({', '.join(fields)})"
Prev
1
…
15
16
17
18
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment