Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
RTMDet_mmcv
Commits
ff793569
Commit
ff793569
authored
Nov 18, 2023
by
dengjb
Browse files
update code
parent
fdfe3c4f
Pipeline
#639
failed with stages
in 0 seconds
Changes
386
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
2733 additions
and
0 deletions
+2733
-0
mmdet/datasets/transforms/formatting.py
mmdet/datasets/transforms/formatting.py
+512
-0
mmdet/datasets/transforms/frame_sampling.py
mmdet/datasets/transforms/frame_sampling.py
+177
-0
mmdet/datasets/transforms/geometric.py
mmdet/datasets/transforms/geometric.py
+754
-0
mmdet/datasets/transforms/instaboost.py
mmdet/datasets/transforms/instaboost.py
+150
-0
mmdet/datasets/transforms/loading.py
mmdet/datasets/transforms/loading.py
+1074
-0
mmdet/datasets/transforms/transformers_glip.py
mmdet/datasets/transforms/transformers_glip.py
+66
-0
No files found.
Too many changes to show.
To preserve performance only
386 of 386+
files are displayed.
Plain diff
Email patch
mmdet/datasets/transforms/formatting.py
0 → 100644
View file @
ff793569
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Optional
,
Sequence
import
numpy
as
np
from
mmcv.transforms
import
to_tensor
from
mmcv.transforms.base
import
BaseTransform
from
mmengine.structures
import
InstanceData
,
PixelData
from
mmdet.registry
import
TRANSFORMS
from
mmdet.structures
import
DetDataSample
,
ReIDDataSample
,
TrackDataSample
from
mmdet.structures.bbox
import
BaseBoxes
@TRANSFORMS.register_module()
class PackDetInputs(BaseTransform):
    """Pack the inputs data for the detection / semantic segmentation /
    panoptic segmentation.

    The ``img_meta`` item is always populated. The contents of the
    ``img_meta`` dictionary depends on ``meta_keys``. By default this includes:

        - ``img_id``: id of the image
        - ``img_path``: path to the image file
        - ``ori_shape``: original shape of the image as a tuple (h, w)
        - ``img_shape``: shape of the image input to the network as a tuple
          (h, w). Note that images may be zero padded on the
          bottom/right if the batch tensor is larger than this shape.
        - ``scale_factor``: a float indicating the preprocessing scale
        - ``flip``: a boolean indicating if image flip transform was used
        - ``flip_direction``: the flipping direction

    Args:
        meta_keys (Sequence[str], optional): Meta keys to be converted to
            ``mmcv.DataContainer`` and collected in ``data[img_metas]``.
            Default: ``('img_id', 'img_path', 'ori_shape', 'img_shape',
            'scale_factor', 'flip', 'flip_direction')``
    """
    # Maps annotation keys in ``results`` to the field names used on the
    # packed ``InstanceData`` objects.
    mapping_table = {
        'gt_bboxes': 'bboxes',
        'gt_bboxes_labels': 'labels',
        'gt_masks': 'masks'
    }

    def __init__(self,
                 meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                            'scale_factor', 'flip', 'flip_direction')):
        self.meta_keys = meta_keys

    def transform(self, results: dict) -> dict:
        """Method to pack the input data.

        Args:
            results (dict): Result dict from the data pipeline.

        Returns:
            dict:

            - 'inputs' (obj:`torch.Tensor`): The forward data of models.
            - 'data_sample' (obj:`DetDataSample`): The annotation info of the
              sample.
        """
        packed_results = dict()
        if 'img' in results:
            img = results['img']
            # Promote grayscale (H, W) images to (H, W, 1) so the HWC->CHW
            # permutation below is always valid.
            if len(img.shape) < 3:
                img = np.expand_dims(img, -1)

            # To improve the computational speed by 3-5 times, apply:
            # If image is not contiguous, use
            # `numpy.transpose()` followed by `numpy.ascontiguousarray()`
            # If image is already contiguous, use
            # `torch.permute()` followed by `torch.contiguous()`
            # Refer to https://github.com/open-mmlab/mmdetection/pull/9533
            # for more details
            if not img.flags.c_contiguous:
                img = np.ascontiguousarray(img.transpose(2, 0, 1))
                img = to_tensor(img)
            else:
                img = to_tensor(img).permute(2, 0, 1).contiguous()

            packed_results['inputs'] = img

        # Split instances into kept (flag == 0) and ignored (flag == 1) sets.
        if 'gt_ignore_flags' in results:
            valid_idx = np.where(results['gt_ignore_flags'] == 0)[0]
            ignore_idx = np.where(results['gt_ignore_flags'] == 1)[0]

        data_sample = DetDataSample()
        instance_data = InstanceData()
        ignore_instance_data = InstanceData()

        for key in self.mapping_table.keys():
            if key not in results:
                continue
            # Masks and box-type objects support fancy indexing directly and
            # must not be converted by ``to_tensor``; raw arrays are converted.
            if key == 'gt_masks' or isinstance(results[key], BaseBoxes):
                if 'gt_ignore_flags' in results:
                    instance_data[
                        self.mapping_table[key]] = results[key][valid_idx]
                    ignore_instance_data[
                        self.mapping_table[key]] = results[key][ignore_idx]
                else:
                    instance_data[self.mapping_table[key]] = results[key]
            else:
                if 'gt_ignore_flags' in results:
                    instance_data[self.mapping_table[key]] = to_tensor(
                        results[key][valid_idx])
                    ignore_instance_data[self.mapping_table[key]] = to_tensor(
                        results[key][ignore_idx])
                else:
                    instance_data[self.mapping_table[key]] = to_tensor(
                        results[key])
        data_sample.gt_instances = instance_data
        data_sample.ignored_instances = ignore_instance_data

        if 'proposals' in results:
            # NOTE(review): assumes 'proposals_scores' is present whenever
            # 'proposals' is — confirm against the proposal-loading transform.
            proposals = InstanceData(
                bboxes=to_tensor(results['proposals']),
                scores=to_tensor(results['proposals_scores']))
            data_sample.proposals = proposals

        if 'gt_seg_map' in results:
            # ``[None, ...]`` adds a leading channel dim; ``.copy()`` avoids
            # sharing memory with the pipeline's array.
            gt_sem_seg_data = dict(
                sem_seg=to_tensor(results['gt_seg_map'][None, ...].copy()))
            gt_sem_seg_data = PixelData(**gt_sem_seg_data)
            if 'ignore_index' in results:
                metainfo = dict(ignore_index=results['ignore_index'])
                gt_sem_seg_data.set_metainfo(metainfo)
            data_sample.gt_sem_seg = gt_sem_seg_data

        # Collect only the meta keys actually present in ``results``.
        img_meta = {}
        for key in self.meta_keys:
            if key in results:
                img_meta[key] = results[key]
        data_sample.set_metainfo(img_meta)
        packed_results['data_samples'] = data_sample

        return packed_results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(meta_keys={self.meta_keys})'
        return repr_str
@TRANSFORMS.register_module()
class ToTensor:
    """Convert the values stored under the given result keys to
    :obj:`torch.Tensor`.

    Args:
        keys (Sequence[str]): Keys that need to be converted to Tensor.
    """

    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        """Convert each configured entry of ``results`` in place.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The result dict contains the data converted
                to :obj:`torch.Tensor`.
        """
        converted = {name: to_tensor(results[name]) for name in self.keys}
        results.update(converted)
        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(keys={self.keys})'
@TRANSFORMS.register_module()
class ImageToTensor:
    """Convert image to :obj:`torch.Tensor` by given keys.

    The dimension order of input image is (H, W, C). The pipeline will convert
    it to (C, H, W). If only 2 dimension (H, W) is given, the output would be
    (1, H, W).

    Args:
        keys (Sequence[str]): Key of images to be converted to Tensor.
    """

    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        """Convert the image(s) in ``results`` to channel-first tensors.

        Args:
            results (dict): Result dict contains the image data to convert.

        Returns:
            dict: The result dict contains the image converted
                to :obj:`torch.Tensor` and permuted to (C, H, W) order.
        """
        for name in self.keys:
            image = results[name]
            # Grayscale (H, W) inputs get a trailing channel axis first.
            if len(image.shape) < 3:
                image = np.expand_dims(image, -1)
            tensor = to_tensor(image)
            results[name] = tensor.permute(2, 0, 1).contiguous()
        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(keys={self.keys})'
@TRANSFORMS.register_module()
class Transpose:
    """Transpose some results by given keys.

    Args:
        keys (Sequence[str]): Keys of results to be transposed.
        order (Sequence[int]): Order of transpose.
    """

    def __init__(self, keys, order):
        self.keys = keys
        self.order = order

    def __call__(self, results):
        """Transpose the axes of every configured entry of ``results``.

        Args:
            results (dict): Result dict contains the data to transpose.

        Returns:
            dict: The result dict contains the data transposed to
                ``self.order``.
        """
        for name in self.keys:
            value = results[name]
            results[name] = value.transpose(self.order)
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(keys={self.keys}, order={self.order})')
@TRANSFORMS.register_module()
class WrapFieldsToLists:
    """Wrap fields of the data dictionary into lists for evaluation.

    This class can be used as a last step of a test or validation
    pipeline for single image evaluation or inference.

    Example:
        >>> test_pipeline = [
        >>>    dict(type='LoadImageFromFile'),
        >>>    dict(type='Normalize',
                    mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375],
                    to_rgb=True),
        >>>    dict(type='Pad', size_divisor=32),
        >>>    dict(type='ImageToTensor', keys=['img']),
        >>>    dict(type='Collect', keys=['img']),
        >>>    dict(type='WrapFieldsToLists')
        >>> ]
    """

    def __call__(self, results):
        """Wrap every value of ``results`` in a single-element list.

        Args:
            results (dict): Result dict contains the data to wrap.

        Returns:
            dict: The result dict where value of ``self.keys`` are wrapped
                into list.
        """
        for name in list(results):
            results[name] = [results[name]]
        return results

    def __repr__(self):
        return f'{self.__class__.__name__}()'
@TRANSFORMS.register_module()
class PackTrackInputs(BaseTransform):
    """Pack the inputs data for the multi object tracking and video instance
    segmentation. All the information of images are packed to ``inputs``. All
    the information except images are packed to ``data_samples``. In order to
    get the original annotation and meta info, we add `instances` key into meta
    keys.

    Args:
        meta_keys (Sequence[str], optional): Extra meta keys to be collected
            in ``data_sample.metainfo`` in addition to
            ``default_meta_keys``. May be a single key (str) or a tuple of
            keys. Defaults to None.
        default_meta_keys (tuple): Default meta keys. Defaults to ('img_id',
            'img_path', 'ori_shape', 'img_shape', 'scale_factor',
            'flip', 'flip_direction', 'frame_id', 'is_video_data',
            'video_id', 'video_length', 'instances').
    """
    # Maps annotation keys in ``results`` to the field names used on the
    # packed ``InstanceData`` objects.
    mapping_table = {
        'gt_bboxes': 'bboxes',
        'gt_bboxes_labels': 'labels',
        'gt_masks': 'masks',
        'gt_instances_ids': 'instances_ids'
    }

    def __init__(self,
                 meta_keys: Optional[Sequence[str]] = None,
                 default_meta_keys: tuple = ('img_id', 'img_path', 'ori_shape',
                                             'img_shape', 'scale_factor',
                                             'flip', 'flip_direction',
                                             'frame_id', 'video_id',
                                             'video_length',
                                             'ori_video_length', 'instances')):
        # Store the defaults so ``__repr__`` can report them; the original
        # code only kept ``meta_keys`` and crashed in ``__repr__``.
        self.default_meta_keys = default_meta_keys
        self.meta_keys = default_meta_keys
        if meta_keys is not None:
            if isinstance(meta_keys, str):
                meta_keys = (meta_keys, )
            else:
                assert isinstance(meta_keys, tuple), \
                    'meta_keys must be str or tuple'
            self.meta_keys += meta_keys

    def transform(self, results: dict) -> dict:
        """Method to pack the input data.

        Args:
            results (dict): Result dict from the data pipeline. Every
                annotation/meta entry is a per-frame list.

        Returns:
            dict:
            - 'inputs' (dict[Tensor]): The forward data of models.
            - 'data_samples' (obj:`TrackDataSample`): The annotation info of
              the samples.
        """
        packed_results = dict()
        packed_results['inputs'] = dict()

        # 1. Pack images: stack per-frame HWC images into one NCHW tensor.
        if 'img' in results:
            imgs = results['img']
            imgs = np.stack(imgs, axis=0)
            imgs = imgs.transpose(0, 3, 1, 2)
            packed_results['inputs'] = to_tensor(imgs)

        # 2. Pack InstanceData: split each frame's instances into kept
        # (flag == 0) and ignored (flag == 1) index sets.
        if 'gt_ignore_flags' in results:
            gt_ignore_flags_list = results['gt_ignore_flags']
            valid_idx_list, ignore_idx_list = [], []
            for gt_ignore_flags in gt_ignore_flags_list:
                valid_idx = np.where(gt_ignore_flags == 0)[0]
                ignore_idx = np.where(gt_ignore_flags == 1)[0]
                valid_idx_list.append(valid_idx)
                ignore_idx_list.append(ignore_idx)

        assert 'img_id' in results, "'img_id' must be contained in the " \
            'results for counting the number of images'

        num_imgs = len(results['img_id'])
        instance_data_list = [InstanceData() for _ in range(num_imgs)]
        ignore_instance_data_list = [InstanceData() for _ in range(num_imgs)]

        for key in self.mapping_table.keys():
            if key not in results:
                continue
            if key == 'gt_masks':
                # Masks support fancy indexing directly; no tensor conversion.
                mapped_key = self.mapping_table[key]
                gt_masks_list = results[key]
                if 'gt_ignore_flags' in results:
                    for i, gt_mask in enumerate(gt_masks_list):
                        valid_idx, ignore_idx = valid_idx_list[
                            i], ignore_idx_list[i]
                        instance_data_list[i][mapped_key] = gt_mask[valid_idx]
                        ignore_instance_data_list[i][mapped_key] = gt_mask[
                            ignore_idx]
                else:
                    for i, gt_mask in enumerate(gt_masks_list):
                        instance_data_list[i][mapped_key] = gt_mask
            else:
                anns_list = results[key]
                if 'gt_ignore_flags' in results:
                    for i, ann in enumerate(anns_list):
                        valid_idx, ignore_idx = valid_idx_list[
                            i], ignore_idx_list[i]
                        instance_data_list[i][
                            self.mapping_table[key]] = to_tensor(
                                ann[valid_idx])
                        ignore_instance_data_list[i][
                            self.mapping_table[key]] = to_tensor(
                                ann[ignore_idx])
                else:
                    for i, ann in enumerate(anns_list):
                        instance_data_list[i][
                            self.mapping_table[key]] = to_tensor(ann)

        det_data_samples_list = []
        for i in range(num_imgs):
            det_data_sample = DetDataSample()
            det_data_sample.gt_instances = instance_data_list[i]
            det_data_sample.ignored_instances = ignore_instance_data_list[i]
            det_data_samples_list.append(det_data_sample)

        # 3. Pack metainfo: each meta key holds a per-frame list; distribute
        # each element to the matching frame's data sample.
        for key in self.meta_keys:
            if key not in results:
                continue
            img_metas_list = results[key]
            for i, img_meta in enumerate(img_metas_list):
                det_data_samples_list[i].set_metainfo({f'{key}': img_meta})

        track_data_sample = TrackDataSample()
        track_data_sample.video_data_samples = det_data_samples_list
        if 'key_frame_flags' in results:
            key_frame_flags = np.asarray(results['key_frame_flags'])
            key_frames_inds = np.where(key_frame_flags)[0].tolist()
            ref_frames_inds = np.where(~key_frame_flags)[0].tolist()
            track_data_sample.set_metainfo(
                dict(key_frames_inds=key_frames_inds))
            track_data_sample.set_metainfo(
                dict(ref_frames_inds=ref_frames_inds))

        packed_results['data_samples'] = track_data_sample
        return packed_results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        # Fixed: add the opening parenthesis and report the stored
        # ``default_meta_keys`` (previously an AttributeError).
        repr_str += f'(meta_keys={self.meta_keys}, '
        repr_str += f'default_meta_keys={self.default_meta_keys})'
        return repr_str
@TRANSFORMS.register_module()
class PackReIDInputs(BaseTransform):
    """Pack the inputs data for the ReID. The ``meta_info`` item is always
    populated. The contents of the ``meta_info`` dictionary depends on
    ``meta_keys``. By default this includes:

        - ``img_path``: path to the image file.
        - ``ori_shape``: original shape of the image as a tuple (H, W).
        - ``img_shape``: shape of the image input to the network as a tuple
          (H, W). Note that images may be zero padded on the bottom/right
          if the batch tensor is larger than this shape.
        - ``scale``: scale of the image as a tuple (W, H).
        - ``scale_factor``: a float indicating the pre-processing scale.
        - ``flip``: a boolean indicating if image flip transform was used.
        - ``flip_direction``: the flipping direction.

    Args:
        meta_keys (Sequence[str], optional): The meta keys to saved in the
            ``metainfo`` of the packed ``data_sample``. These extend
            ``default_meta_keys``; may be a single key (str) or a tuple.
    """
    # Meta keys always collected, regardless of the ``meta_keys`` argument.
    default_meta_keys = ('img_path', 'ori_shape', 'img_shape', 'scale',
                         'scale_factor')

    def __init__(self, meta_keys: Sequence[str] = ()) -> None:
        self.meta_keys = self.default_meta_keys
        if meta_keys is not None:
            if isinstance(meta_keys, str):
                meta_keys = (meta_keys, )
            else:
                assert isinstance(meta_keys, tuple), \
                    'meta_keys must be str or tuple.'
            # Tuple concatenation rebinds an instance attribute; the class
            # attribute ``default_meta_keys`` is never mutated.
            self.meta_keys += meta_keys

    def transform(self, results: dict) -> dict:
        """Method to pack the input data.

        Args:
            results (dict): Result dict from the data pipeline. ``'img'`` may
                be a single (H, W, C) array or a list of such arrays.

        Returns:
            dict:
            - 'inputs' (dict[Tensor]): The forward data of models.
            - 'data_samples' (obj:`ReIDDataSample`): The meta info of the
              sample.
        """
        packed_results = dict(inputs=dict(), data_samples=None)
        assert 'img' in results, 'Missing the key ``img``.'
        _type = type(results['img'])
        label = results['gt_label']

        if _type == list:
            # Multi-image sample: stack the per-image labels as well.
            img = results['img']
            label = np.stack(label, axis=0)  # (N,)
            assert all([type(v) == _type for v in results.values()]), \
                'All items in the results must have the same type.'
        else:
            # Single image: wrap in a list so the stacking below is uniform.
            img = [results['img']]

        img = np.stack(img, axis=3)  # (H, W, C, N)
        img = img.transpose(3, 2, 0, 1)  # (N, C, H, W)
        img = np.ascontiguousarray(img)

        packed_results['inputs'] = to_tensor(img)
        data_sample = ReIDDataSample()
        data_sample.set_gt_label(label)

        # NOTE(review): unlike PackDetInputs, missing meta keys raise
        # KeyError here rather than being skipped — presumably intentional;
        # confirm against the ReID pipeline.
        meta_info = dict()
        for key in self.meta_keys:
            meta_info[key] = results[key]
        data_sample.set_metainfo(meta_info)
        packed_results['data_samples'] = data_sample

        return packed_results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(meta_keys={self.meta_keys})'
        return repr_str
mmdet/datasets/transforms/frame_sampling.py
0 → 100644
View file @
ff793569
# Copyright (c) OpenMMLab. All rights reserved.
import
random
from
collections
import
defaultdict
from
typing
import
Dict
,
List
,
Optional
,
Union
from
mmcv.transforms
import
BaseTransform
from
mmdet.registry
import
TRANSFORMS
@TRANSFORMS.register_module()
class BaseFrameSample(BaseTransform):
    """Directly get the key frame, no reference frames.

    Args:
        collect_video_keys (list[str], optional): The keys of video info to
            be collected. Defaults to ``['video_id', 'video_length']``.
    """

    def __init__(self, collect_video_keys: Optional[List[str]] = None):
        # Use a None sentinel instead of a mutable list default argument,
        # which would be shared across all instances.
        if collect_video_keys is None:
            collect_video_keys = ['video_id', 'video_length']
        self.collect_video_keys = collect_video_keys

    def prepare_data(self, video_infos: dict,
                     sampled_inds: List[int]) -> Dict[str, List]:
        """Prepare data for the subsequent pipeline.

        Args:
            video_infos (dict): The whole video information.
            sampled_inds (list[int]): The sampled frame indices.

        Returns:
            dict: The processed data information, collated so that each key
            maps to a per-frame list.
        """
        frames_anns = video_infos['images']
        final_data_info = defaultdict(list)
        for index in sampled_inds:
            data = frames_anns[index]
            # Copy video-level info into each image-level record. The
            # original length is kept under 'ori_video_length' while
            # 'video_length' becomes the number of sampled frames.
            for key in self.collect_video_keys:
                if key == 'video_length':
                    data['ori_video_length'] = video_infos[key]
                    data['video_length'] = len(sampled_inds)
                else:
                    data[key] = video_infos[key]
            # Collate data_list (list of dict to dict of list).
            for key, value in data.items():
                final_data_info[key].append(value)

        return final_data_info

    def transform(self, video_infos: dict) -> Optional[Dict[str, List]]:
        """Transform the video information.

        Args:
            video_infos (dict): The whole video information.

        Returns:
            dict: The data information of the key frames.
        """
        if 'key_frame_id' in video_infos:
            key_frame_id = video_infos['key_frame_id']
            assert isinstance(video_infos['key_frame_id'], int)
        else:
            # Pick one key frame uniformly at random.
            key_frame_id = random.sample(
                list(range(video_infos['video_length'])), 1)[0]
        results = self.prepare_data(video_infos, [key_frame_id])

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(collect_video_keys={self.collect_video_keys})'
        return repr_str
@TRANSFORMS.register_module()
class UniformRefFrameSample(BaseFrameSample):
    """Uniformly sample reference frames.

    Args:
        num_ref_imgs (int): Number of reference frames to be sampled.
        frame_range (int | list[int]): Range of frames to be sampled around
            key frame. If int, the range is [-frame_range, frame_range].
            Defaults to 10.
        filter_key_img (bool): Whether to filter the key frame when
            sampling reference frames. Defaults to True.
        collect_video_keys (list[str], optional): The keys of video info to
            be collected. Defaults to ``['video_id', 'video_length']``.
    """

    def __init__(self,
                 num_ref_imgs: int = 1,
                 frame_range: Union[int, List[int]] = 10,
                 filter_key_img: bool = True,
                 collect_video_keys: Optional[List[str]] = None):
        self.num_ref_imgs = num_ref_imgs
        self.filter_key_img = filter_key_img
        if isinstance(frame_range, int):
            assert frame_range >= 0, 'frame_range can not be a negative value.'
            frame_range = [-frame_range, frame_range]
        elif isinstance(frame_range, list):
            assert len(frame_range) == 2, 'The length must be 2.'
            assert frame_range[0] <= 0 and frame_range[1] >= 0
            for i in frame_range:
                assert isinstance(i, int), 'Each element must be int.'
        else:
            raise TypeError('The type of frame_range must be int or list.')
        self.frame_range = frame_range
        # Resolve the None sentinel here (instead of a shared mutable list
        # default) so this class is safe even on its own.
        if collect_video_keys is None:
            collect_video_keys = ['video_id', 'video_length']
        super().__init__(collect_video_keys=collect_video_keys)

    def sampling_frames(self, video_length: int, key_frame_id: int):
        """Sampling frames.

        Args:
            video_length (int): The length of the video.
            key_frame_id (int): The key frame id.

        Returns:
            list[int]: The sampled frame indices (sorted, key frame
            included).
            list[bool]: Flags marking which sampled index is the key frame.
        """
        if video_length > 1:
            # Clamp the sampling window to valid frame indices.
            left = max(0, key_frame_id + self.frame_range[0])
            right = min(key_frame_id + self.frame_range[1], video_length - 1)
            frame_ids = list(range(0, video_length))

            valid_ids = frame_ids[left:right + 1]
            if self.filter_key_img and key_frame_id in valid_ids:
                valid_ids.remove(key_frame_id)
            assert len(valid_ids) > 0, \
                'After filtering key frame, there are no valid frames'
            if len(valid_ids) < self.num_ref_imgs:
                # Repeat candidates so sampling without replacement succeeds.
                valid_ids = valid_ids * self.num_ref_imgs
            ref_frame_ids = random.sample(valid_ids, self.num_ref_imgs)
        else:
            # Degenerate single-frame video: the key frame is its own ref.
            ref_frame_ids = [key_frame_id] * self.num_ref_imgs

        sampled_frames_ids = [key_frame_id] + ref_frame_ids
        sampled_frames_ids = sorted(sampled_frames_ids)

        key_frames_ind = sampled_frames_ids.index(key_frame_id)
        key_frame_flags = [False] * len(sampled_frames_ids)
        key_frame_flags[key_frames_ind] = True
        return sampled_frames_ids, key_frame_flags

    def transform(self, video_infos: dict) -> Optional[Dict[str, List]]:
        """Transform the video information.

        Args:
            video_infos (dict): The whole video information.

        Returns:
            dict: The data information of the sampled frames.
        """
        if 'key_frame_id' in video_infos:
            key_frame_id = video_infos['key_frame_id']
            assert isinstance(video_infos['key_frame_id'], int)
        else:
            key_frame_id = random.sample(
                list(range(video_infos['video_length'])), 1)[0]

        (sampled_frames_ids, key_frame_flags) = self.sampling_frames(
            video_infos['video_length'], key_frame_id=key_frame_id)
        results = self.prepare_data(video_infos, sampled_frames_ids)
        results['key_frame_flags'] = key_frame_flags

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(num_ref_imgs={self.num_ref_imgs}, '
        repr_str += f'frame_range={self.frame_range}, '
        repr_str += f'filter_key_img={self.filter_key_img}, '
        repr_str += f'collect_video_keys={self.collect_video_keys})'
        return repr_str
mmdet/datasets/transforms/geometric.py
0 → 100644
View file @
ff793569
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Optional
,
Union
import
cv2
import
mmcv
import
numpy
as
np
from
mmcv.transforms
import
BaseTransform
from
mmcv.transforms.utils
import
cache_randomness
from
mmdet.registry
import
TRANSFORMS
from
mmdet.structures.bbox
import
autocast_box_type
from
.augment_wrappers
import
_MAX_LEVEL
,
level_to_mag
@TRANSFORMS.register_module()
class GeomTransform(BaseTransform):
    """Base class for geometric transformations. All geometric transformations
    need to inherit from this base class. ``GeomTransform`` unifies the class
    attributes and class functions of geometric transformations (ShearX,
    ShearY, Rotate, TranslateX, and TranslateY), and records the homography
    matrix.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): The probability for performing the geometric
            transformation and should be in range [0, 1]. Defaults to 1.0.
        level (int, optional): The level should be in range [0, _MAX_LEVEL].
            If level is None, it will generate from [0, _MAX_LEVEL] randomly.
            Defaults to None.
        min_mag (float): The minimum magnitude for geometric transformation.
            Defaults to 0.0.
        max_mag (float): The maximum magnitude for geometric transformation.
            Defaults to 1.0.
        reversal_prob (float): The probability that reverses the geometric
            transformation magnitude. Should be in range [0,1].
            Defaults to 0.5.
        img_border_value (int | float | tuple): The filled values for
            image border. If float, the same fill value will be used for
            all the three channels of image. If tuple, it should be 3 elements.
            Defaults to 128.
        mask_border_value (int): The fill value used for masks. Defaults to 0.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 1.0,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        assert 0 <= prob <= 1.0, f'The probability of the transformation ' \
            f'should be in range [0,1], got {prob}.'
        assert level is None or isinstance(level, int), \
            f'The level should be None or type int, got {type(level)}.'
        assert level is None or 0 <= level <= _MAX_LEVEL, \
            f'The level should be in range [0,{_MAX_LEVEL}], got {level}.'
        assert isinstance(min_mag, float), \
            f'min_mag should be type float, got {type(min_mag)}.'
        assert isinstance(max_mag, float), \
            f'max_mag should be type float, got {type(max_mag)}.'
        assert min_mag <= max_mag, \
            f'min_mag should smaller than max_mag, ' \
            f'got min_mag={min_mag} and max_mag={max_mag}'
        # Fixed: the message previously interpolated ``type(max_mag)``.
        assert isinstance(reversal_prob, float), \
            f'reversal_prob should be type float, got {type(reversal_prob)}.'
        # Fixed: the message previously claimed a type error for what is a
        # range check.
        assert 0 <= reversal_prob <= 1.0, \
            f'The reversal probability of the transformation magnitude ' \
            f'should be in range [0,1], got {reversal_prob}.'
        if isinstance(img_border_value, (float, int)):
            img_border_value = tuple([float(img_border_value)] * 3)
        elif isinstance(img_border_value, tuple):
            assert len(img_border_value) == 3, \
                f'img_border_value as tuple must have 3 elements, ' \
                f'got {len(img_border_value)}.'
            img_border_value = tuple([float(val) for val in img_border_value])
        else:
            raise ValueError(
                'img_border_value must be float or tuple with 3 elements.')
        # Fixed: the two message fragments previously ran together without a
        # separator.
        assert np.all([0 <= val <= 255 for val in img_border_value]), \
            'all elements of img_border_value should between range [0,255], ' \
            f'got {img_border_value}.'
        self.prob = prob
        self.level = level
        self.min_mag = min_mag
        self.max_mag = max_mag
        self.reversal_prob = reversal_prob
        self.img_border_value = img_border_value
        self.mask_border_value = mask_border_value
        self.seg_ignore_label = seg_ignore_label
        self.interpolation = interpolation

    def _transform_img(self, results: dict, mag: float) -> None:
        """Transform the image. Subclasses implement the actual warp."""
        pass

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Transform the masks. Subclasses implement the actual warp."""
        pass

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Transform the segmentation map. Subclasses implement the warp."""
        pass

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Get the homography matrix for the geometric transformation."""
        return np.eye(3, dtype=np.float32)

    def _transform_bboxes(self, results: dict, mag: float) -> None:
        """Transform the bboxes by the recorded homography, then clip them to
        the image."""
        results['gt_bboxes'].project_(self.homography_matrix)
        results['gt_bboxes'].clip_(results['img_shape'])

    def _record_homography_matrix(self, results: dict) -> None:
        """Record the homography matrix, composing with any matrix recorded
        by earlier transforms in the pipeline."""
        if results.get('homography_matrix', None) is None:
            results['homography_matrix'] = self.homography_matrix
        else:
            results['homography_matrix'] = self.homography_matrix @ results[
                'homography_matrix']

    @cache_randomness
    def _random_disable(self):
        """Randomly disable the transform with probability ``1 - prob``."""
        return np.random.rand() > self.prob

    @cache_randomness
    def _get_mag(self):
        """Get the (possibly sign-reversed) magnitude of the transform."""
        mag = level_to_mag(self.level, self.min_mag, self.max_mag)
        return -mag if np.random.rand() > self.reversal_prob else mag

    @autocast_box_type()
    def transform(self, results: dict) -> dict:
        """Transform function for images, bounding boxes, masks and semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Transformed results.
        """
        if self._random_disable():
            return results
        mag = self._get_mag()
        self.homography_matrix = self._get_homography_matrix(results, mag)
        self._record_homography_matrix(results)
        self._transform_img(results, mag)
        if results.get('gt_bboxes', None) is not None:
            self._transform_bboxes(results, mag)
        if results.get('gt_masks', None) is not None:
            self._transform_masks(results, mag)
        if results.get('gt_seg_map', None) is not None:
            self._transform_seg(results, mag)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob}, '
        repr_str += f'level={self.level}, '
        repr_str += f'min_mag={self.min_mag}, '
        repr_str += f'max_mag={self.max_mag}, '
        repr_str += f'reversal_prob={self.reversal_prob}, '
        repr_str += f'img_border_value={self.img_border_value}, '
        repr_str += f'mask_border_value={self.mask_border_value}, '
        repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
        repr_str += f'interpolation={self.interpolation})'
        return repr_str
@
TRANSFORMS
.
register_module
()
class
ShearX
(
GeomTransform
):
"""Shear the images, bboxes, masks and segmentation map horizontally.
Required Keys:
- img
- gt_bboxes (BaseBoxes[torch.float32]) (optional)
- gt_masks (BitmapMasks | PolygonMasks) (optional)
- gt_seg_map (np.uint8) (optional)
Modified Keys:
- img
- gt_bboxes
- gt_masks
- gt_seg_map
Added Keys:
- homography_matrix
Args:
prob (float): The probability for performing Shear and should be in
range [0, 1]. Defaults to 1.0.
level (int, optional): The level should be in range [0, _MAX_LEVEL].
If level is None, it will generate from [0, _MAX_LEVEL] randomly.
Defaults to None.
min_mag (float): The minimum angle for the horizontal shear.
Defaults to 0.0.
max_mag (float): The maximum angle for the horizontal shear.
Defaults to 30.0.
reversal_prob (float): The probability that reverses the horizontal
shear magnitude. Should be in range [0,1]. Defaults to 0.5.
img_border_value (int | float | tuple): The filled values for
image border. If float, the same fill value will be used for
all the three channels of image. If tuple, it should be 3 elements.
Defaults to 128.
mask_border_value (int): The fill value used for masks. Defaults to 0.
seg_ignore_label (int): The fill value used for segmentation map.
Note this value must equals ``ignore_label`` in ``semantic_head``
of the corresponding config. Defaults to 255.
interpolation (str): Interpolation method, accepted values are
"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
backend, "nearest", "bilinear" for 'pillow' backend. Defaults
to 'bilinear'.
"""
def
__init__
(
self
,
prob
:
float
=
1.0
,
level
:
Optional
[
int
]
=
None
,
min_mag
:
float
=
0.0
,
max_mag
:
float
=
30.0
,
reversal_prob
:
float
=
0.5
,
img_border_value
:
Union
[
int
,
float
,
tuple
]
=
128
,
mask_border_value
:
int
=
0
,
seg_ignore_label
:
int
=
255
,
interpolation
:
str
=
'bilinear'
)
->
None
:
assert
0.
<=
min_mag
<=
90.
,
\
f
'min_mag angle for ShearX should be '
\
f
'in range [0, 90], got
{
min_mag
}
.'
assert
0.
<=
max_mag
<=
90.
,
\
f
'max_mag angle for ShearX should be '
\
f
'in range [0, 90], got
{
max_mag
}
.'
super
().
__init__
(
prob
=
prob
,
level
=
level
,
min_mag
=
min_mag
,
max_mag
=
max_mag
,
reversal_prob
=
reversal_prob
,
img_border_value
=
img_border_value
,
mask_border_value
=
mask_border_value
,
seg_ignore_label
=
seg_ignore_label
,
interpolation
=
interpolation
)
@cache_randomness
def _get_mag(self):
    """Sample the shear magnitude (a slope, tan of the sampled angle).

    The sign is flipped randomly: with probability ``reversal_prob`` the
    positive slope is kept, otherwise it is negated.
    """
    angle = level_to_mag(self.level, self.min_mag, self.max_mag)
    slope = np.tan(angle * np.pi / 180)
    return slope if np.random.rand() <= self.reversal_prob else -slope
def
_get_homography_matrix
(
self
,
results
:
dict
,
mag
:
float
)
->
np
.
ndarray
:
"""Get the homography matrix for ShearX."""
return
np
.
array
([[
1
,
mag
,
0
],
[
0
,
1
,
0
],
[
0
,
0
,
1
]],
dtype
=
np
.
float32
)
def _transform_img(self, results: dict, mag: float) -> None:
    """Apply the horizontal shear to ``results['img']`` in place."""
    img = results['img']
    results['img'] = mmcv.imshear(
        img,
        mag,
        direction='horizontal',
        border_value=self.img_border_value,
        interpolation=self.interpolation)
def _transform_masks(self, results: dict, mag: float) -> None:
    """Apply the horizontal shear to ``results['gt_masks']`` in place."""
    masks = results['gt_masks']
    results['gt_masks'] = masks.shear(
        results['img_shape'],
        mag,
        direction='horizontal',
        border_value=self.mask_border_value,
        interpolation=self.interpolation)
def _transform_seg(self, results: dict, mag: float) -> None:
    """Apply the horizontal shear to ``results['gt_seg_map']`` in place."""
    seg_map = results['gt_seg_map']
    # 'nearest' interpolation keeps label values intact.
    results['gt_seg_map'] = mmcv.imshear(
        seg_map,
        mag,
        direction='horizontal',
        border_value=self.seg_ignore_label,
        interpolation='nearest')
@TRANSFORMS.register_module()
class ShearY(GeomTransform):
    """Shear the images, bboxes, masks and segmentation map vertically.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): The probability for performing ShearY and should be in
            range [0, 1]. Defaults to 1.0.
        level (int, optional): The level should be in range [0, _MAX_LEVEL].
            If level is None, it will generate from [0, _MAX_LEVEL] randomly.
            Defaults to None.
        min_mag (float): The minimum angle (degrees) for the vertical shear.
            Defaults to 0.0.
        max_mag (float): The maximum angle (degrees) for the vertical shear.
            Defaults to 30.0.
        reversal_prob (float): The probability that reverses the vertical
            shear magnitude. Should be in range [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): The filled values for
            image border. If float, the same fill value will be used for
            all the three channels of image. If tuple, it should be 3
            elements. Defaults to 128.
        mask_border_value (int): The fill value used for masks. Defaults to 0.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 30.,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Only shear angles inside [0, 90] degrees are meaningful.
        assert 0. <= min_mag <= 90., \
            f'min_mag angle for ShearY should be ' \
            f'in range [0, 90], got {min_mag}.'
        assert 0. <= max_mag <= 90., \
            f'max_mag angle for ShearY should be ' \
            f'in range [0, 90], got {max_mag}.'
        super().__init__(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)

    @cache_randomness
    def _get_mag(self):
        """Sample the shear slope, randomly negated via ``reversal_prob``."""
        angle = level_to_mag(self.level, self.min_mag, self.max_mag)
        slope = np.tan(angle * np.pi / 180)
        return slope if np.random.rand() <= self.reversal_prob else -slope

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Return the 3x3 homography of a vertical shear with slope ``mag``."""
        # Identity with the (1, 0) entry set to the shear slope.
        matrix = np.eye(3, dtype=np.float32)
        matrix[1, 0] = mag
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Apply the vertical shear to ``results['img']`` in place."""
        results['img'] = mmcv.imshear(
            results['img'],
            mag,
            direction='vertical',
            border_value=self.img_border_value,
            interpolation=self.interpolation)

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Apply the vertical shear to ``results['gt_masks']`` in place."""
        results['gt_masks'] = results['gt_masks'].shear(
            results['img_shape'],
            mag,
            direction='vertical',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Apply the vertical shear to ``results['gt_seg_map']`` in place."""
        # 'nearest' interpolation keeps label values intact.
        results['gt_seg_map'] = mmcv.imshear(
            results['gt_seg_map'],
            mag,
            direction='vertical',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
@TRANSFORMS.register_module()
class Rotate(GeomTransform):
    """Rotate the images, bboxes, masks and segmentation map.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): The probability for perform transformation and
            should be in range 0 to 1. Defaults to 1.0.
        level (int, optional): The level should be in range [0, _MAX_LEVEL].
            If level is None, it will generate from [0, _MAX_LEVEL] randomly.
            Defaults to None.
        min_mag (float): The minimum angle (degrees) for rotation.
            Defaults to 0.0.
        max_mag (float): The maximum angle (degrees) for rotation.
            Defaults to 30.0.
        reversal_prob (float): The probability that reverses the rotation
            magnitude. Should be in range [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): The filled values for
            image border. If float, the same fill value will be used for
            all the three channels of image. If tuple, it should be 3
            elements. Defaults to 128.
        mask_border_value (int): The fill value used for masks. Defaults to 0.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 30.0,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # Rotation angles outside [0, 180] degrees are rejected.
        assert 0. <= min_mag <= 180., \
            f'min_mag for Rotate should be in range [0,180], got {min_mag}.'
        assert 0. <= max_mag <= 180., \
            f'max_mag for Rotate should be in range [0,180], got {max_mag}.'
        super().__init__(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Return the 3x3 homography rotating by ``mag`` degrees about the
        image center."""
        img_shape = results['img_shape']
        center = ((img_shape[1] - 1) * 0.5, (img_shape[0] - 1) * 0.5)
        # cv2 uses the opposite angle convention, hence the sign flip.
        rotation_2x3 = cv2.getRotationMatrix2D(center, -mag, 1.0)
        bottom_row = np.array([0, 0, 1]).reshape((1, 3))
        return np.concatenate([rotation_2x3, bottom_row]).astype(np.float32)

    def _transform_img(self, results: dict, mag: float) -> None:
        """Rotate ``results['img']`` in place."""
        results['img'] = mmcv.imrotate(
            results['img'],
            mag,
            border_value=self.img_border_value,
            interpolation=self.interpolation)

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Rotate ``results['gt_masks']`` in place."""
        results['gt_masks'] = results['gt_masks'].rotate(
            results['img_shape'],
            mag,
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Rotate ``results['gt_seg_map']`` in place."""
        # 'nearest' interpolation keeps label values intact.
        results['gt_seg_map'] = mmcv.imrotate(
            results['gt_seg_map'],
            mag,
            border_value=self.seg_ignore_label,
            interpolation='nearest')
@TRANSFORMS.register_module()
class TranslateX(GeomTransform):
    """Translate the images, bboxes, masks and segmentation map horizontally.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): The probability for perform transformation and
            should be in range 0 to 1. Defaults to 1.0.
        level (int, optional): The level should be in range [0, _MAX_LEVEL].
            If level is None, it will generate from [0, _MAX_LEVEL] randomly.
            Defaults to None.
        min_mag (float): The minimum pixel's offset ratio for horizontal
            translation. Defaults to 0.0.
        max_mag (float): The maximum pixel's offset ratio for horizontal
            translation. Defaults to 0.1.
        reversal_prob (float): The probability that reverses the horizontal
            translation magnitude. Should be in range [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): The filled values for
            image border. If float, the same fill value will be used for
            all the three channels of image. If tuple, it should be 3
            elements. Defaults to 128.
        mask_border_value (int): The fill value used for masks. Defaults to 0.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 0.1,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # The magnitude here is a ratio of the image width, so it must be
        # inside [0, 1].
        assert 0. <= min_mag <= 1., \
            f'min_mag ratio for TranslateX should be ' \
            f'in range [0, 1], got {min_mag}.'
        assert 0. <= max_mag <= 1., \
            f'max_mag ratio for TranslateX should be ' \
            f'in range [0, 1], got {max_mag}.'
        super().__init__(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Return the 3x3 homography translating by ``mag * width`` pixels."""
        offset = int(results['img_shape'][1] * mag)
        matrix = np.eye(3, dtype=np.float32)
        matrix[0, 2] = offset
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Translate ``results['img']`` horizontally in place."""
        offset = int(results['img_shape'][1] * mag)
        results['img'] = mmcv.imtranslate(
            results['img'],
            offset,
            direction='horizontal',
            border_value=self.img_border_value,
            interpolation=self.interpolation)

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Translate ``results['gt_masks']`` horizontally in place."""
        offset = int(results['img_shape'][1] * mag)
        results['gt_masks'] = results['gt_masks'].translate(
            results['img_shape'],
            offset,
            direction='horizontal',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Translate ``results['gt_seg_map']`` horizontally in place."""
        offset = int(results['img_shape'][1] * mag)
        # 'nearest' interpolation keeps label values intact.
        results['gt_seg_map'] = mmcv.imtranslate(
            results['gt_seg_map'],
            offset,
            direction='horizontal',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
@TRANSFORMS.register_module()
class TranslateY(GeomTransform):
    """Translate the images, bboxes, masks and segmentation map vertically.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_seg_map (np.uint8) (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg_map

    Added Keys:

    - homography_matrix

    Args:
        prob (float): The probability for perform transformation and
            should be in range 0 to 1. Defaults to 1.0.
        level (int, optional): The level should be in range [0, _MAX_LEVEL].
            If level is None, it will generate from [0, _MAX_LEVEL] randomly.
            Defaults to None.
        min_mag (float): The minimum pixel's offset ratio for vertical
            translation. Defaults to 0.0.
        max_mag (float): The maximum pixel's offset ratio for vertical
            translation. Defaults to 0.1.
        reversal_prob (float): The probability that reverses the vertical
            translation magnitude. Should be in range [0, 1]. Defaults to 0.5.
        img_border_value (int | float | tuple): The filled values for
            image border. If float, the same fill value will be used for
            all the three channels of image. If tuple, it should be 3
            elements. Defaults to 128.
        mask_border_value (int): The fill value used for masks. Defaults to 0.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 prob: float = 1.0,
                 level: Optional[int] = None,
                 min_mag: float = 0.0,
                 max_mag: float = 0.1,
                 reversal_prob: float = 0.5,
                 img_border_value: Union[int, float, tuple] = 128,
                 mask_border_value: int = 0,
                 seg_ignore_label: int = 255,
                 interpolation: str = 'bilinear') -> None:
        # The magnitude here is a ratio of the image height, so it must be
        # inside [0, 1].
        assert 0. <= min_mag <= 1., \
            f'min_mag ratio for TranslateY should be ' \
            f'in range [0,1], got {min_mag}.'
        assert 0. <= max_mag <= 1., \
            f'max_mag ratio for TranslateY should be ' \
            f'in range [0,1], got {max_mag}.'
        super().__init__(
            prob=prob,
            level=level,
            min_mag=min_mag,
            max_mag=max_mag,
            reversal_prob=reversal_prob,
            img_border_value=img_border_value,
            mask_border_value=mask_border_value,
            seg_ignore_label=seg_ignore_label,
            interpolation=interpolation)

    def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
        """Return the 3x3 homography translating by ``mag * height`` pixels."""
        offset = int(results['img_shape'][0] * mag)
        matrix = np.eye(3, dtype=np.float32)
        matrix[1, 2] = offset
        return matrix

    def _transform_img(self, results: dict, mag: float) -> None:
        """Translate ``results['img']`` vertically in place."""
        offset = int(results['img_shape'][0] * mag)
        results['img'] = mmcv.imtranslate(
            results['img'],
            offset,
            direction='vertical',
            border_value=self.img_border_value,
            interpolation=self.interpolation)

    def _transform_masks(self, results: dict, mag: float) -> None:
        """Translate ``results['gt_masks']`` vertically in place."""
        offset = int(results['img_shape'][0] * mag)
        results['gt_masks'] = results['gt_masks'].translate(
            results['img_shape'],
            offset,
            direction='vertical',
            border_value=self.mask_border_value,
            interpolation=self.interpolation)

    def _transform_seg(self, results: dict, mag: float) -> None:
        """Translate ``results['gt_seg_map']`` vertically in place."""
        offset = int(results['img_shape'][0] * mag)
        # 'nearest' interpolation keeps label values intact.
        results['gt_seg_map'] = mmcv.imtranslate(
            results['gt_seg_map'],
            offset,
            direction='vertical',
            border_value=self.seg_ignore_label,
            interpolation='nearest')
mmdet/datasets/transforms/instaboost.py
0 → 100644
View file @
ff793569
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Tuple
import
numpy
as
np
from
mmcv.transforms
import
BaseTransform
from
mmdet.registry
import
TRANSFORMS
@TRANSFORMS.register_module()
class InstaBoost(BaseTransform):
    r"""Data augmentation method in `InstaBoost: Boosting Instance
    Segmentation Via Probability Map Guided Copy-Pasting
    <https://arxiv.org/abs/1908.07801>`_.

    Refer to https://github.com/GothicAi/Instaboost for implementation details.

    Required Keys:

    - img (np.uint8)
    - instances

    Modified Keys:

    - img (np.uint8)
    - instances

    Args:
        action_candidate (tuple): Action candidates. "normal", "horizontal", \
            "vertical", "skip" are supported. Defaults to ('normal', \
            'horizontal', 'skip').
        action_prob (tuple): Corresponding action probabilities. Should be \
            the same length as action_candidate. Defaults to (1, 0, 0).
        scale (tuple): (min scale, max scale). Defaults to (0.8, 1.2).
        dx (int): The maximum x-axis shift will be (instance width) / dx.
            Defaults to 15.
        dy (int): The maximum y-axis shift will be (instance height) / dy.
            Defaults to 15.
        theta (tuple): (min rotation degree, max rotation degree). \
            Defaults to (-1, 1).
        color_prob (float): Probability of images for color augmentation.
            Defaults to 0.5.
        hflag (bool): Whether to use heatmap guided. Defaults to False.
        aug_ratio (float): Probability of applying this transformation. \
            Defaults to 0.5.
    """

    def __init__(self,
                 action_candidate: tuple = ('normal', 'horizontal', 'skip'),
                 action_prob: tuple = (1, 0, 0),
                 scale: tuple = (0.8, 1.2),
                 dx: int = 15,
                 dy: int = 15,
                 theta: tuple = (-1, 1),
                 color_prob: float = 0.5,
                 hflag: bool = False,
                 aug_ratio: float = 0.5) -> None:
        import matplotlib
        import matplotlib.pyplot as plt

        # Remember the active backend before importing instaboost.
        default_backend = plt.get_backend()
        try:
            import instaboostfast as instaboost
        except ImportError:
            raise ImportError(
                'Please run "pip install instaboostfast" '
                'to install instaboostfast first for instaboost augmentation.')
        # instaboost will modify the default backend
        # and cause visualization to fail.
        matplotlib.use(default_backend)

        self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob,
                                               scale, dx, dy, theta,
                                               color_prob, hflag)
        self.aug_ratio = aug_ratio

    def _load_anns(self, results: dict) -> Tuple[list, list]:
        """Convert raw anns to instaboost expected input format."""
        aug_anns = []
        skipped_anns = []
        for instance in results['instances']:
            x1, y1, x2, y2 = instance['bbox']
            # assert (x2 - x1) >= 1 and (y2 - y1) >= 1
            xywh = [x1, y1, x2 - x1, y2 - y1]
            if instance['ignore_flag'] == 0:
                aug_anns.append({
                    'category_id': instance['bbox_label'],
                    'segmentation': instance['mask'],
                    'bbox': xywh
                })
            else:
                # Ignore instances without data augmentation
                skipped_anns.append(instance)
        return aug_anns, skipped_anns

    def _parse_anns(self, results: dict, anns: list, ignore_anns: list,
                    img: np.ndarray) -> dict:
        """Restore the result of instaboost processing to the original anns
        format."""
        instances = []
        for ann in anns:
            x1, y1, w, h = ann['bbox']
            # TODO: more essential bug need to be fixed in instaboost
            if w <= 0 or h <= 0:
                continue
            instances.append(
                dict(
                    bbox=[x1, y1, x1 + w, y1 + h],
                    bbox_label=ann['category_id'],
                    mask=ann['segmentation'],
                    ignore_flag=0))
        instances.extend(ignore_anns)
        results['img'] = img
        results['instances'] = instances
        return results

    def transform(self, results) -> dict:
        """The transform function."""
        img = results['img']
        ori_type = img.dtype
        if 'instances' not in results or len(results['instances']) == 0:
            return results

        anns, ignore_anns = self._load_anns(results)
        # Apply the augmentation with probability ``aug_ratio``.
        if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]):
            try:
                import instaboostfast as instaboost
            except ImportError:
                raise ImportError('Please run "pip install instaboostfast" '
                                  'to install instaboostfast first.')
            anns, img = instaboost.get_new_data(
                anns, img.astype(np.uint8), self.cfg, background=None)

        return self._parse_anns(results, anns, ignore_anns,
                                img.astype(ori_type))

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}(aug_ratio={self.aug_ratio})'
mmdet/datasets/transforms/loading.py
0 → 100644
View file @
ff793569
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Optional
,
Tuple
,
Union
import
mmcv
import
numpy
as
np
import
pycocotools.mask
as
maskUtils
import
torch
from
mmcv.transforms
import
BaseTransform
from
mmcv.transforms
import
LoadAnnotations
as
MMCV_LoadAnnotations
from
mmcv.transforms
import
LoadImageFromFile
from
mmengine.fileio
import
get
from
mmengine.structures
import
BaseDataElement
from
mmdet.registry
import
TRANSFORMS
from
mmdet.structures.bbox
import
get_box_type
from
mmdet.structures.bbox.box_type
import
autocast_box_type
from
mmdet.structures.mask
import
BitmapMasks
,
PolygonMasks
@TRANSFORMS.register_module()
class LoadImageFromNDArray(LoadImageFromFile):
    """Load an image from ``results['img']``.

    Similar with :obj:`LoadImageFromFile`, but the image has been loaded as
    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
    from webcam.

    Required Keys:

    - img

    Modified Keys:

    - img
    - img_path
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
    """

    def transform(self, results: dict) -> dict:
        """Transform function to add image meta information.

        Args:
            results (dict): Result dict with Webcam read image in
                ``results['img']``.

        Returns:
            dict: The dict contains loaded image and meta information.
        """
        img = results['img']
        if self.to_float32:
            img = img.astype(np.float32)

        # The image is already in memory, so there is no path to record.
        shape = img.shape[:2]
        results.update(
            img=img, img_path=None, img_shape=shape, ori_shape=shape)
        return results
@TRANSFORMS.register_module()
class LoadMultiChannelImageFromFiles(BaseTransform):
    """Load multi-channel images from a list of separate channel files.

    Required Keys:

    - img_path

    Modified Keys:

    - img
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
        color_type (str): The flag argument for :func:``mmcv.imfrombytes``.
            Defaults to 'unchanged'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        file_client_args (dict): Arguments to instantiate the
            corresponding backend in mmdet <= 3.0.0rc6. Defaults to None.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend in mmdet >= 3.0.0rc7. Defaults to None.
    """

    def __init__(
        self,
        to_float32: bool = False,
        color_type: str = 'unchanged',
        imdecode_backend: str = 'cv2',
        file_client_args: dict = None,
        backend_args: dict = None,
    ) -> None:
        self.to_float32 = to_float32
        self.color_type = color_type
        self.imdecode_backend = imdecode_backend
        self.backend_args = backend_args
        if file_client_args is not None:
            # Fixed: the original message concatenated "refer to" directly
            # onto the URL with no separating space.
            raise RuntimeError(
                'The `file_client_args` is deprecated, '
                'please use `backend_args` instead, please refer to '
                'https://github.com/open-mmlab/mmdetection/blob/main/configs/_base_/datasets/coco_detection.py'  # noqa: E501
            )

    def transform(self, results: dict) -> dict:
        """Transform functions to load multiple images and get images meta
        information.

        Args:
            results (dict): Result dict from :obj:`mmdet.CustomDataset`.

        Returns:
            dict: The dict contains loaded images and meta information.
        """
        assert isinstance(results['img_path'], list)
        # Decode each channel file, then stack them along a new last axis.
        img = np.stack([
            mmcv.imfrombytes(
                get(name, backend_args=self.backend_args),
                flag=self.color_type,
                backend=self.imdecode_backend)
            for name in results['img_path']
        ],
                       axis=-1)
        if self.to_float32:
            img = img.astype(np.float32)

        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['ori_shape'] = img.shape[:2]
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'to_float32={self.to_float32}, '
                    f"color_type='{self.color_type}', "
                    f"imdecode_backend='{self.imdecode_backend}', "
                    f'backend_args={self.backend_args})')
        return repr_str
@TRANSFORMS.register_module()
class LoadAnnotations(MMCV_LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],
                # Label of image classification.
                'bbox_label': 1,
                # Used in instance/panoptic segmentation. The segmentation mask
                # of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                # one for each connected component of the object. Each
                # list[float] is one simple polygon in the format of
                # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                # coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask in
                # COCO's compressed RLE format. The dict should have keys
                # "size" and "counts".  Can be loaded by pycocotools
                'mask': list[list[float]] or dict,
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
            # in (x, y, v) order, float type.
        }

    Required Keys:

    - height
    - width
    - instances

      - bbox (optional)
      - bbox_label
      - mask (optional)
      - ignore_flag

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Default: False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to False.
        poly2mask (bool): Whether to convert mask to bitmap. Default: True.
        box_type (str): The box type used to wrap the bboxes. If ``box_type``
            is None, gt_bboxes will keep being np.ndarray. Defaults to 'hbox'.
        reduce_zero_label (bool): Whether reduce all label value
            by 1. Usually used for datasets where 0 is background label.
            Defaults to False.
        ignore_index (int): The label index to be ignored.
            Valid only if reduce_zero_label is true. Defaults is 255.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :fun:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.
    """

    def __init__(self,
                 with_mask: bool = False,
                 poly2mask: bool = True,
                 box_type: str = 'hbox',
                 # use for semseg
                 reduce_zero_label: bool = False,
                 ignore_index: int = 255,
                 **kwargs) -> None:
        # ``with_bbox``/``with_label``/``with_seg`` etc. are handled by the
        # mmcv base class via **kwargs.
        super(LoadAnnotations, self).__init__(**kwargs)
        self.with_mask = with_mask
        self.poly2mask = poly2mask
        self.box_type = box_type
        self.reduce_zero_label = reduce_zero_label
        self.ignore_index = ignore_index

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box annotations.
        """
        gt_bboxes = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_bboxes.append(instance['bbox'])
            gt_ignore_flags.append(instance['ignore_flag'])
        if self.box_type is None:
            # Keep plain ndarray boxes; reshape guards the empty-list case so
            # downstream code always sees an (N, 4) array.
            results['gt_bboxes'] = np.array(
                gt_bboxes, dtype=np.float32).reshape((-1, 4))
        else:
            # Wrap the boxes in the registered box class (e.g. HorizontalBoxes).
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(gt_bboxes, dtype=torch.float32)
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)

    def _load_labels(self, results: dict) -> None:
        """Private function to load label annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded label annotations.
        """
        gt_bboxes_labels = []
        for instance in results.get('instances', []):
            gt_bboxes_labels.append(instance['bbox_label'])
        # TODO: Inconsistent with mmcv, consider how to deal with it later.
        results['gt_bboxes_labels'] = np.array(
            gt_bboxes_labels, dtype=np.int64)

    def _poly2mask(self, mask_ann: Union[list, dict], img_h: int,
                   img_w: int) -> np.ndarray:
        """Private function to convert masks represented with polygon to
        bitmaps.

        Args:
            mask_ann (list | dict): Polygon mask annotation input.
            img_h (int): The height of output mask.
            img_w (int): The width of output mask.

        Returns:
            np.ndarray: The decode bitmap mask of shape (img_h, img_w).
        """
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
            rle = maskUtils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # rle
            rle = mask_ann
        mask = maskUtils.decode(rle)
        return mask

    def _process_masks(self, results: dict) -> list:
        """Process gt_masks and filter invalid polygons.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            list: Processed gt_masks.
        """
        gt_masks = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_mask = instance['mask']
            # If the annotation of segmentation mask is invalid,
            # ignore the whole instance.
            if isinstance(gt_mask, list):
                # Keep only well-formed polygons: an even number of
                # coordinates and at least 3 points (6 values).
                gt_mask = [
                    np.array(polygon) for polygon in gt_mask
                    if len(polygon) % 2 == 0 and len(polygon) >= 6
                ]
                if len(gt_mask) == 0:
                    # ignore this instance and set gt_mask to a fake mask
                    instance['ignore_flag'] = 1
                    gt_mask = [np.zeros(6)]
            elif not self.poly2mask:
                # `PolygonMasks` requires a ploygon of format List[np.array],
                # other formats are invalid.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            elif isinstance(gt_mask, dict) and \
                    not (gt_mask.get('counts') is not None
                         and gt_mask.get('size') is not None
                         and isinstance(gt_mask['counts'], (list, str))):
                # if gt_mask is a dict, it should include `counts` and `size`,
                # so that `BitmapMasks` can uncompressed RLE
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            gt_masks.append(gt_mask)
            # re-process gt_ignore_flags
            gt_ignore_flags.append(instance['ignore_flag'])
        # Flags are rewritten here because invalid masks above may have
        # flipped an instance's ignore_flag to 1.
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)
        return gt_masks

    def _load_masks(self, results: dict) -> None:
        """Private function to load mask annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        h, w = results['ori_shape']
        gt_masks = self._process_masks(results)
        if self.poly2mask:
            # Rasterize every polygon/RLE annotation to a bitmap mask.
            gt_masks = BitmapMasks(
                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
        else:
            # fake polygon masks will be ignored in `PackDetInputs`
            gt_masks = PolygonMasks([mask for mask in gt_masks], h, w)
        results['gt_masks'] = gt_masks

    def _load_seg_map(self, results: dict) -> None:
        """Private function to load semantic segmentation annotations.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict contains loaded semantic segmentation annotations.
        """
        if results.get('seg_map_path', None) is None:
            return

        img_bytes = get(
            results['seg_map_path'], backend_args=self.backend_args)
        gt_semantic_seg = mmcv.imfrombytes(
            img_bytes, flag='unchanged',
            backend=self.imdecode_backend).squeeze()

        if self.reduce_zero_label:
            # avoid using underflow conversion
            gt_semantic_seg[gt_semantic_seg == 0] = self.ignore_index
            gt_semantic_seg = gt_semantic_seg - 1
            gt_semantic_seg[gt_semantic_seg == self.ignore_index -
                            1] = self.ignore_index

        # modify if custom classes
        if results.get('label_map', None) is not None:
            # Add deep copy to solve bug of repeatedly
            # replace `gt_semantic_seg`, which is reported in
            # https://github.com/open-mmlab/mmsegmentation/pull/1445/
            gt_semantic_seg_copy = gt_semantic_seg.copy()
            for old_id, new_id in results['label_map'].items():
                gt_semantic_seg[gt_semantic_seg_copy == old_id] = new_id
        results['gt_seg_map'] = gt_semantic_seg
        results['ignore_index'] = self.ignore_index

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label and
            semantic segmentation.
        """
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask:
            self._load_masks(results)
        if self.with_seg:
            self._load_seg_map(results)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(with_bbox={self.with_bbox}, '
        repr_str += f'with_label={self.with_label}, '
        repr_str += f'with_mask={self.with_mask}, '
        repr_str += f'with_seg={self.with_seg}, '
        repr_str += f'poly2mask={self.poly2mask}, '
        repr_str += f"imdecode_backend='{self.imdecode_backend}', "
        repr_str += f'backend_args={self.backend_args})'
        return repr_str
@TRANSFORMS.register_module()
class LoadPanopticAnnotations(LoadAnnotations):
    """Load multiple types of panoptic annotations.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,
                },
                ...
            ]
            'segments_info':
            [
                {
                # id = cls_id + instance_id * INSTANCE_OFFSET
                'id': int,

                # Contiguous category id defined in dataset.
                'category': int

                # Thing flag.
                'is_thing': bool
                },
                ...
            ]

            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
            # in (x, y, v) order, float type.
        }

    Required Keys:

    - height
    - width
    - instances
      - bbox
      - bbox_label
      - ignore_flag
    - segments_info
      - id
      - category
      - is_thing
    - seg_map_path

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to True.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to True.
        box_type (str): The box mode used to wrap the bboxes.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :fun:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend in mmdet >= 3.0.0rc7. Defaults to None.
    """

    def __init__(self,
                 with_bbox: bool = True,
                 with_label: bool = True,
                 with_mask: bool = True,
                 with_seg: bool = True,
                 box_type: str = 'hbox',
                 imdecode_backend: str = 'cv2',
                 backend_args: dict = None) -> None:
        # The panopticapi package is only needed by this transform, so the
        # import is deferred to construction time and surfaced with an
        # actionable error message.
        try:
            from panopticapi import utils
        except ImportError:
            raise ImportError(
                'panopticapi is not installed, please install it by: '
                'pip install git+https://github.com/cocodataset/'
                'panopticapi.git.')
        # Converts an RGB-encoded panoptic PNG to per-pixel segment ids.
        self.rgb2id = utils.rgb2id

        super().__init__(
            with_bbox=with_bbox,
            with_label=with_label,
            with_mask=with_mask,
            with_seg=with_seg,
            with_keypoints=False,
            box_type=box_type,
            imdecode_backend=imdecode_backend,
            backend_args=backend_args)

    def _load_masks_and_semantic_segs(self, results: dict) -> None:
        """Private function to load mask and semantic segmentation annotations.

        In gt_semantic_seg, the foreground label is from ``0`` to
        ``num_things - 1``, the background label is from ``num_things`` to
        ``num_things + num_stuff - 1``, 255 means the ignored label (``VOID``).

        Args:
            results (dict): Result dict from :obj:``mmdet.CustomDataset``.
        """
        # seg_map_path is None, when inference on the dataset without gts.
        if results.get('seg_map_path', None) is None:
            return

        img_bytes = get(
            results['seg_map_path'], backend_args=self.backend_args)
        pan_png = mmcv.imfrombytes(
            img_bytes, flag='color', channel_order='rgb').squeeze()
        pan_png = self.rgb2id(pan_png)

        gt_masks = []
        gt_seg = np.zeros_like(pan_png) + 255  # 255 as ignore

        for segment_info in results['segments_info']:
            mask = (pan_png == segment_info['id'])
            gt_seg = np.where(mask, segment_info['category'], gt_seg)

            # The legal thing masks
            if segment_info.get('is_thing'):
                gt_masks.append(mask.astype(np.uint8))

        if self.with_mask:
            h, w = results['ori_shape']
            gt_masks = BitmapMasks(gt_masks, h, w)
            results['gt_masks'] = gt_masks

        if self.with_seg:
            results['gt_seg_map'] = gt_seg

    def transform(self, results: dict) -> dict:
        """Function to load multiple types panoptic annotations.

        Args:
            results (dict): Result dict from :obj:``mmdet.CustomDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label, mask and
            semantic segmentation annotations.
        """
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask or self.with_seg:
            # The tasks completed by '_load_masks' and '_load_semantic_segs'
            # in LoadAnnotations are merged to one function.
            self._load_masks_and_semantic_segs(results)

        return results
@TRANSFORMS.register_module()
class LoadProposals(BaseTransform):
    """Load proposal pipeline.

    Required Keys:

    - proposals

    Modified Keys:

    - proposals

    Args:
        num_max_proposals (int, optional): Maximum number of proposals to load.
            If not specified, all proposals will be loaded.
    """

    def __init__(self, num_max_proposals: Optional[int] = None) -> None:
        self.num_max_proposals = num_max_proposals

    def transform(self, results: dict) -> dict:
        """Transform function to load proposals from file.

        Args:
            results (dict): Result dict from :obj:`mmdet.CustomDataset`.

        Returns:
            dict: The dict contains loaded proposal annotations.
        """
        proposals = results['proposals']
        # the type of proposals should be `dict` or `InstanceData`
        assert isinstance(proposals, (dict, BaseDataElement))

        bboxes = proposals['bboxes'].astype(np.float32)
        assert bboxes.shape[1] == 4, \
            f'Proposals should have shapes (n, 4), but found {bboxes.shape}'

        if 'scores' in proposals:
            scores = proposals['scores'].astype(np.float32)
            assert bboxes.shape[0] == scores.shape[0]
        else:
            # Pad missing confidences with zeros so downstream consumers
            # always receive a score array aligned with the boxes.
            scores = np.zeros(bboxes.shape[0], dtype=np.float32)

        if self.num_max_proposals is not None:
            # proposals should sort by scores during dumping the proposals
            bboxes = bboxes[:self.num_max_proposals]
            scores = scores[:self.num_max_proposals]

        if len(bboxes) == 0:
            # Normalize the empty case to correctly-shaped empty arrays.
            bboxes = np.zeros((0, 4), dtype=np.float32)
            scores = np.zeros(0, dtype=np.float32)

        results['proposals'] = bboxes
        results['proposals_scores'] = scores
        return results

    def __repr__(self):
        return self.__class__.__name__ + \
               f'(num_max_proposals={self.num_max_proposals})'
@TRANSFORMS.register_module()
class FilterAnnotations(BaseTransform):
    """Filter invalid annotations.

    Required Keys:

    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_bboxes_labels (np.int64) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_ignore_flags (bool) (optional)

    Modified Keys:

    - gt_bboxes (optional)
    - gt_bboxes_labels (optional)
    - gt_masks (optional)
    - gt_ignore_flags (optional)

    Args:
        min_gt_bbox_wh (tuple[float]): Minimum width and height of ground truth
            boxes. Default: (1., 1.)
        min_gt_mask_area (int): Minimum foreground area of ground truth masks.
            Default: 1
        by_box (bool): Filter instances with bounding boxes not meeting the
            min_gt_bbox_wh threshold. Default: True
        by_mask (bool): Filter instances with masks not meeting
            min_gt_mask_area threshold. Default: False
        keep_empty (bool): Whether to return None when it
            becomes an empty bbox after filtering. Defaults to True.
    """

    def __init__(self,
                 min_gt_bbox_wh: Tuple[int, int] = (1, 1),
                 min_gt_mask_area: int = 1,
                 by_box: bool = True,
                 by_mask: bool = False,
                 keep_empty: bool = True) -> None:
        # TODO: add more filter options
        # At least one filtering criterion must be active.
        assert by_box or by_mask
        self.min_gt_bbox_wh = min_gt_bbox_wh
        self.min_gt_mask_area = min_gt_mask_area
        self.by_box = by_box
        self.by_mask = by_mask
        self.keep_empty = keep_empty

    @autocast_box_type()
    def transform(self, results: dict) -> Union[dict, None]:
        """Transform function to filter annotations.

        Args:
            results (dict): Result dict.

        Returns:
            dict: Updated result dict.
        """
        assert 'gt_bboxes' in results
        gt_bboxes = results['gt_bboxes']
        # Nothing to filter when the image carries no boxes at all.
        if gt_bboxes.shape[0] == 0:
            return results

        tests = []
        if self.by_box:
            # Boolean mask of boxes strictly larger than the minimum size.
            tests.append(
                ((gt_bboxes.widths > self.min_gt_bbox_wh[0]) &
                 (gt_bboxes.heights > self.min_gt_bbox_wh[1])).numpy())
        if self.by_mask:
            assert 'gt_masks' in results
            gt_masks = results['gt_masks']
            tests.append(gt_masks.areas >= self.min_gt_mask_area)

        # An instance survives only if it passes every enabled criterion.
        keep = tests[0]
        for t in tests[1:]:
            keep &= t

        # Optionally drop the whole sample when no instance survives.
        if self.keep_empty and not keep.any():
            return None

        keys = ('gt_bboxes', 'gt_bboxes_labels', 'gt_masks', 'gt_ignore_flags')
        for key in keys:
            if key in results:
                results[key] = results[key][keep]

        return results

    def __repr__(self):
        return self.__class__.__name__ + \
               f'(min_gt_bbox_wh={self.min_gt_bbox_wh}, ' \
               f'min_gt_mask_area={self.min_gt_mask_area}, ' \
               f'by_box={self.by_box}, ' \
               f'by_mask={self.by_mask}, ' \
               f'keep_empty={self.keep_empty})'
@TRANSFORMS.register_module()
class LoadEmptyAnnotations(BaseTransform):
    """Load Empty Annotations for unlabeled images.

    Added Keys:

    - gt_bboxes (np.float32)
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to load the pseudo bbox annotation.
            Defaults to True.
        with_label (bool): Whether to load the pseudo label annotation.
            Defaults to True.
        with_mask (bool): Whether to load the pseudo mask annotation.
            Default: False.
        with_seg (bool): Whether to load the pseudo semantic segmentation
            annotation. Defaults to False.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
    """

    def __init__(self,
                 with_bbox: bool = True,
                 with_label: bool = True,
                 with_mask: bool = False,
                 with_seg: bool = False,
                 seg_ignore_label: int = 255) -> None:
        self.with_bbox = with_bbox
        self.with_label = with_label
        self.with_mask = with_mask
        self.with_seg = with_seg
        self.seg_ignore_label = seg_ignore_label

    def transform(self, results: dict) -> dict:
        """Transform function to load empty annotations.

        Args:
            results (dict): Result dict.

        Returns:
            dict: Updated result dict.
        """
        if self.with_bbox:
            results['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
            results['gt_ignore_flags'] = np.zeros((0, ), dtype=bool)
        if self.with_label:
            results['gt_bboxes_labels'] = np.zeros((0, ), dtype=np.int64)
        if self.with_mask:
            # TODO: support PolygonMasks
            h, w = results['img_shape']
            gt_masks = np.zeros((0, h, w), dtype=np.uint8)
            results['gt_masks'] = BitmapMasks(gt_masks, h, w)
        if self.with_seg:
            h, w = results['img_shape']
            # ``np.full`` is clearer than ``label * np.ones`` and raises on a
            # fill value outside the uint8 range instead of silently wrapping.
            results['gt_seg_map'] = np.full(
                (h, w), self.seg_ignore_label, dtype=np.uint8)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(with_bbox={self.with_bbox}, '
        repr_str += f'with_label={self.with_label}, '
        repr_str += f'with_mask={self.with_mask}, '
        repr_str += f'with_seg={self.with_seg}, '
        repr_str += f'seg_ignore_label={self.seg_ignore_label})'
        return repr_str
@TRANSFORMS.register_module()
class InferencerLoader(BaseTransform):
    """Load an image from ``results['img']``.

    Similar with :obj:`LoadImageFromFile`, but the image has been loaded as
    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
    from webcam.

    Required Keys:

    - img

    Modified Keys:

    - img
    - img_path
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__()
        # Two delegate loaders: one for on-disk image paths and one for
        # in-memory arrays; the proper one is picked per call in ``transform``.
        self.from_file = TRANSFORMS.build(
            dict(type='LoadImageFromFile', **kwargs))
        self.from_ndarray = TRANSFORMS.build(
            dict(type='mmdet.LoadImageFromNDArray', **kwargs))

    def transform(self, results: Union[str, np.ndarray, dict]) -> dict:
        """Normalize the input into a dict and delegate to a loader.

        Args:
            results (str, np.ndarray or dict): The result.

        Returns:
            dict: The dict contains loaded image and meta information.
        """
        # Wrap bare inputs into the dict format the delegates expect.
        if isinstance(results, dict):
            inputs = results
        elif isinstance(results, str):
            inputs = dict(img_path=results)
        elif isinstance(results, np.ndarray):
            inputs = dict(img=results)
        else:
            raise NotImplementedError

        # A decoded in-memory image takes priority over a file path.
        loader = self.from_ndarray if 'img' in inputs else self.from_file
        return loader(inputs)
@TRANSFORMS.register_module()
class LoadTrackAnnotations(LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset. It must load ``instances_ids`` which is only used in the
    tracking tasks. The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,

                # Used in tracking.
                # Id of instances.
                'instance_id': 100,

                # Used in instance/panoptic segmentation. The segmentation mask
                # of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                # one for each connected component of the object. Each
                # list[float] is one simple polygon in the format of
                # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                # coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask in
                # COCO's compressed RLE format. The dict should have keys
                # "size" and "counts". Can be loaded by pycocotools
                'mask': list[list[float]] or dict,
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image
            'gt_bboxes': np.ndarray(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
            # in (x, y, v) order, float type.
        }

    Required Keys:

    - height (optional)
    - width (optional)
    - instances
      - bbox (optional)
      - bbox_label
      - instance_id (optional)
      - mask (optional)
      - ignore_flag (optional)
    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (np.float32)
    - gt_bboxes_labels (np.int32)
    - gt_instances_ids (np.int32)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)
    """

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box annotations.
        """
        gt_bboxes = []
        gt_ignore_flags = []
        # TODO: use bbox_type
        for instance in results['instances']:
            # The datasets which are only format in evaluation don't have
            # groundtruth boxes.
            if 'bbox' in instance:
                gt_bboxes.append(instance['bbox'])
            if 'ignore_flag' in instance:
                gt_ignore_flags.append(instance['ignore_flag'])

        # TODO: check this case
        if len(gt_bboxes) != len(gt_ignore_flags):
            # There may be no ``gt_ignore_flags`` in some cases, we treat them
            # as all False in order to keep the length of ``gt_bboxes`` and
            # ``gt_ignore_flags`` the same
            gt_ignore_flags = [False] * len(gt_bboxes)

        results['gt_bboxes'] = np.array(
            gt_bboxes, dtype=np.float32).reshape(-1, 4)
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)

    def _load_instances_ids(self, results: dict) -> None:
        """Private function to load instances id annotations.

        Args:
            results (dict): Result dict from :obj :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict containing instances id annotations.
        """
        gt_instances_ids = []
        for instance in results['instances']:
            gt_instances_ids.append(instance['instance_id'])
        results['gt_instances_ids'] = np.array(
            gt_instances_ids, dtype=np.int32)

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Args:
            results (dict): Result dict from :obj:``mmcv.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label, instances id
            and semantic segmentation and keypoints annotations.
        """
        results = super().transform(results)
        self._load_instances_ids(results)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(with_bbox={self.with_bbox}, '
        repr_str += f'with_label={self.with_label}, '
        repr_str += f'with_mask={self.with_mask}, '
        repr_str += f'with_seg={self.with_seg}, '
        repr_str += f'poly2mask={self.poly2mask}, '
        repr_str += f"imdecode_backend='{self.imdecode_backend}', "
        # Fixed: this previously read ``self.file_client_args``, an attribute
        # that is never set anywhere in this file (the constructors take
        # ``backend_args``), so repr() would raise AttributeError.
        repr_str += f'backend_args={self.backend_args})'
        return repr_str
mmdet/datasets/transforms/transformers_glip.py
0 → 100644
View file @
ff793569
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
numpy
as
np
from
mmcv.transforms
import
BaseTransform
from
mmdet.registry
import
TRANSFORMS
from
mmdet.structures.bbox
import
HorizontalBoxes
,
autocast_box_type
from
.transforms
import
RandomFlip
@TRANSFORMS.register_module()
class GTBoxSubOne_GLIP(BaseTransform):
    """Subtract 1 from the x2 and y2 coordinates of the gt_bboxes."""

    def transform(self, results: dict) -> dict:
        """Shift the bottom-right corner of every gt box by (-1, -1).

        Args:
            results (dict): Result dict holding an optional ``gt_bboxes``
                entry as either :obj:`np.ndarray` or :obj:`HorizontalBoxes`.

        Returns:
            dict: The same dict with ``gt_bboxes`` adjusted in place.
        """
        if 'gt_bboxes' not in results:
            return results

        boxes = results['gt_bboxes']
        if isinstance(boxes, np.ndarray):
            # Columns 2: are (x2, y2); shift them by -1.
            boxes[:, 2:] -= 1
            results['gt_bboxes'] = boxes
        elif isinstance(boxes, HorizontalBoxes):
            box_tensor = results['gt_bboxes'].tensor
            box_tensor[:, 2:] -= 1
            results['gt_bboxes'] = HorizontalBoxes(box_tensor)
        else:
            raise NotImplementedError
        return results
@TRANSFORMS.register_module()
class RandomFlip_GLIP(RandomFlip):
    """Flip the image & bboxes & masks & segs horizontally or vertically.

    When using horizontal flipping, the corresponding bbox x-coordinate needs
    to be additionally subtracted by one.
    """

    @autocast_box_type()
    def _flip(self, results: dict) -> None:
        """Flip images, bounding boxes, and semantic segmentation map."""
        direction = results['flip_direction']

        # flip image
        results['img'] = mmcv.imflip(results['img'], direction=direction)
        flipped_shape = results['img'].shape[:2]

        # flip bboxes
        if results.get('gt_bboxes', None) is not None:
            results['gt_bboxes'].flip_(flipped_shape, direction)
            # Only change this line
            if direction == 'horizontal':
                results['gt_bboxes'].translate_([-1, 0])

        # TODO: check it
        # flip masks
        if results.get('gt_masks', None) is not None:
            results['gt_masks'] = results['gt_masks'].flip(direction)

        # flip segs
        if results.get('gt_seg_map', None) is not None:
            results['gt_seg_map'] = mmcv.imflip(
                results['gt_seg_map'], direction=direction)

        # record homography matrix for flip
        self._record_homography_matrix(results)
Prev
1
…
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment