Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SOLOv2-pytorch
Commits
bfbd2223
Commit
bfbd2223
authored
Oct 21, 2018
by
Kai Chen
Browse files
add custom datasets
parent
4990aae6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
440 additions
and
254 deletions
+440
-254
mmdet/datasets/__init__.py
mmdet/datasets/__init__.py
+2
-1
mmdet/datasets/coco.py
mmdet/datasets/coco.py
+24
-253
mmdet/datasets/custom.py
mmdet/datasets/custom.py
+274
-0
tools/convert_datasets/pascal_voc.py
tools/convert_datasets/pascal_voc.py
+140
-0
No files found.
from .custom import CustomDataset
from .coco import CocoDataset
from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann

# Public API of mmdet.datasets. Each name is listed exactly once; the
# scraped diff showed duplicate entries which are an artifact of the
# old/new lines being interleaved.
__all__ = [
    'CustomDataset', 'CocoDataset', 'GroupSampler',
    'DistributedGroupSampler', 'build_dataloader', 'to_tensor',
    'random_scale', 'show_ann'
]
mmdet/datasets/coco.py
View file @
bfbd2223
import
os.path
as
osp
import
mmcv
import
numpy
as
np
from
mmcv.parallel
import
DataContainer
as
DC
from
pycocotools.coco
import
COCO
from
torch.utils.data
import
Dataset
from
.transforms
import
(
ImageTransform
,
BboxTransform
,
MaskTransform
,
Numpy2Tensor
)
from
.utils
import
to_tensor
,
show_ann
,
random_scale
from
.custom
import
CustomDataset
class CocoDataset(CustomDataset):

    def load_annotations(self, ann_file):
        """Initialize the COCO api and build the image info list.

        Args:
            ann_file (str): path of the COCO-format annotation json.

        Returns:
            list[dict]: one info dict per image, each carrying a
            'filename' key so that CustomDataset can load it.
        """
        self.coco = COCO(ann_file)
        # map original (non-contiguous) COCO category ids to labels 1..K
        self.cat_ids = self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs(i)[0]
            # CustomDataset expects a 'filename' key, COCO provides 'file_name'
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos
def get_ann_info(self, idx):
    """Fetch and parse the COCO annotations for the image at `idx`.

    Args:
        idx (int): index into self.img_infos.

    Returns:
        dict: parsed annotation produced by self._parse_ann_info.
    """
    img_id = self.img_infos[idx]['id']
    ann_ids = self.coco.getAnnIds(imgIds=img_id)
    ann_info = self.coco.loadAnns(ann_ids)
    # the raw list of COCO ann dicts is converted into the unified
    # bboxes/labels format expected by CustomDataset
    return self._parse_ann_info(ann_info)
def _filter_imgs(self, min_size=32):
    """Filter images too small or without ground truths."""
    # image ids that own at least one annotation
    annotated = set(ann['image_id'] for ann in self.coco.anns.values())
    keep = []
    for pos, info in enumerate(self.img_infos):
        if self.img_ids[pos] not in annotated:
            continue
        if min(info['width'], info['height']) >= min_size:
            keep.append(pos)
    return keep
def
_parse_ann_info
(
self
,
ann_info
,
with_mask
=
True
):
"""Parse bbox and mask annotation.
...
...
@@ -172,158 +98,3 @@ class CocoDataset(Dataset):
ann
[
'mask_polys'
]
=
gt_mask_polys
ann
[
'poly_lens'
]
=
gt_poly_lens
return
ann
def _set_group_flag(self):
    """Set flag according to image aspect ratio.

    Images with aspect ratio greater than 1 will be set as group 1,
    otherwise group 0.
    """
    num_imgs = len(self.img_ids)
    self.flag = np.zeros(num_imgs, dtype=np.uint8)
    for pos in range(num_imgs):
        info = self.img_infos[pos]
        # landscape (w/h > 1) -> group 1; portrait/square -> group 0
        if info['width'] / info['height'] > 1:
            self.flag[pos] = 1
def _rand_another(self, idx):
    """Randomly pick another index from the same aspect-ratio group."""
    same_group = np.where(self.flag == self.flag[idx])[0]
    return np.random.choice(same_group)
def __getitem__(self, idx):
    """Return a training sample dict, or delegate to prepare_test_img.

    Re-samples another image from the same aspect-ratio group when the
    current one has no valid proposals / gt bboxes.
    """
    if self.test_mode:
        return self.prepare_test_img(idx)
    while True:
        img_info = self.img_infos[idx]
        ann_info = self._load_ann_info(idx)

        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
        if self.debug:
            show_ann(self.coco, img, ann_info)

        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                idx = self._rand_another(idx)
                continue
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                # split off objectness scores; re-attached after transform
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self._parse_ann_info(ann_info, self.with_mask)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        gt_bboxes_ignore = ann['bboxes_ignore']
        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            idx = self._rand_another(idx)
            continue

        # apply transforms; bool() keeps `flip` a plain Python bool
        # instead of the np.bool_ produced by the comparison
        flip = bool(np.random.rand() < self.flip_ratio)
        img_scale = random_scale(self.img_scales)  # sample a scale
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip)
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape,
                                            scale_factor, flip)
            proposals = np.hstack([proposals, scores
                                   ]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                               scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping)"""
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
    if self.proposals is not None:
        proposal = self.proposals[idx][:self.num_max_proposals]
        if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposal.shape))
    else:
        proposal = None

    def prepare_single(img, scale, flip, proposal=None):
        # transform one (scale, flip) variant of the image and proposal
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip)
        _img = to_tensor(_img)
        _img_meta = dict(
            ori_shape=(img_info['height'], img_info['width'], 3),
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)
        if proposal is not None:
            if proposal.shape[1] == 5:
                score = proposal[:, 4, None]
                proposal = proposal[:, :4]
            else:
                score = None
            _proposal = self.bbox_transform(proposal, img_shape,
                                            scale_factor, flip)
            _proposal = np.hstack([_proposal, score
                                   ]) if score is not None else _proposal
            _proposal = to_tensor(_proposal)
        else:
            _proposal = None
        return _img, _img_meta, _proposal

    imgs = []
    img_metas = []
    proposals = []
    for scale in self.img_scales:
        # unflipped variant for every scale
        _img, _img_meta, _proposal = prepare_single(img, scale, False,
                                                    proposal)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        proposals.append(_proposal)
        if self.flip_ratio > 0:
            # flipped variant as well when flipping is enabled
            _img, _img_meta, _proposal = prepare_single(
                img, scale, True, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
    data = dict(img=imgs, img_meta=img_metas)
    if self.proposals is not None:
        data['proposals'] = proposals
    return data
mmdet/datasets/custom.py
0 → 100644
View file @
bfbd2223
import
os.path
as
osp
import
mmcv
import
numpy
as
np
from
mmcv.parallel
import
DataContainer
as
DC
from
torch.utils.data
import
Dataset
from
.transforms
import
(
ImageTransform
,
BboxTransform
,
MaskTransform
,
Numpy2Tensor
)
from
.utils
import
to_tensor
,
random_scale
class CustomDataset(Dataset):
    """Custom dataset for detection.

    Annotation format:
        [
            {
                'filename': 'a.jpg',
                'width': 1280,
                'height': 720,
                'ann': {
                    'bboxes': <np.ndarray> (n, 4),
                    'labels': <np.ndarray> (n, ),
                    'bboxes_ignore': <np.ndarray> (k, 4),
                    'labels_ignore': <np.ndarray> (k, 4) (optional field)
                }
            },
            ...
        ]

    The `ann` field is optional for testing.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 img_scale,
                 img_norm_cfg,
                 size_divisor=None,
                 proposal_file=None,
                 num_max_proposals=1000,
                 flip_ratio=0,
                 with_mask=True,
                 with_crowd=True,
                 with_label=True,
                 test_mode=False):
        # load annotations (and proposals)
        self.img_infos = self.load_annotations(ann_file)
        if proposal_file is not None:
            self.proposals = self.load_proposals(proposal_file)
        else:
            self.proposals = None
        # filter images with no annotation during training
        if not test_mode:
            valid_inds = self._filter_imgs()
            self.img_infos = [self.img_infos[i] for i in valid_inds]
            if self.proposals is not None:
                self.proposals = [self.proposals[i] for i in valid_inds]

        # prefix of images path
        self.img_prefix = img_prefix
        # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
        self.img_scales = img_scale if isinstance(img_scale,
                                                  list) else [img_scale]
        assert mmcv.is_list_of(self.img_scales, tuple)
        # normalization configs
        self.img_norm_cfg = img_norm_cfg
        # max proposals per image
        self.num_max_proposals = num_max_proposals
        # flip ratio
        self.flip_ratio = flip_ratio
        assert flip_ratio >= 0 and flip_ratio <= 1
        # padding border to ensure the image size can be divided by
        # size_divisor (used for FPN)
        self.size_divisor = size_divisor
        # with mask or not (reserved field, takes no effect)
        self.with_mask = with_mask
        # some datasets provide bbox annotations as ignore/crowd/difficult,
        # if `with_crowd` is True, then these info is returned.
        self.with_crowd = with_crowd
        # with label is False for RPN
        self.with_label = with_label
        # in test mode or not
        self.test_mode = test_mode

        # set group flag for the sampler
        if not self.test_mode:
            self._set_group_flag()
        # transforms
        self.img_transform = ImageTransform(
            size_divisor=self.size_divisor, **self.img_norm_cfg)
        self.bbox_transform = BboxTransform()
        self.mask_transform = MaskTransform()
        self.numpy2tensor = Numpy2Tensor()

    def __len__(self):
        return len(self.img_infos)

    def load_annotations(self, ann_file):
        """Load image infos from an mmcv-serialized annotation file."""
        return mmcv.load(ann_file)

    def load_proposals(self, proposal_file):
        """Load pre-computed proposals from an mmcv-serialized file."""
        return mmcv.load(proposal_file)

    def get_ann_info(self, idx):
        """Return the 'ann' dict for the image at `idx`."""
        return self.img_infos[idx]['ann']

    def _filter_imgs(self, min_size=32):
        """Filter images too small."""
        valid_inds = []
        for i, img_info in enumerate(self.img_infos):
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _set_group_flag(self):
        """Set flag according to image aspect ratio.

        Images with aspect ratio greater than 1 will be set as group 1,
        otherwise group 0.
        """
        self.flag = np.zeros(len(self), dtype=np.uint8)
        for i in range(len(self)):
            img_info = self.img_infos[i]
            if img_info['width'] / img_info['height'] > 1:
                self.flag[i] = 1

    def _rand_another(self, idx):
        """Randomly pick another index from the same aspect-ratio group."""
        pool = np.where(self.flag == self.flag[idx])[0]
        return np.random.choice(pool)

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_img(idx)
        while True:
            data = self.prepare_train_img(idx)
            if data is None:
                # invalid sample (no gt / no proposals): resample from the
                # same aspect-ratio group
                idx = self._rand_another(idx)
                continue
            return data

    def prepare_train_img(self, idx):
        """Build a training sample dict, or None if the image is unusable."""
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                # split off objectness scores; re-attached after transform
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # apply transforms; bool() keeps `flip` a plain Python bool
        # instead of the np.bool_ produced by the comparison
        flip = bool(np.random.rand() < self.flip_ratio)
        img_scale = random_scale(self.img_scales)  # sample a scale
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip)
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape,
                                            scale_factor, flip)
            proposals = np.hstack([proposals, scores
                                   ]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore,
                                                   img_shape, scale_factor,
                                                   flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data

    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)"""
        img_info = self.img_infos[idx]
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        if self.proposals is not None:
            proposal = self.proposals[idx][:self.num_max_proposals]
            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposal.shape))
        else:
            proposal = None

        def prepare_single(img, scale, flip, proposal=None):
            # transform one (scale, flip) variant of the image and proposal
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)
            if proposal is not None:
                if proposal.shape[1] == 5:
                    score = proposal[:, 4, None]
                    proposal = proposal[:, :4]
                else:
                    score = None
                _proposal = self.bbox_transform(proposal, img_shape,
                                                scale_factor, flip)
                _proposal = np.hstack([_proposal, score
                                       ]) if score is not None else _proposal
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None
            return _img, _img_meta, _proposal

        imgs = []
        img_metas = []
        proposals = []
        for scale in self.img_scales:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, False, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
            if self.flip_ratio > 0:
                _img, _img_meta, _proposal = prepare_single(
                    img, scale, True, proposal)
                imgs.append(_img)
                img_metas.append(DC(_img_meta, cpu_only=True))
                proposals.append(_proposal)
        data = dict(img=imgs, img_meta=img_metas)
        if self.proposals is not None:
            data['proposals'] = proposals
        return data
\ No newline at end of file
tools/convert_datasets/pascal_voc.py
0 → 100644
View file @
bfbd2223
import
argparse
import
os.path
as
osp
import
xml.etree.ElementTree
as
ET
import
mmcv
import
numpy
as
np
from
mmdet.core
import
voc_classes
# VOC class name -> contiguous label id, starting at 1 (0 is background)
label_ids = {
    cls_name: cls_idx
    for cls_idx, cls_name in enumerate(voc_classes(), start=1)
}
def parse_xml(args):
    """Parse one VOC xml file into an mmdetection-format annotation dict.

    `args` is a (xml_path, img_path) pair so the function can be mapped
    over a zipped list by mmcv.track_progress.
    """
    xml_path, img_path = args
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    bboxes, labels = [], []
    bboxes_ignore, labels_ignore = [], []
    for obj in root.findall('object'):
        label = label_ids[obj.find('name').text]
        difficult = int(obj.find('difficult').text)
        bnd_box = obj.find('bndbox')
        bbox = [
            int(bnd_box.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        # difficult objects go into the ignore lists
        if difficult:
            bboxes_ignore.append(bbox)
            labels_ignore.append(label)
        else:
            bboxes.append(bbox)
            labels.append(label)
    if not bboxes:
        bboxes = np.zeros((0, 4))
        labels = np.zeros((0, ))
    else:
        # VOC coordinates are 1-based; shift to 0-based
        bboxes = np.array(bboxes, ndmin=2) - 1
        labels = np.array(labels)
    if not bboxes_ignore:
        bboxes_ignore = np.zeros((0, 4))
        labels_ignore = np.zeros((0, ))
    else:
        bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
        labels_ignore = np.array(labels_ignore)
    annotation = {
        'filename': img_path,
        'width': w,
        'height': h,
        'ann': {
            'bboxes': bboxes.astype(np.float32),
            'labels': labels.astype(np.int64),
            'bboxes_ignore': bboxes_ignore.astype(np.float32),
            'labels_ignore': labels_ignore.astype(np.int64)
        }
    }
    return annotation
def cvt_annotations(devkit_path, years, split, out_file):
    """Convert VOC annotations for `split` across `years`; dump to out_file.

    Returns the collected annotation list, or None if any year's file
    list is missing (nothing is dumped in that case).
    """
    if not isinstance(years, list):
        years = [years]
    annotations = []
    for year in years:
        filelist = osp.join(devkit_path,
                            'VOC{}/ImageSets/Main/{}.txt'.format(year, split))
        if not osp.isfile(filelist):
            print('filelist does not exist: {}, skip voc{} {}'.format(
                filelist, year, split))
            return
        img_names = mmcv.list_from_file(filelist)
        xml_paths = [
            osp.join(devkit_path,
                     'VOC{}/Annotations/{}.xml'.format(year, img_name))
            for img_name in img_names
        ]
        # image paths are stored relative to the devkit root
        img_paths = [
            'VOC{}/JPEGImages/{}.jpg'.format(year, img_name)
            for img_name in img_names
        ]
        part_annotations = mmcv.track_progress(
            parse_xml, list(zip(xml_paths, img_paths)))
        annotations.extend(part_annotations)
    mmcv.dump(annotations, out_file)
    return annotations
def parse_args():
    """Build and parse the command-line arguments for the converter."""
    parser = argparse.ArgumentParser(
        description='Convert PASCAL VOC annotations to mmdetection format')
    parser.add_argument('devkit_path', help='pascal voc devkit path')
    parser.add_argument('-o', '--out-dir', help='output path')
    return parser.parse_args()
def main():
    """Convert all available VOC year/split annotations under the devkit.

    Detects VOC2007/VOC2012 subfolders, converts each year's
    train/val/trainval splits (plus test for single years) to .pkl
    annotation files in the output directory.
    """
    args = parse_args()
    devkit_path = args.devkit_path
    out_dir = args.out_dir if args.out_dir else devkit_path
    mmcv.mkdir_or_exist(out_dir)

    years = []
    if osp.isdir(osp.join(devkit_path, 'VOC2007')):
        years.append('2007')
    if osp.isdir(osp.join(devkit_path, 'VOC2012')):
        years.append('2012')
    if '2007' in years and '2012' in years:
        # combined 07+12 dataset
        years.append(['2007', '2012'])
    if not years:
        raise IOError('The devkit path {} contains neither "VOC2007" nor '
                      '"VOC2012" subfolder'.format(devkit_path))
    for year in years:
        if year == '2007':
            prefix = 'voc07'
        elif year == '2012':
            prefix = 'voc12'
        elif year == ['2007', '2012']:
            prefix = 'voc0712'
        else:
            # defensive: without this, an unexpected entry would leave
            # `prefix` unbound and raise a confusing NameError below
            raise ValueError('unexpected year entry: {}'.format(year))
        for split in ['train', 'val', 'trainval']:
            dataset_name = prefix + '_' + split
            print('processing {} ...'.format(dataset_name))
            cvt_annotations(devkit_path, year, split,
                            osp.join(out_dir, dataset_name + '.pkl'))
        # the combined 07+12 set has no unified test split
        if not isinstance(year, list):
            dataset_name = prefix + '_test'
            print('processing {} ...'.format(dataset_name))
            cvt_annotations(devkit_path, year, 'test',
                            osp.join(out_dir, dataset_name + '.pkl'))
    print('Done!')


if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment