ModelZoo / SOLOv2-pytorch / Commits

Commit bfbd2223, authored Oct 21, 2018 by Kai Chen
add custom datasets
parent 4990aae6
Showing 4 changed files with 440 additions and 254 deletions.
mmdet/datasets/__init__.py              +2    -1
mmdet/datasets/coco.py                  +24   -253
mmdet/datasets/custom.py                +274  -0
tools/convert_datasets/pascal_voc.py    +140  -0
mmdet/datasets/__init__.py

+from .custom import CustomDataset
 from .coco import CocoDataset
 from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
 from .utils import to_tensor, random_scale, show_ann

 __all__ = [
-    'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
+    'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
     'build_dataloader', 'to_tensor', 'random_scale', 'show_ann'
 ]
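The net effect of this change is that CustomDataset joins the package's public API next to CocoDataset. A quick sketch of the import surface after the change (not part of the commit itself):

# CustomDataset is now importable from the package root, alongside CocoDataset,
# and star-imports pick it up through the updated __all__.
from mmdet.datasets import CustomDataset, CocoDataset, build_dataloader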
mmdet/datasets/coco.py

-import os.path as osp
-
-import mmcv
 import numpy as np
-from mmcv.parallel import DataContainer as DC
 from pycocotools.coco import COCO
-from torch.utils.data import Dataset

-from .transforms import (ImageTransform, BboxTransform, MaskTransform,
-                         Numpy2Tensor)
-from .utils import to_tensor, show_ann, random_scale
+from .custom import CustomDataset


-class CocoDataset(Dataset):
+class CocoDataset(CustomDataset):

-    def __init__(self,
-                 ann_file,
-                 img_prefix,
-                 img_scale,
-                 img_norm_cfg,
-                 size_divisor=None,
-                 proposal_file=None,
-                 num_max_proposals=1000,
-                 flip_ratio=0,
-                 with_mask=True,
-                 with_crowd=True,
-                 with_label=True,
-                 test_mode=False,
-                 debug=False):
-        # path of the data file
-        self.coco = COCO(ann_file)
-        # filter images with no annotation during training
-        if not test_mode:
-            self.img_ids, self.img_infos = self._filter_imgs()
-        else:
-            self.img_ids = self.coco.getImgIds()
-            self.img_infos = [
-                self.coco.loadImgs(idx)[0] for idx in self.img_ids
-            ]
-        assert len(self.img_ids) == len(self.img_infos)
-        # get the mapping from original category ids to labels
-        self.cat_ids = self.coco.getCatIds()
-        self.cat2label = {
-            cat_id: i + 1
-            for i, cat_id in enumerate(self.cat_ids)
-        }
-        # prefix of images path
-        self.img_prefix = img_prefix
-        # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
-        self.img_scales = img_scale if isinstance(img_scale,
-                                                  list) else [img_scale]
-        assert mmcv.is_list_of(self.img_scales, tuple)
-        # color channel order and normalize configs
-        self.img_norm_cfg = img_norm_cfg
-        # proposals
-        # TODO: revise _filter_imgs to be more flexible
-        if proposal_file is not None:
-            self.proposals = mmcv.load(proposal_file)
-            ori_ids = self.coco.getImgIds()
-            sorted_idx = [ori_ids.index(id) for id in self.img_ids]
-            self.proposals = [self.proposals[idx] for idx in sorted_idx]
-        else:
-            self.proposals = None
-        self.num_max_proposals = num_max_proposals
-        # flip ratio
-        self.flip_ratio = flip_ratio
-        assert flip_ratio >= 0 and flip_ratio <= 1
-        # padding border to ensure the image size can be divided by
-        # size_divisor (used for FPN)
-        self.size_divisor = size_divisor
-        # with crowd or not, False when using RetinaNet
-        self.with_crowd = with_crowd
-        # with mask or not
-        self.with_mask = with_mask
-        # with label is False for RPN
-        self.with_label = with_label
-        # in test mode or not
-        self.test_mode = test_mode
-        # debug mode or not
-        self.debug = debug
-        # set group flag for the sampler
-        self._set_group_flag()
-        # transforms
-        self.img_transform = ImageTransform(
-            size_divisor=self.size_divisor, **self.img_norm_cfg)
-        self.bbox_transform = BboxTransform()
-        self.mask_transform = MaskTransform()
-        self.numpy2tensor = Numpy2Tensor()
-
-    def __len__(self):
-        return len(self.img_ids)
-
-    def _filter_imgs(self, min_size=32):
-        """Filter images too small or without ground truths."""
-        img_ids = list(set([_['image_id'] for _ in self.coco.anns.values()]))
-        valid_ids = []
-        img_infos = []
-        for i in img_ids:
-            info = self.coco.loadImgs(i)[0]
-            if min(info['width'], info['height']) >= min_size:
-                valid_ids.append(i)
-                img_infos.append(info)
-        return valid_ids, img_infos
-
-    def _load_ann_info(self, idx):
-        img_id = self.img_ids[idx]
-        ann_ids = self.coco.getAnnIds(imgIds=img_id)
-        ann_info = self.coco.loadAnns(ann_ids)
-        return ann_info
+    def load_annotations(self, ann_file):
+        self.coco = COCO(ann_file)
+        cat_ids = self.coco.getCatIds()
+        self.cat2label = {
+            cat_id: i + 1
+            for i, cat_id in enumerate(cat_ids)}
+        self.img_ids = self.coco.getImgIds()
+        img_infos = []
+        for i in self.img_ids:
+            info = self.coco.loadImgs(i)[0]
+            info['filename'] = info['file_name']
+            img_infos.append(info)
+        return img_infos
+
+    def get_ann_info(self, idx):
+        img_id = self.img_infos[idx]['id']
+        ann_ids = self.coco.getAnnIds(imgIds=img_id)
+        ann_info = self.coco.loadAnns(ann_ids)
+        return self._parse_ann_info(ann_info)
+
+    def _filter_imgs(self, min_size=32):
+        """Filter images too small or without ground truths."""
+        valid_inds = []
+        ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
+        for i, img_info in enumerate(self.img_infos):
+            if self.img_ids[i] not in ids_with_ann:
+                continue
+            if min(img_info['width'], img_info['height']) >= min_size:
+                valid_inds.append(i)
+        return valid_inds

     def _parse_ann_info(self, ann_info, with_mask=True):
         """Parse bbox and mask annotation.
@@ -172,158 +98,3 @@ class CocoDataset(Dataset):
         ann['mask_polys'] = gt_mask_polys
         ann['poly_lens'] = gt_poly_lens
         return ann
-
-    def _set_group_flag(self):
-        """Set flag according to image aspect ratio.
-
-        Images with aspect ratio greater than 1 will be set as group 1,
-        otherwise group 0.
-        """
-        self.flag = np.zeros(len(self.img_ids), dtype=np.uint8)
-        for i in range(len(self.img_ids)):
-            img_info = self.img_infos[i]
-            if img_info['width'] / img_info['height'] > 1:
-                self.flag[i] = 1
-
-    def _rand_another(self, idx):
-        pool = np.where(self.flag == self.flag[idx])[0]
-        return np.random.choice(pool)
-
-    def __getitem__(self, idx):
-        if self.test_mode:
-            return self.prepare_test_img(idx)
-        while True:
-            img_info = self.img_infos[idx]
-            ann_info = self._load_ann_info(idx)
-
-            # load image
-            img = mmcv.imread(osp.join(self.img_prefix,
-                                       img_info['file_name']))
-            if self.debug:
-                show_ann(self.coco, img, ann_info)
-
-            # load proposals if necessary
-            if self.proposals is not None:
-                proposals = self.proposals[idx][:self.num_max_proposals]
-                # TODO: Handle empty proposals properly. Currently images with
-                # no proposals are just ignored, but they can be used for
-                # training in concept.
-                if len(proposals) == 0:
-                    idx = self._rand_another(idx)
-                    continue
-                if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
-                    raise AssertionError(
-                        'proposals should have shapes (n, 4) or (n, 5), '
-                        'but found {}'.format(proposals.shape))
-                if proposals.shape[1] == 5:
-                    scores = proposals[:, 4, None]
-                    proposals = proposals[:, :4]
-                else:
-                    scores = None
-
-            ann = self._parse_ann_info(ann_info, self.with_mask)
-            gt_bboxes = ann['bboxes']
-            gt_labels = ann['labels']
-            gt_bboxes_ignore = ann['bboxes_ignore']
-            # skip the image if there is no valid gt bbox
-            if len(gt_bboxes) == 0:
-                idx = self._rand_another(idx)
-                continue
-
-            # apply transforms
-            flip = True if np.random.rand() < self.flip_ratio else False
-            img_scale = random_scale(self.img_scales)  # sample a scale
-            img, img_shape, pad_shape, scale_factor = self.img_transform(
-                img, img_scale, flip)
-            if self.proposals is not None:
-                proposals = self.bbox_transform(proposals, img_shape,
-                                                scale_factor, flip)
-                proposals = np.hstack(
-                    [proposals, scores]) if scores is not None else proposals
-            gt_bboxes = self.bbox_transform(gt_bboxes, img_shape,
-                                            scale_factor, flip)
-            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
-                                                   scale_factor, flip)
-            if self.with_mask:
-                gt_masks = self.mask_transform(ann['masks'], pad_shape,
-                                               scale_factor, flip)
-
-            ori_shape = (img_info['height'], img_info['width'], 3)
-            img_meta = dict(
-                ori_shape=ori_shape,
-                img_shape=img_shape,
-                pad_shape=pad_shape,
-                scale_factor=scale_factor,
-                flip=flip)
-
-            data = dict(
-                img=DC(to_tensor(img), stack=True),
-                img_meta=DC(img_meta, cpu_only=True),
-                gt_bboxes=DC(to_tensor(gt_bboxes)))
-            if self.proposals is not None:
-                data['proposals'] = DC(to_tensor(proposals))
-            if self.with_label:
-                data['gt_labels'] = DC(to_tensor(gt_labels))
-            if self.with_crowd:
-                data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
-            if self.with_mask:
-                data['gt_masks'] = DC(gt_masks, cpu_only=True)
-            return data
-
-    def prepare_test_img(self, idx):
-        """Prepare an image for testing (multi-scale and flipping)"""
-        img_info = self.img_infos[idx]
-        img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
-        if self.proposals is not None:
-            proposal = self.proposals[idx][:self.num_max_proposals]
-            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
-                raise AssertionError(
-                    'proposals should have shapes (n, 4) or (n, 5), '
-                    'but found {}'.format(proposal.shape))
-        else:
-            proposal = None
-
-        def prepare_single(img, scale, flip, proposal=None):
-            _img, img_shape, pad_shape, scale_factor = self.img_transform(
-                img, scale, flip)
-            _img = to_tensor(_img)
-            _img_meta = dict(
-                ori_shape=(img_info['height'], img_info['width'], 3),
-                img_shape=img_shape,
-                pad_shape=pad_shape,
-                scale_factor=scale_factor,
-                flip=flip)
-            if proposal is not None:
-                if proposal.shape[1] == 5:
-                    score = proposal[:, 4, None]
-                    proposal = proposal[:, :4]
-                else:
-                    score = None
-                _proposal = self.bbox_transform(proposal, img_shape,
-                                                scale_factor, flip)
-                _proposal = np.hstack(
-                    [_proposal, score]) if score is not None else _proposal
-                _proposal = to_tensor(_proposal)
-            else:
-                _proposal = None
-            return _img, _img_meta, _proposal
-
-        imgs = []
-        img_metas = []
-        proposals = []
-        for scale in self.img_scales:
-            _img, _img_meta, _proposal = prepare_single(
-                img, scale, False, proposal)
-            imgs.append(_img)
-            img_metas.append(DC(_img_meta, cpu_only=True))
-            proposals.append(_proposal)
-            if self.flip_ratio > 0:
-                _img, _img_meta, _proposal = prepare_single(
-                    img, scale, True, proposal)
-                imgs.append(_img)
-                img_metas.append(DC(_img_meta, cpu_only=True))
-                proposals.append(_proposal)
-        data = dict(img=imgs, img_meta=img_metas)
-        if self.proposals is not None:
-            data['proposals'] = proposals
-        return data
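After this refactor, CocoDataset only supplies the COCO-specific loading hooks (load_annotations, get_ann_info, _filter_imgs) and inherits the whole training/testing pipeline from the new CustomDataset below. A minimal instantiation sketch; the paths, scale, and img_norm_cfg values are placeholders rather than anything prescribed by this commit, and the mean/std/to_rgb keys follow mmdetection's usual convention, assumed here:

from mmdet.datasets import CocoDataset

dataset = CocoDataset(
    ann_file='data/coco/annotations/instances_train2017.json',  # placeholder path
    img_prefix='data/coco/train2017/',                          # placeholder path
    img_scale=(1333, 800),            # (long_edge, short_edge)
    img_norm_cfg=dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True),  # assumed keys
    size_divisor=32,
    flip_ratio=0.5)

# __getitem__ now comes from CustomDataset: each item is a dict of DataContainers
# with 'img', 'img_meta', 'gt_bboxes', plus 'gt_labels'/'gt_bboxes_ignore'/'gt_masks'
# depending on with_label/with_crowd/with_mask.
print(len(dataset))  # number of images that survive _filter_imgs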
mmdet/datasets/custom.py (new file, mode 100644)

import os.path as osp

import mmcv
import numpy as np
from mmcv.parallel import DataContainer as DC
from torch.utils.data import Dataset

from .transforms import (ImageTransform, BboxTransform, MaskTransform,
                         Numpy2Tensor)
from .utils import to_tensor, random_scale


class CustomDataset(Dataset):
    """Custom dataset for detection.

    Annotation format:
    [
        {
            'filename': 'a.jpg',
            'width': 1280,
            'height': 720,
            'ann': {
                'bboxes': <np.ndarray> (n, 4),
                'labels': <np.ndarray> (n, ),
                'bboxes_ignore': <np.ndarray> (k, 4),
                'labels_ignore': <np.ndarray> (k, 4) (optional field)
            }
        },
        ...
    ]

    The `ann` field is optional for testing.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 img_scale,
                 img_norm_cfg,
                 size_divisor=None,
                 proposal_file=None,
                 num_max_proposals=1000,
                 flip_ratio=0,
                 with_mask=True,
                 with_crowd=True,
                 with_label=True,
                 test_mode=False):
        # load annotations (and proposals)
        self.img_infos = self.load_annotations(ann_file)
        if proposal_file is not None:
            self.proposals = self.load_proposals(proposal_file)
        else:
            self.proposals = None
        # filter images with no annotation during training
        if not test_mode:
            valid_inds = self._filter_imgs()
            self.img_infos = [self.img_infos[i] for i in valid_inds]
            if self.proposals is not None:
                self.proposals = [self.proposals[i] for i in valid_inds]

        # prefix of images path
        self.img_prefix = img_prefix
        # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
        self.img_scales = img_scale if isinstance(img_scale,
                                                  list) else [img_scale]
        assert mmcv.is_list_of(self.img_scales, tuple)
        # normalization configs
        self.img_norm_cfg = img_norm_cfg
        # max proposals per image
        self.num_max_proposals = num_max_proposals
        # flip ratio
        self.flip_ratio = flip_ratio
        assert flip_ratio >= 0 and flip_ratio <= 1
        # padding border to ensure the image size can be divided by
        # size_divisor (used for FPN)
        self.size_divisor = size_divisor
        # with mask or not (reserved field, takes no effect)
        self.with_mask = with_mask
        # some datasets provide bbox annotations as ignore/crowd/difficult,
        # if `with_crowd` is True, then these info is returned.
        self.with_crowd = with_crowd
        # with label is False for RPN
        self.with_label = with_label
        # in test mode or not
        self.test_mode = test_mode

        # set group flag for the sampler
        if not self.test_mode:
            self._set_group_flag()
        # transforms
        self.img_transform = ImageTransform(
            size_divisor=self.size_divisor, **self.img_norm_cfg)
        self.bbox_transform = BboxTransform()
        self.mask_transform = MaskTransform()
        self.numpy2tensor = Numpy2Tensor()

    def __len__(self):
        return len(self.img_infos)

    def load_annotations(self, ann_file):
        return mmcv.load(ann_file)

    def load_proposals(self, proposal_file):
        return mmcv.load(proposal_file)

    def get_ann_info(self, idx):
        return self.img_infos[idx]['ann']

    def _filter_imgs(self, min_size=32):
        """Filter images too small."""
        valid_inds = []
        for i, img_info in enumerate(self.img_infos):
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _set_group_flag(self):
        """Set flag according to image aspect ratio.

        Images with aspect ratio greater than 1 will be set as group 1,
        otherwise group 0.
        """
        self.flag = np.zeros(len(self), dtype=np.uint8)
        for i in range(len(self)):
            img_info = self.img_infos[i]
            if img_info['width'] / img_info['height'] > 1:
                self.flag[i] = 1

    def _rand_another(self, idx):
        pool = np.where(self.flag == self.flag[idx])[0]
        return np.random.choice(pool)

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_img(idx)
        while True:
            data = self.prepare_train_img(idx)
            if data is None:
                idx = self._rand_another(idx)
                continue
            return data

    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        img_scale = random_scale(self.img_scales)  # sample a scale
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip)
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack(
                [proposals, scores]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data

    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)"""
        img_info = self.img_infos[idx]
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        if self.proposals is not None:
            proposal = self.proposals[idx][:self.num_max_proposals]
            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposal.shape))
        else:
            proposal = None

        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)
            if proposal is not None:
                if proposal.shape[1] == 5:
                    score = proposal[:, 4, None]
                    proposal = proposal[:, :4]
                else:
                    score = None
                _proposal = self.bbox_transform(proposal, img_shape,
                                                scale_factor, flip)
                _proposal = np.hstack(
                    [_proposal, score]) if score is not None else _proposal
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None
            return _img, _img_meta, _proposal

        imgs = []
        img_metas = []
        proposals = []
        for scale in self.img_scales:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, False, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
            if self.flip_ratio > 0:
                _img, _img_meta, _proposal = prepare_single(
                    img, scale, True, proposal)
                imgs.append(_img)
                img_metas.append(DC(_img_meta, cpu_only=True))
                proposals.append(_proposal)
        data = dict(img=imgs, img_meta=img_metas)
        if self.proposals is not None:
            data['proposals'] = proposals
        return data
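The class docstring above is the contract for load_annotations: any mmcv-loadable file holding a list of such dicts will work. A small sketch of writing one such file and pointing CustomDataset at it; the file names, image sizes, and config values below are made up for illustration, and the img_norm_cfg keys are the assumed mean/std/to_rgb convention:

import mmcv
import numpy as np

from mmdet.datasets import CustomDataset

# one entry per image, following the documented annotation format
annotations = [{
    'filename': 'a.jpg',      # relative to img_prefix
    'width': 1280,
    'height': 720,
    'ann': {
        'bboxes': np.array([[100., 100., 300., 400.]], dtype=np.float32),
        'labels': np.array([1], dtype=np.int64),
        'bboxes_ignore': np.zeros((0, 4), dtype=np.float32),
        'labels_ignore': np.zeros((0, ), dtype=np.int64),
    }
}]
mmcv.dump(annotations, 'toy_dataset.pkl')  # load_annotations() reads it back with mmcv.load

dataset = CustomDataset(
    ann_file='toy_dataset.pkl',
    img_prefix='images/',                                           # placeholder directory
    img_scale=(1333, 800),
    img_norm_cfg=dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True),  # assumed keys
    size_divisor=32,
    with_mask=False)  # this toy annotation carries no 'masks' field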
tools/convert_datasets/pascal_voc.py (new file, mode 100644)

import argparse
import os.path as osp
import xml.etree.ElementTree as ET

import mmcv
import numpy as np

from mmdet.core import voc_classes

label_ids = {name: i + 1 for i, name in enumerate(voc_classes())}


def parse_xml(args):
    xml_path, img_path = args
    tree = ET.parse(xml_path)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    bboxes = []
    labels = []
    bboxes_ignore = []
    labels_ignore = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        label = label_ids[name]
        difficult = int(obj.find('difficult').text)
        bnd_box = obj.find('bndbox')
        bbox = [
            int(bnd_box.find('xmin').text),
            int(bnd_box.find('ymin').text),
            int(bnd_box.find('xmax').text),
            int(bnd_box.find('ymax').text)
        ]
        if difficult:
            bboxes_ignore.append(bbox)
            labels_ignore.append(label)
        else:
            bboxes.append(bbox)
            labels.append(label)
    if not bboxes:
        bboxes = np.zeros((0, 4))
        labels = np.zeros((0, ))
    else:
        bboxes = np.array(bboxes, ndmin=2) - 1
        labels = np.array(labels)
    if not bboxes_ignore:
        bboxes_ignore = np.zeros((0, 4))
        labels_ignore = np.zeros((0, ))
    else:
        bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
        labels_ignore = np.array(labels_ignore)
    annotation = {
        'filename': img_path,
        'width': w,
        'height': h,
        'ann': {
            'bboxes': bboxes.astype(np.float32),
            'labels': labels.astype(np.int64),
            'bboxes_ignore': bboxes_ignore.astype(np.float32),
            'labels_ignore': labels_ignore.astype(np.int64)
        }
    }
    return annotation


def cvt_annotations(devkit_path, years, split, out_file):
    if not isinstance(years, list):
        years = [years]
    annotations = []
    for year in years:
        filelist = osp.join(devkit_path,
                            'VOC{}/ImageSets/Main/{}.txt'.format(year, split))
        if not osp.isfile(filelist):
            print('filelist does not exist: {}, skip voc{} {}'.format(
                filelist, year, split))
            return
        img_names = mmcv.list_from_file(filelist)
        xml_paths = [
            osp.join(devkit_path,
                     'VOC{}/Annotations/{}.xml'.format(year, img_name))
            for img_name in img_names
        ]
        img_paths = [
            'VOC{}/JPEGImages/{}.jpg'.format(year, img_name)
            for img_name in img_names
        ]
        part_annotations = mmcv.track_progress(
            parse_xml, list(zip(xml_paths, img_paths)))
        annotations.extend(part_annotations)
    mmcv.dump(annotations, out_file)
    return annotations


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert PASCAL VOC annotations to mmdetection format')
    parser.add_argument('devkit_path', help='pascal voc devkit path')
    parser.add_argument('-o', '--out-dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    devkit_path = args.devkit_path
    out_dir = args.out_dir if args.out_dir else devkit_path
    mmcv.mkdir_or_exist(out_dir)

    years = []
    if osp.isdir(osp.join(devkit_path, 'VOC2007')):
        years.append('2007')
    if osp.isdir(osp.join(devkit_path, 'VOC2012')):
        years.append('2012')
    if '2007' in years and '2012' in years:
        years.append(['2007', '2012'])
    if not years:
        raise IOError('The devkit path {} contains neither "VOC2007" nor '
                      '"VOC2012" subfolder'.format(devkit_path))
    for year in years:
        if year == '2007':
            prefix = 'voc07'
        elif year == '2012':
            prefix = 'voc12'
        elif year == ['2007', '2012']:
            prefix = 'voc0712'
        for split in ['train', 'val', 'trainval']:
            dataset_name = prefix + '_' + split
            print('processing {} ...'.format(dataset_name))
            cvt_annotations(devkit_path, year, split,
                            osp.join(out_dir, dataset_name + '.pkl'))
        if not isinstance(year, list):
            dataset_name = prefix + '_test'
            print('processing {} ...'.format(dataset_name))
            cvt_annotations(devkit_path, year, 'test',
                            osp.join(out_dir, dataset_name + '.pkl'))
    print('Done!')


if __name__ == '__main__':
    main()
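For reference, a sketch of how this converter is meant to be used; the VOCdevkit location is a placeholder, and the output file name follows the prefix + '_' + split rule in main() above. The resulting .pkl entries are already in the CustomDataset middle format:

# python tools/convert_datasets/pascal_voc.py data/VOCdevkit -o data/voc_annotations
import mmcv

anns = mmcv.load('data/voc_annotations/voc07_trainval.pkl')  # written by cvt_annotations()
print(anns[0]['filename'])               # e.g. 'VOC2007/JPEGImages/000005.jpg'
print(anns[0]['ann']['bboxes'].dtype)    # float32 boxes of shape (n, 4), as CustomDataset expects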