chenych / Painter_pytorch — Commit 106580f9
First commit
Authored Dec 29, 2023 by chenych
Pipeline #689 failed with stages in 0 seconds
Showing 20 changed files with 2787 additions and 0 deletions
data/mmdet_custom/configs/coco_panoptic_ca_inst_gen_orgflip.py (+152, -0)
data/mmdet_custom/data/coco_panoptic.py (+210, -0)
data/mmdet_custom/data/pipelines/transforms.py (+177, -0)
data/mmdet_custom/gen_json_coco_panoptic_inst.py (+49, -0)
data/mmdet_custom/tools/dist_test.sh (+22, -0)
data/mmdet_custom/tools/dist_train.sh (+20, -0)
data/mmdet_custom/tools/test.py (+231, -0)
data/mmdet_custom/tools/train.py (+249, -0)
data/mmpose_custom/apis/test.py (+41, -0)
data/mmpose_custom/apis/train.py (+121, -0)
data/mmpose_custom/configs/_base_/coco.py (+181, -0)
data/mmpose_custom/configs/_base_/default_runtime.py (+20, -0)
data/mmpose_custom/configs/coco_256x192_gendata.py (+154, -0)
data/mmpose_custom/configs/coco_256x192_gendata_test.py (+153, -0)
data/mmpose_custom/configs/coco_256x192_gendata_testflip.py (+153, -0)
data/mmpose_custom/configs/coco_256x192_test_offline.py (+161, -0)
data/mmpose_custom/data/pipelines/custom_transform.py (+128, -0)
data/mmpose_custom/data/pipelines/top_down_transform.py (+182, -0)
data/mmpose_custom/data/topdown_coco_dataset.py (+318, -0)
data/mmpose_custom/gen_json_coco_pose.py (+65, -0)

data/mmdet_custom/configs/coco_panoptic_ca_inst_gen_orgflip.py  (new file, mode 100644)

# modified from mask2former config
_base_ = [
    './_base_/dataset/coco_panoptic.py', './_base_/default_runtime.py'
]
num_things_classes = 80
num_stuff_classes = 53
num_classes = num_things_classes + num_stuff_classes
model = None

# dataset settings
image_size = (1024, 1024)
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(
        type='LoadPanopticAnnotations',
        with_bbox=True,
        with_mask=True,
        with_seg=True),
    dict(type='RandomFlip', flip_ratio=1.0),
    # # large scale jittering
    dict(
        type='Resize',
        img_scale=image_size,
        ratio_range=(1.0, 1.0),
        multiscale_mode='range',
        keep_ratio=False),
    # dict(
    #     type='RandomCrop',
    #     crop_size=image_size,
    #     crop_type='absolute',
    #     recompute_bbox=True,
    #     allow_negative_crop=True),
    # dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=image_size),
    dict(
        type='SaveDataPairCustom',
        dir_name='train_orgflip',
        target_path='/home/datasets/coco/pano_ca_inst',
    ),
    # custom: we don't care about the transforms afterward
    dict(type='DefaultFormatBundle', img_to_float=True),
    dict(
        type='Collect',
        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(
        type='LoadPanopticAnnotations',
        with_bbox=True,
        with_mask=True,
        with_seg=True),
    dict(type='RandomFlip', flip_ratio=0.0),
    # large scale jittering
    dict(
        type='Resize',
        img_scale=image_size,
        ratio_range=(1.0, 1.0),
        multiscale_mode='range',
        keep_ratio=False),
    dict(type='Pad', size=image_size),
    dict(
        type='SaveDataPairCustom',
        dir_name='val_org',
        target_path='/home/datasets/coco/pano_ca_inst',
    ),
    # custom: we don't care about the transforms afterward
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle', img_to_float=True),
    dict(
        type='Collect',
        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
]
data_root = '/home/datasets/coco/'
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(pipeline=train_pipeline),
    val=dict(
        pipeline=test_pipeline,
        ins_ann_file=data_root + 'annotations/instances_val2017.json',
    ),
    test=dict(
        pipeline=test_pipeline,
        ins_ann_file=data_root + 'annotations/instances_val2017.json',
    ))

embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
# optimizer
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.05,
    eps=1e-8,
    betas=(0.9, 0.999),
    paramwise_cfg=dict(
        custom_keys={
            'backbone': dict(lr_mult=0.1, decay_mult=1.0),
            'query_embed': embed_multi,
            'query_feat': embed_multi,
            'level_embed': embed_multi,
        },
        norm_decay_mult=0.0))
optimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2))

custom = dict(
    load_data_only=True,
)

by_epoch = True
# learning policy
lr_config = dict(
    policy='step',
    gamma=0.1,
    by_epoch=by_epoch,
    step=[327778, 355092],
    warmup='linear',
    warmup_by_epoch=by_epoch,
    warmup_ratio=1.0,  # no warmup
    warmup_iters=10)
max_iters = 368750
# runner = dict(type='IterBasedRunner', max_iters=max_iters)
runner = dict(type='EpochBasedRunner', max_epochs=1)

# we prefer by epoch
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=by_epoch),
        dict(type='TensorboardLoggerHook', by_epoch=by_epoch)
    ])
interval = 5000
workflow = [('train', interval)]
checkpoint_config = dict(
    by_epoch=by_epoch, interval=interval, save_last=True, max_keep_ckpts=3)
# Before the 365001st iteration, we do evaluation every 5000 iterations.
# After the 365000th iteration, we do evaluation once every 368750 iterations,
# which means that we do evaluation at the end of training.
dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
evaluation = dict(
    interval=interval,
    dynamic_intervals=dynamic_intervals,
    metric=['PQ', 'bbox', 'segm'])

# import newly registered module
custom_imports = dict(
    imports=[
        'data.coco_panoptic',
        'data.pipelines.transforms',
    ],
    allow_failed_imports=False)
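
The dynamic evaluation interval above is plain arithmetic on max_iters and interval; a quick standalone check, with the values copied from this config (not repo code):

max_iters = 368750
interval = 5000
# Largest multiple of `interval` below `max_iters`, plus one: regular
# evaluation runs every 5000 iterations up to 365000, and the final window
# (365001..368750) evaluates only once, at the end of training.
dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
assert dynamic_intervals == [(365001, 368750)]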

data/mmdet_custom/data/coco_panoptic.py  (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import itertools
import os
from collections import defaultdict

import mmcv
import numpy as np
from mmcv.utils import print_log
from terminaltables import AsciiTable

from mmdet.core import INSTANCE_OFFSET
from mmdet.datasets.api_wrappers import COCO, pq_compute_multi_core
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.coco import CocoDataset
from mmdet.datasets.coco_panoptic import CocoPanopticDataset, COCOPanoptic

try:
    import panopticapi
    from panopticapi.evaluation import VOID
    from panopticapi.utils import id2rgb
except ImportError:
    panopticapi = None
    id2rgb = None
    VOID = None

__all__ = ['CocoPanopticDatasetCustom']


class COCOPanoptic(COCO):
    """This wrapper is for loading the panoptic style annotation file.

    The format is shown in the CocoPanopticDataset class.

    Args:
        annotation_file (str): Path of annotation file.
    """

    def __init__(self, annotation_file=None):
        if panopticapi is None:
            raise RuntimeError(
                'panopticapi is not installed, please install it by: '
                'pip install git+https://github.com/cocodataset/'
                'panopticapi.git.')
        super(COCOPanoptic, self).__init__(annotation_file)

    def createIndex(self, use_ext=False):
        assert use_ext is False
        # create index
        print('creating index...')
        # anns stores 'segment_id -> annotation'
        anns, cats, imgs = {}, {}, {}
        img_to_anns, cat_to_imgs = defaultdict(list), defaultdict(list)
        if 'annotations' in self.dataset:
            for ann, img_info in zip(self.dataset['annotations'],
                                     self.dataset['images']):
                img_info['segm_file'] = ann['file_name']
                for seg_ann in ann['segments_info']:
                    # to match with instance.json
                    seg_ann['image_id'] = ann['image_id']
                    seg_ann['height'] = img_info['height']
                    seg_ann['width'] = img_info['width']
                    img_to_anns[ann['image_id']].append(seg_ann)
                    # segment_id is not unique in coco dataset orz...
                    if seg_ann['id'] in anns.keys():
                        anns[seg_ann['id']].append(seg_ann)
                    else:
                        anns[seg_ann['id']] = [seg_ann]

        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img

        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                for seg_ann in ann['segments_info']:
                    cat_to_imgs[seg_ann['category_id']].append(ann['image_id'])

        print('index created!')

        self.anns = anns
        self.imgToAnns = img_to_anns
        self.catToImgs = cat_to_imgs
        self.imgs = imgs
        self.cats = cats

    def load_anns(self, ids=[]):
        """Load anns with the specified ids.

        ``self.anns`` is a list of annotation lists instead of a
        list of annotations.

        Args:
            ids (int array): integer ids specifying anns

        Returns:
            anns (object array): loaded ann objects
        """
        anns = []

        if hasattr(ids, '__iter__') and hasattr(ids, '__len__'):
            # self.anns is a list of annotation lists instead of
            # a list of annotations
            for id in ids:
                anns += self.anns[id]
            return anns
        elif type(ids) == int:
            return self.anns[ids]


@DATASETS.register_module()
class CocoPanopticDatasetCustom(CocoPanopticDataset):
    """Coco dataset for Panoptic segmentation.

    The annotation format is shown as follows. The `ann` field is optional
    for testing.

    .. code-block:: none

        [
            {
                'filename': f'{image_id:012}.png',
                'image_id': 9,
                'segments_info': [
                    {
                        'id': 8345037,  # segment_id in panoptic png,
                                        # converted from rgb
                        'category_id': 51,
                        'iscrowd': 0,
                        'bbox': (x1, y1, w, h),
                        'area': 24315,
                        'segmentation': list,  # coded mask
                    },
                    ...
                ]
            },
            ...
        ]

    Args:
        ann_file (str): Panoptic segmentation annotation file path.
        pipeline (list[dict]): Processing pipeline.
        ins_ann_file (str): Instance segmentation annotation file path.
            Defaults to None.
        classes (str | Sequence[str], optional): Specify classes to load.
            If is None, ``cls.CLASSES`` will be used. Defaults to None.
        data_root (str, optional): Data root for ``ann_file``,
            ``ins_ann_file``, ``img_prefix``, ``seg_prefix``,
            ``proposal_file`` if specified. Defaults to None.
        img_prefix (str, optional): Prefix of path to images. Defaults to ''.
        seg_prefix (str, optional): Prefix of path to segmentation files.
            Defaults to None.
        proposal_file (str, optional): Path to proposal file. Defaults to None.
        test_mode (bool, optional): If set True, annotation will not be loaded.
            Defaults to False.
        filter_empty_gt (bool, optional): If set true, images without bounding
            boxes of the dataset's classes will be filtered out. This option
            only works when `test_mode=False`, i.e., we never filter images
            during tests. Defaults to True.
        file_client_args (:obj:`mmcv.ConfigDict` | dict): file client args.
            Defaults to dict(backend='disk').
    """

    def load_annotations(self, ann_file):
        """Load annotation from COCO Panoptic style annotation file.

        Args:
            ann_file (str): Path of annotation file.

        Returns:
            list[dict]: Annotation info from COCO api.
        """
        self.coco = COCOPanoptic(ann_file)
        self.cat_ids = self.coco.get_cat_ids()
        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
        self.categories = self.coco.cats
        self.img_ids = self.coco.get_img_ids()
        data_infos = []
        for i in self.img_ids:
            info = self.coco.load_imgs([i])[0]
            info['filename'] = info['file_name']
            info['segm_file'] = info['filename'].replace('jpg', 'png')
            data_infos.append(info)
        return data_infos

    def prepare_test_img(self, idx):
        """Get testing data after pipeline.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Testing data after pipeline with new keys introduced by \
                pipeline.
        """
        img_info = self.data_infos[idx]
        # results = dict(img_info=img_info)
        ann_info = self.get_ann_info(idx)
        results = dict(img_info=img_info, ann_info=ann_info)
        if self.proposals is not None:
            results['proposals'] = self.proposals[idx]
        self.pre_pipeline(results)
        return self.pipeline(results)
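
As the createIndex comment notes, segment ids are not unique across COCO panoptic annotations, so self.anns maps each id to a list and load_anns concatenates those lists. A toy sketch with illustrative values (not repo code):

# self.anns after createIndex, with one duplicated segment id:
anns = {
    8345037: [{'image_id': 9}, {'image_id': 42}],  # id reused by two images
    8345038: [{'image_id': 9}],
}
loaded = []
for ann_id in [8345037, 8345038]:
    loaded += anns[ann_id]  # mirrors load_anns for an iterable of ids
assert len(loaded) == 3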

data/mmdet_custom/data/pipelines/transforms.py  (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import copy
import inspect
import math
import warnings
import os

from PIL import Image
import cv2
import mmcv
import numpy as np
from numpy import random

from mmdet.datasets.builder import PIPELINES

try:
    from imagecorruptions import corrupt
except ImportError:
    corrupt = None

try:
    import albumentations
    from albumentations import Compose
except ImportError:
    albumentations = None
    Compose = None


def define_colors_per_location_r_gb(num_location_r=16, num_location_gb=20):
    sep_r = 255 // num_location_r
    sep_gb = 256 // num_location_gb + 1  # +1 for bigger sep in gb

    color_dict = {}
    # R = G = B = 0
    # B += separation_per_channel  # offset for the first loop
    for global_y in range(4):
        for global_x in range(4):
            global_locat = (global_x, global_y)
            global_locat_sum = global_y * 4 + global_x
            R = 255 - global_locat_sum * sep_r
            for local_y in range(num_location_gb):
                for local_x in range(num_location_gb):
                    local_locat = (local_x, local_y)
                    G = 255 - local_y * sep_gb
                    B = 255 - local_x * sep_gb
                    assert (R < 256) and (G < 256) and (B < 256)
                    assert (R >= 0) and (G >= 0) and (B >= 0)
                    assert (R, G, B) not in color_dict.values()
                    location = (global_locat, local_locat)
                    color_dict[location] = (R, G, B)
    # colors = [v for k, v in color_dict.items()]
    return color_dict


def simplify_color_dict(color_dict, num_location_r=16, num_location_gb=20):
    color_dict_simple = {}
    for k, v in color_dict.items():
        global_locat, local_locat = k
        global_x, global_y = global_locat
        local_x, local_y = local_locat
        absolute_x = global_x * num_location_gb + local_x
        absolute_y = global_y * num_location_gb + local_y
        color_dict_simple[(absolute_x, absolute_y)] = np.array(v)
    return color_dict_simple


@PIPELINES.register_module()
class SaveDataPairCustom:
    """Save PanoInst Masks."""

    def __init__(self,
                 dir_name,
                 target_path='../datasets/coco/pano_ca_inst',
                 method='mass_center',
                 num_location_r=16,
                 num_location_gb=20):
        self.dir_name = dir_name
        self.target_path = target_path
        output_dir = os.path.join(self.target_path, self.dir_name)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        self.method = method
        self.color_dict_global_local = define_colors_per_location_r_gb(
            num_location_r=num_location_r, num_location_gb=num_location_gb)
        self.color_dict = simplify_color_dict(
            self.color_dict_global_local,
            num_location_r=num_location_r,
            num_location_gb=num_location_gb)

    def __call__(self, results):
        """Call function to save images."""
        # get keys of interest
        img = results['img']  # (h, w, 3), ndarray, range 0-255
        gt_bboxes = results['gt_bboxes']  # (num_inst, 4), ndarray, xyxy
        gt_labels = results['gt_labels']  # (num_inst, )
        # BitmapMasks, gt_masks.masks: (num_inst, h, w)
        gt_masks = results['gt_masks'].masks
        # gt_semantic_seg = results['gt_semantic_seg']

        # check input
        assert (gt_labels >= 0).all() and (gt_labels < 80).all()
        assert (np.sum(gt_masks, axis=0) >= 0).all() \
            and (np.sum(gt_masks, axis=0) <= 1).all()

        # get box centers
        h, w, _ = img.shape
        num_inst = len(gt_labels)
        segmentation = np.zeros((h, w, 3), dtype="uint8")
        for idx in range(num_inst):
            # iscrowd already filtered, and are stored in
            # results['ann_info']['bboxes_ignore'], but some iscrowd are not
            # correctly labelled, e.g., 000000415447
            # if (np.sum(gt_bboxes[idx] == results['ann_info']['bboxes_ignore'], axis=1) == 4).any():
            # if len(results['ann_info']['bboxes_ignore']) > 0:
            #     import pdb; pdb.set_trace()
            if self.method == "geo_center":
                box = gt_bboxes[idx]  # (4, )
                center = (box[:2] + box[2:]) / 2  # (2, )
                center_x, center_y = center
            elif self.method == "mass_center":
                mask = gt_masks[idx]  # (h, w)
                center_x, center_y = self.center_of_mass(mask)
            else:
                raise NotImplementedError(self.method)
            center_x_norm = int(center_x / w * 79)
            center_y_norm = int(center_y / h * 79)
            color = self.color_dict[(center_x_norm, center_y_norm)]
            # only bool can be used for slicing!
            mask = gt_masks[idx].astype("bool")
            segmentation[mask] = color

        if (segmentation == 0).all():
            # pure black label
            return results

        # save files
        output_dir = os.path.join(self.target_path, self.dir_name)
        file_name = results['img_info']['file_name']
        # images are loaded in bgr order, reverse before saving
        img_pil = Image.fromarray(img[:, :, ::-1].astype('uint8'))
        label_pil = Image.fromarray(segmentation)
        image_path = os.path.join(
            output_dir,
            file_name.replace(".jpg", "_image_{}.png".format(self.dir_name)))
        label_path = os.path.join(
            output_dir,
            file_name.replace(".jpg", "_label_{}.png".format(self.dir_name)))
        # if os.path.exists(image_path) or os.path.exists(label_path):
        #     print("{} exists!".format(image_path))
        #     return results
        aug_idx = 0
        while os.path.exists(image_path) or os.path.exists(label_path):
            aug_idx += 1
            image_path = os.path.join(
                output_dir,
                file_name.replace(
                    ".jpg", "_image_{}_{}.png".format(self.dir_name, aug_idx)))
            label_path = os.path.join(
                output_dir,
                file_name.replace(
                    ".jpg", "_label_{}_{}.png".format(self.dir_name, aug_idx)))
        img_pil.save(image_path)
        label_pil.save(label_path)
        return results

    def center_of_mass(self, mask, esp=1e-6):
        """Calculate the centroid coordinates of the mask.

        Args:
            mask (Tensor): The mask to be calculated, shape (h, w).
            esp (float): Avoid dividing by zero. Default: 1e-6.

        Returns:
            tuple[Tensor]: the coordinates of the center point of the mask.

                - center_h (Tensor): the center point of the height.
                - center_w (Tensor): the center point of the width.
        """
        h, w = mask.shape
        grid_h = np.arange(h)[:, None]
        grid_w = np.arange(w)
        normalizer = mask.sum().astype("float").clip(min=esp)
        center_h = (mask * grid_h).sum() / normalizer
        center_w = (mask * grid_w).sum() / normalizer
        return center_w, center_h

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(method={self.method})'
        return repr_str
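
For reference, the encoding implemented by define_colors_per_location_r_gb and simplify_color_dict amounts to: split the image into an 80x80 grid (a 4x4 global grid of 20x20 local cells) and assign every cell a unique RGB value, with R indexing the global cell and G/B the local cell. A compact standalone re-derivation of the same mapping, assuming the default num_location_r=16 and num_location_gb=20 (illustrative, not repo code):

sep_r = 255 // 16        # R step between the 16 global cells
sep_gb = 256 // 20 + 1   # G/B step between the 20 local rows/columns

def color_for_center(center_x, center_y, w, h):
    x = int(center_x / w * 79)  # absolute grid coords in [0, 79]
    y = int(center_y / h * 79)
    global_x, local_x = divmod(x, 20)
    global_y, local_y = divmod(y, 20)
    R = 255 - (global_y * 4 + global_x) * sep_r
    G = 255 - local_y * sep_gb
    B = 255 - local_x * sep_gb
    return (R, G, B)

# An instance centered in a 640x480 image lands in grid cell (39, 39):
assert color_for_center(320, 240, 640, 480) == (180, 8, 8)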

data/mmdet_custom/gen_json_coco_panoptic_inst.py  (new file, mode 100644)

# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import os
import glob
import json
import tqdm
import argparse


def get_args_parser():
    parser = argparse.ArgumentParser(
        'COCO class-agnostic instance segmentation preparation',
        add_help=False)
    parser.add_argument(
        '--split', type=str, help='dataset split',
        choices=['train', 'val'], required=True)
    parser.add_argument(
        '--output_dir', type=str, help='path to output dir',
        default='datasets/coco/pano_ca_inst')
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args_parser()
    panoptic_dir = "datasets/coco/pano_ca_inst"
    save_path = os.path.join(
        args.output_dir,
        "coco_{}_image_panoptic_inst.json".format(args.split))
    print(save_path)

    output_dict = []
    image_path_list = glob.glob(
        os.path.join(panoptic_dir, '{}_*'.format(args.split), '*image*.png'))
    for image_path in tqdm.tqdm(image_path_list):
        image_dir, image_name = os.path.dirname(image_path), \
            os.path.basename(image_path)
        panoptic_path = os.path.join(
            image_dir, image_name.replace('image', 'label'))
        assert os.path.isfile(image_path)
        if not os.path.isfile(panoptic_path):
            print("ignore {}".format(image_path))
            continue

        pair_dict = {}
        pair_dict["image_path"] = image_path.replace('datasets/', '')
        pair_dict["target_path"] = panoptic_path.replace('datasets/', '')
        pair_dict["type"] = "coco_image2panoptic_inst"
        output_dict.append(pair_dict)

    json.dump(output_dict, open(save_path, 'w'))
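
Each record written by this script pairs one saved image with its label mask, with paths made relative to datasets/ by the .replace('datasets/', '') calls. Given the naming scheme in SaveDataPairCustom, an entry looks roughly like this (the image id below is illustrative):

{
    "image_path": "coco/pano_ca_inst/train_orgflip/000000000009_image_train_orgflip.png",
    "target_path": "coco/pano_ca_inst/train_orgflip/000000000009_label_train_orgflip.png",
    "type": "coco_image2panoptic_inst"
}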

data/mmdet_custom/tools/dist_test.sh  (new file, mode 100644)

#!/usr/bin/env bash

CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/test.py \
    $CONFIG \
    $CHECKPOINT \
    --launcher pytorch \
    ${@:4}

data/mmdet_custom/tools/dist_train.sh  (new file, mode 100644)

#!/usr/bin/env bash

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/train.py \
    $CONFIG \
    --seed 0 \
    --launcher pytorch ${@:3}

data/mmdet_custom/tools/test.py  (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import time
import warnings
import sys

import tqdm
import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
                         wrap_fp16_model)

from mmdet.apis import multi_gpu_test, single_gpu_test
from mmdet.datasets import (build_dataloader, build_dataset,
                            replace_ImageToTensor)
from mmdet.models import build_detector
from mmdet.utils import (build_ddp, build_dp, compat_cfg, get_device,
                         replace_cfg_vals, setup_multi_processes,
                         update_data_root)


def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDet test (and eval) a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--work-dir',
        help='the directory to save the file containing evaluation metrics')
    parser.add_argument('--out', help='output result file in pickle format')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed testing)')
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Format the output results without performing evaluation. It is '
        'useful when you want to format the result to a specific format and '
        'submit it to the test server')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
        ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument(
        '--show-dir', help='directory where painted images will be saved')
    parser.add_argument(
        '--show-score-thr',
        type=float,
        default=0.3,
        help='score threshold (default: 0.3)')
    parser.add_argument(
        '--gpu-collect',
        action='store_true',
        help='whether to use gpu to collect results.')
    parser.add_argument(
        '--tmpdir',
        help='tmp directory used for collecting results from multiple '
        'workers, available when gpu-collect is not specified')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function (deprecated), '
        'change to --eval-options instead.')
    parser.add_argument(
        '--eval-options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.eval_options:
        raise ValueError(
            '--options and --eval-options cannot be both '
            'specified, --options is deprecated in favor of --eval-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --eval-options')
        args.eval_options = args.options
    return args


def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)

    # replace the ${key} with the value of cfg.key
    cfg = replace_cfg_vals(cfg)

    # update data root according to MMDET_DATASETS
    update_data_root(cfg)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    cfg = compat_cfg(cfg)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # if 'pretrained' in cfg.model:
    #     cfg.model.pretrained = None
    # elif 'init_cfg' in cfg.model.backbone:
    #     cfg.model.backbone.init_cfg = None
    #
    # if cfg.model.get('neck'):
    #     if isinstance(cfg.model.neck, list):
    #         for neck_cfg in cfg.model.neck:
    #             if neck_cfg.get('rfp_backbone'):
    #                 if neck_cfg.rfp_backbone.get('pretrained'):
    #                     neck_cfg.rfp_backbone.pretrained = None
    #     elif cfg.model.neck.get('rfp_backbone'):
    #         if cfg.model.neck.rfp_backbone.get('pretrained'):
    #             cfg.model.neck.rfp_backbone.pretrained = None

    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed testing. Use the first GPU '
                      'in `gpu_ids` now.')
    else:
        cfg.gpu_ids = [args.gpu_id]
    cfg.device = get_device()
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)

    # in case the test dataset is concatenated
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfg.data.get('test_dataloader', {})
    }

    rank, _ = get_dist_info()
    # allows not to create
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    load_data_only = cfg.custom.get('load_data_only', False)
    assert load_data_only
    for _ in tqdm.tqdm(data_loader):
        pass
    print("dataset enumerated, exit!")
    sys.exit()


if __name__ == '__main__':
    main()
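
Note that in this commit the test entry point never builds or loads a model: it asserts cfg.custom.load_data_only and only iterates the dataloader, so the SaveDataPairCustom transform in the pipeline writes the image/label pairs to disk as a side effect. A hedged invocation sketch (config path taken from this commit; the checkpoint argument is required by the parser but never read before the script exits, so any placeholder works, and --eval is passed merely to satisfy the "specify at least one operation" assert):

import subprocess

# Illustrative only; run from data/mmdet_custom/ per the repo layout.
subprocess.run([
    'python', 'tools/test.py',
    'configs/coco_panoptic_ca_inst_gen_orgflip.py',
    'unused_checkpoint.pth',
    '--eval', 'PQ',
], check=True)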

data/mmdet_custom/tools/train.py  (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
import warnings

import mmcv
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from mmcv.utils import get_git_hash

from mmdet import __version__
from mmdet.apis import init_random_seed, set_random_seed
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import (collect_env, get_device, get_root_logger,
                         replace_cfg_vals, setup_multi_processes,
                         update_data_root)

import sys
sys.path.insert(0, './')
from apis.train import train_detector


def parse_args():
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--auto-resume',
        action='store_true',
        help='resume from the latest checkpoint automatically')
    parser.add_argument(
        '--no-validate',
        action='store_true',
        help='whether not to evaluate the checkpoint during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='(Deprecated, please use --gpu-id) number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--diff-seed',
        action='store_true',
        help='Whether or not set different seeds for different ranks')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecated), '
        'change to --cfg-options instead.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument(
        '--auto-scale-lr',
        action='store_true',
        help='enable automatically scaling LR.')
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.cfg_options:
        raise ValueError(
            '--options and --cfg-options cannot be both '
            'specified, --options is deprecated in favor of --cfg-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --cfg-options')
        args.cfg_options = args.options

    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # replace the ${key} with the value of cfg.key
    cfg = replace_cfg_vals(cfg)

    # update data root according to MMDET_DATASETS
    update_data_root(cfg)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    if args.auto_scale_lr:
        if 'auto_scale_lr' in cfg and \
                'enable' in cfg.auto_scale_lr and \
                'base_batch_size' in cfg.auto_scale_lr:
            cfg.auto_scale_lr.enable = True
        else:
            warnings.warn('Can not find "auto_scale_lr" or '
                          '"auto_scale_lr.enable" or '
                          '"auto_scale_lr.base_batch_size" in your'
                          ' configuration file. Please update all the '
                          'configuration files to mmdet >= 2.24.1.')

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.auto_resume = args.auto_resume
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    cfg.device = get_device()
    # set random seeds
    seed = init_random_seed(args.seed, device=cfg.device)
    seed = seed + dist.get_rank() if args.diff_seed else seed
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    # model = build_detector(
    #     cfg.model,
    #     train_cfg=cfg.get('train_cfg'),
    #     test_cfg=cfg.get('test_cfg'))
    # model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        assert 'val' in [mode for (mode, _) in cfg.workflow]
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.get(
            'pipeline', cfg.data.train.dataset.get('pipeline'))
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__ + get_git_hash()[:7],
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    # model.CLASSES = datasets[0].CLASSES
    model = None
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)


if __name__ == '__main__':
    main()

data/mmpose_custom/apis/test.py  (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import pickle
import shutil
import tempfile

import mmcv
import torch
import torch.distributed as dist
from mmcv.runner import get_dist_info


def single_gpu_test(model, data_loader, pseudo_test=False):
    """Test model with a single gpu.

    This method tests model with a single gpu and displays test progress bar.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        pseudo_test: custom arg

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, pseudo_test=pseudo_test, **data)
        results.append(result)

        # use the first key as main key to calculate the batch size
        batch_size = len(next(iter(data.values())))
        for _ in range(batch_size):
            prog_bar.update()
    return results
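
The progress bar is advanced once per sample, with the batch size inferred from the first entry of the data dict. A toy illustration (not repo code):

data = {'img': [0, 1, 2, 3], 'img_metas': [{}, {}, {}, {}]}
batch_size = len(next(iter(data.values())))  # length of the first value
assert batch_size == 4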

data/mmpose_custom/apis/train.py  (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import sys
import warnings

import mmcv
import numpy as np
import torch
import torch.distributed as dist
import tqdm
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook,
                         get_dist_info)
from mmcv.utils import digit_version

from mmpose.core import DistEvalHook, EvalHook, build_optimizers
from mmpose.core.distributed_wrapper import DistributedDataParallelWrapper
from mmpose.datasets import build_dataloader, build_dataset
from mmpose.utils import get_root_logger

try:
    from mmcv.runner import Fp16OptimizerHook
except ImportError:
    warnings.warn('Fp16OptimizerHook from mmpose will be deprecated from '
                  'v0.15.0. Please install mmcv>=1.1.4', DeprecationWarning)
    from mmpose.core import Fp16OptimizerHook


def init_random_seed(seed=None, device='cuda'):
    """Initialize random seed.

    If the seed is not set, the seed will be automatically randomized,
    and then broadcast to all processes to prevent some potential bugs.

    Args:
        seed (int, Optional): The seed. Default to None.
        device (str): The device where the seed will be put on.
            Default to 'cuda'.

    Returns:
        int: Seed to be used.
    """
    if seed is not None:
        return seed

    # Make sure all ranks share the same random seed to prevent
    # some potential bugs. Please refer to
    # https://github.com/open-mmlab/mmdetection/issues/6339
    rank, world_size = get_dist_info()
    seed = np.random.randint(2**31)
    if world_size == 1:
        return seed

    if rank == 0:
        random_num = torch.tensor(seed, dtype=torch.int32, device=device)
    else:
        random_num = torch.tensor(0, dtype=torch.int32, device=device)
    dist.broadcast(random_num, src=0)
    return random_num.item()


def train_model(model,
                dataset,
                cfg,
                distributed=False,
                validate=False,
                timestamp=None,
                meta=None):
    """Train model entry function.

    Args:
        model (nn.Module): The model to be trained.
        dataset (Dataset): Train dataset.
        cfg (dict): The config dict for training.
        distributed (bool): Whether to use distributed training.
            Default: False.
        validate (bool): Whether to do evaluation. Default: False.
        timestamp (str | None): Local time for runner. Default: None.
        meta (dict | None): Meta dict to record some important information.
            Default: None
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    # step 1: give default values and override (if exist) from cfg.data
    loader_cfg = {
        **dict(
            seed=cfg.get('seed'),
            drop_last=False,
            dist=distributed,
            num_gpus=len(cfg.gpu_ids)),
        **({} if torch.__version__ != 'parrots' else dict(
            prefetch_num=2,
            pin_memory=False,
        )),
        **dict((k, cfg.data[k]) for k in [
            'samples_per_gpu',
            'workers_per_gpu',
            'shuffle',
            'seed',
            'drop_last',
            'prefetch_num',
            'pin_memory',
            'persistent_workers',
        ] if k in cfg.data)
    }

    # step 2: cfg.data.train_dataloader has highest priority
    train_loader_cfg = dict(loader_cfg, **cfg.data.get('train_dataloader', {}))

    data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]

    load_data_only = cfg.data.get('load_data_only', False)
    assert load_data_only
    # only enumerate the dataset
    for data_loader in data_loaders:
        for _ in tqdm.tqdm(data_loader):
            pass
    print("dataset enumerated, exit!")
    sys.exit()
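
The loader_cfg construction above leans on Python's dict-merge precedence: in {**a, **b, **c}, later mappings override earlier ones, so per-key values picked from cfg.data override the defaults, and cfg.data.train_dataloader (merged in step 2) overrides both. A toy illustration (not repo code):

defaults = dict(samples_per_gpu=1, shuffle=True, drop_last=False)
from_cfg = dict(samples_per_gpu=32)          # e.g. keys picked from cfg.data
train_dataloader = dict(samples_per_gpu=64)  # cfg.data.train_dataloader
merged = dict({**defaults, **from_cfg}, **train_dataloader)
assert merged == dict(samples_per_gpu=64, shuffle=True, drop_last=False)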

data/mmpose_custom/configs/_base_/coco.py  (new file, mode 100644)

dataset_info = dict(
    dataset_name='coco',
    paper_info=dict(
        author='Lin, Tsung-Yi and Maire, Michael and '
        'Belongie, Serge and Hays, James and '
        'Perona, Pietro and Ramanan, Deva and '
        r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
        title='Microsoft coco: Common objects in context',
        container='European conference on computer vision',
        year='2014',
        homepage='http://cocodataset.org/',
    ),
    keypoint_info={
        0: dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
        1: dict(name='left_eye', id=1, color=[51, 153, 255], type='upper', swap='right_eye'),
        2: dict(name='right_eye', id=2, color=[51, 153, 255], type='upper', swap='left_eye'),
        3: dict(name='left_ear', id=3, color=[51, 153, 255], type='upper', swap='right_ear'),
        4: dict(name='right_ear', id=4, color=[51, 153, 255], type='upper', swap='left_ear'),
        5: dict(name='left_shoulder', id=5, color=[0, 255, 0], type='upper', swap='right_shoulder'),
        6: dict(name='right_shoulder', id=6, color=[255, 128, 0], type='upper', swap='left_shoulder'),
        7: dict(name='left_elbow', id=7, color=[0, 255, 0], type='upper', swap='right_elbow'),
        8: dict(name='right_elbow', id=8, color=[255, 128, 0], type='upper', swap='left_elbow'),
        9: dict(name='left_wrist', id=9, color=[0, 255, 0], type='upper', swap='right_wrist'),
        10: dict(name='right_wrist', id=10, color=[255, 128, 0], type='upper', swap='left_wrist'),
        11: dict(name='left_hip', id=11, color=[0, 255, 0], type='lower', swap='right_hip'),
        12: dict(name='right_hip', id=12, color=[255, 128, 0], type='lower', swap='left_hip'),
        13: dict(name='left_knee', id=13, color=[0, 255, 0], type='lower', swap='right_knee'),
        14: dict(name='right_knee', id=14, color=[255, 128, 0], type='lower', swap='left_knee'),
        15: dict(name='left_ankle', id=15, color=[0, 255, 0], type='lower', swap='right_ankle'),
        16: dict(name='right_ankle', id=16, color=[255, 128, 0], type='lower', swap='left_ankle')
    },
    skeleton_info={
        0: dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
        1: dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
        2: dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
        3: dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
        4: dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
        5: dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
        6: dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
        7: dict(link=('left_shoulder', 'right_shoulder'), id=7, color=[51, 153, 255]),
        8: dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
        9: dict(link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
        10: dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
        11: dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
        12: dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
        13: dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
        14: dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
        15: dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
        16: dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
        17: dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
        18: dict(link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
    },
    joint_weights=[
        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2,
        1.5, 1.5
    ],
    sigmas=[
        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
    ])
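
The swap fields above are what defines the horizontal-flip keypoint pairs. A minimal sketch of deriving index pairs from them (illustrative; mmpose's own dataset-info parser does the equivalent):

# Toy subset of keypoint_info; the full dict above works the same way.
keypoint_info = {
    1: dict(name='left_eye', swap='right_eye'),
    2: dict(name='right_eye', swap='left_eye'),
}
name_to_id = {v['name']: k for k, v in keypoint_info.items()}
flip_pairs = sorted({
    tuple(sorted((k, name_to_id[v['swap']])))
    for k, v in keypoint_info.items() if v['swap']
})
assert flip_pairs == [(1, 2)]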

data/mmpose_custom/configs/_base_/default_runtime.py  (new file, mode 100644)

checkpoint_config = dict(interval=10)

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
        # dict(type='PaviLoggerHook')  # for internal services
    ])

log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]

# disable opencv multithreading to avoid system being overloaded
opencv_num_threads = 0
# set multi-process start method as `fork` to speed up the training
mp_start_method = 'fork'

data/mmpose_custom/configs/coco_256x192_gendata.py  (new file, mode 100644)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = None

use_gt_bbox = True
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],  # [48, 64]
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=use_gt_bbox,
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

sigma = [1.5, 3]
aug_idx = 0
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    # dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0),
    # dict(
    #     type='TopDownHalfBodyTransform',
    #     num_joints_half_body=8,
    #     prob_half_body=0.3),
    # dict(
    #     type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='train_256x192_aug{}'.format(aug_idx),
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    # dict(type='TopDownRandomFlip', flip_prob=1),  # for flip test
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='val_256x192',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]
test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    load_data_only=True,  # custom arg
    train=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)
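
The double-braced {{_base_.dataset_info}} entries are mmcv config syntax for referencing a variable defined in a _base_ file; loading the config substitutes in the dataset_info dict from ./_base_/coco.py. An illustrative check (path assumed relative to data/mmpose_custom):

from mmcv import Config

cfg = Config.fromfile('configs/coco_256x192_gendata.py')
assert cfg.data.train.dataset_info['dataset_name'] == 'coco'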

data/mmpose_custom/configs/coco_256x192_gendata_test.py  (new file, mode 100644)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = None

use_gt_bbox = False
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],  # [48, 64]
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=use_gt_bbox,
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

sigma = [1.5, 3]  # 2
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='train_256x192_aug0',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    # dict(type='TopDownRandomFlip', flip_prob=1),  # for flip test
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='test_256x192',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]
test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    load_data_only=True,  # custom arg
    train=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)

data/mmpose_custom/configs/coco_256x192_gendata_testflip.py  (new file, mode 100644)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = None

use_gt_bbox = False
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],  # [48, 64]
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=use_gt_bbox,
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

sigma = [1.5, 3]  # 2
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='train_256x192_aug0',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomFlip', flip_prob=1),  # for flip test
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='test_256x192_flip',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]
test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    load_data_only=True,  # custom arg
    train=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
val
=
dict
(
type
=
'TopDownCocoDatasetCustom'
,
ann_file
=
f
'
{
data_root
}
/annotations/person_keypoints_val2017.json'
,
img_prefix
=
f
'
{
data_root
}
/val2017/'
,
data_cfg
=
data_cfg
,
pipeline
=
val_pipeline
,
dataset_info
=
{{
_base_
.
dataset_info
}}),
test
=
dict
(
type
=
'TopDownCocoDatasetCustom'
,
ann_file
=
f
'
{
data_root
}
/annotations/person_keypoints_val2017.json'
,
img_prefix
=
f
'
{
data_root
}
/val2017/'
,
data_cfg
=
data_cfg
,
pipeline
=
test_pipeline
,
dataset_info
=
{{
_base_
.
dataset_info
}}),
)
# import newly registered module
custom_imports
=
dict
(
imports
=
[
'model.top_down'
,
'data.topdown_coco_dataset'
,
'data.pipelines.top_down_transform'
,
],
allow_failed_imports
=
False
)
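
This variant differs from the non-flip generation config only in its val pipeline: `TopDownRandomFlip` with `flip_prob=1` deterministically mirrors every crop before the image/label pair is written to `test_256x192_flip`. A minimal numpy sketch of what such a flip does to keypoints, mirroring the behavior of mmpose's `fliplr_joints` (the function and pair list here are illustrative, not the repo's code):

import numpy as np

# COCO left/right pairs: eyes, ears, shoulders, elbows, wrists, hips, knees, ankles
flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]

def flip_keypoints(joints, img_width):
    """Mirror keypoint x-coordinates and swap left/right joint labels."""
    flipped = joints.copy()
    flipped[:, 0] = img_width - 1 - flipped[:, 0]
    for left, right in flip_pairs:
        flipped[[left, right]] = flipped[[right, left]]
    return flipped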
data/mmpose_custom/configs/coco_256x192_test_offline.py
import os

job_name = "painter_vit_large"
ckpt_file = "painter_vit_large.pth"
prompt = "000000000165_box0"
image_dir = 'models_inference/{}/coco_pose_inference_{}_{}/'.format(
    job_name, ckpt_file, prompt)
if not image_dir[-1] == "/":
    image_dir = image_dir + '/'
print(image_dir)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']

evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)

# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])

# fake model settings
model = dict(
    type='TopDownCustom',
    pretrained=None,
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
    ),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=32,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=17))
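
# Note: this HRNet-W32 definition appears to serve only to satisfy the mmpose
# model builder; with `pseudo_test=True` in `data` below, evaluation scores
# the Painter-generated images found under `image_dir` rather than this
# network's own predictions.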
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],
    # heatmap_size=[48, 64],
    # image_size=[640, 320],  # w, h
    # heatmap_size=[640, 320],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=False,
    imagename_with_boxid=True,  # custom
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

# sigma = [1.5, 3]  # 2
sigma = 3  # use the hyper-parameters of the R channel, which encodes the heatmap

val_pipeline = [
    dict(type='LoadImageFromFile'),
    # loads custom images according to filename and box_id, via topdown_coco_dataset
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]
test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    pseudo_test=True,  # custom arg
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        # img_prefix=f'{data_root}/val2017/',
        img_prefix=image_dir,
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        # img_prefix=f'{data_root}/val2017/',
        img_prefix=image_dir,
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)
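
With `pseudo_test=True` and `img_prefix=image_dir`, this config evaluates pose images that Painter generated offline. A hedged sketch of building the test dataset from it (standard mmcv/mmpose calls; the output comment is illustrative):

from mmcv import Config
from mmpose.datasets import build_dataset

cfg = Config.fromfile('data/mmpose_custom/configs/coco_256x192_test_offline.py')
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
print(len(dataset))  # one entry per detected person box that passes det_bbox_thr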
data/mmpose_custom/data/pipelines/custom_transform.py
import os
import random
import warnings

import cv2
import numpy as np
from PIL import Image


def define_colors_gb_mean_sep(num_locations=17):
    num_sep_per_channel = int(num_locations ** (1 / 2)) + 1  # 5
    separation_per_channel = 256 // num_sep_per_channel  # 51

    color_dict = {}
    # R = G = B = 0
    # B += separation_per_channel  # offset for the first loop
    for location in range(num_locations):
        num_seq_g = location // num_sep_per_channel
        num_seq_b = location % num_sep_per_channel
        assert (num_seq_g <= num_sep_per_channel) and (num_seq_b <= num_sep_per_channel)

        G = 255 - num_seq_g * separation_per_channel
        B = 255 - num_seq_b * separation_per_channel
        assert (G < 256) and (B < 256)
        assert (G >= 0) and (B >= 0)
        assert (G, B) not in color_dict.values()

        color_dict[location] = (G, B)
        # print(location, (num_seq_g, num_seq_b), (G, B))

    # colors = [v for k, v in color_dict.items()]
    # min values in gb: [51, 51]
    return color_dict


color_dict = define_colors_gb_mean_sep()
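
# Worked example of the mapping above: with num_locations=17,
# num_sep_per_channel = int(17 ** 0.5) + 1 = 5 and
# separation_per_channel = 256 // 5 = 51, so keypoint 0 -> (G, B) = (255, 255),
# keypoint 1 -> (255, 204), keypoint 5 -> (204, 255), and
# keypoint 16 -> (102, 204); all 17 (G, B) pairs are distinct, which the
# asserts above guarantee.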
def encode_target_to_image(target, target_weight, target_dir, metas):
    if len(target.shape) == 3:
        return encode_rgb_target_to_image(
            target_kernel=target,
            target_class=target,
            target_weight_kernel=target_weight,
            target_weight_class=target_weight,
            target_dir=target_dir,
            metas=metas,
        )
    assert len(target.shape) == 4
    return encode_rgb_target_to_image(
        target_kernel=target[1],
        target_class=target[0],
        target_weight_kernel=target_weight[1],
        target_weight_class=target_weight[0],
        target_dir=target_dir,
        metas=metas,
    )


def check_input(target_weight, target, metas):
    if not ((target_weight.reshape(17, 1, 1) * target) == target).all():
        print("useful target_weight!")
        # make sure the invisible parts are weighted zero, and thus not shown in the target
        target = target_weight.reshape(17, 1, 1) * target
    if not (target_weight[np.sum(metas['joints_3d_visible'], axis=1) == 0] == 0).all():
        print(metas['image_file'], "may have joints_3d_visible problems!")


def encode_rgb_target_to_image(target_kernel, target_class, target_weight_kernel,
                               target_weight_class, target_dir, metas):
    """
    Args:
        target_kernel: ndarray (17, 256, 192), heatmap encoded into the R channel
        target_class: ndarray (17, 256, 192), keypoint class encoded into the GB channels
        target_weight_kernel: ndarray (17, 1)
        target_weight_class: ndarray (17, 1)
        metas: dict
    Returns:
        an RGB image, R encodes heatmap, GB encodes class
    """
    check_input(target_weight_kernel, target_kernel, metas)
    check_input(target_weight_class, target_class, metas)

    # 1. handle the kernel in the R channel
    # get the max value for collision areas
    sum_kernel = target_kernel.max(0)  # (256, 192)
    max_kernel_indices = target_kernel.argmax(0)  # (256, 192)
    R = sum_kernel[:, :, None] * 255.  # (256, 192, 1)

    # 2. handle the class in the GB channels
    K, H, W = target_class.shape
    keypoint_areas_class = []
    for keypoint_idx in range(K):
        mask = target_class[keypoint_idx] != 0
        keypoint_areas_class.append(mask)
    keypoint_areas_class = np.stack(keypoint_areas_class)  # (17, 256, 192)
    num_pos_per_location_class = keypoint_areas_class.sum(0)  # (256, 192)
    collision_area_class = num_pos_per_location_class > 1  # (256, 192)

    GB_MultiChannel = np.zeros((K, H, W, 2))
    for keypoint_idx in range(K):
        color = color_dict[keypoint_idx]
        class_mask = keypoint_areas_class[keypoint_idx]
        GB_MultiChannel[keypoint_idx][class_mask] = color
    GB = GB_MultiChannel.sum(0)  # (256, 192, 2)

    if np.sum(collision_area_class) != 0:
        # in collision areas, recolor with the keypoint whose kernel response is largest
        for keypoint_idx in range(K):
            color = color_dict[keypoint_idx]
            # may match extra pixels (argmax is 0 wherever all kernels are zero),
            # but these are removed by intersecting with collision_area_class below
            max_area_this_keypoint = max_kernel_indices == keypoint_idx
            area_of_interest = max_area_this_keypoint * collision_area_class
            if not (area_of_interest == 0).all():
                GB[area_of_interest] = color

    # 3. get the image / label pair and save it
    image_label = np.concatenate([R, GB], axis=-1).astype(np.uint8)  # (256, 192, 3)
    image_label = Image.fromarray(image_label)

    image = metas['img']
    image = Image.fromarray(image)

    box_idx = metas['bbox_id']
    _, filename = os.path.dirname(metas['image_file']), os.path.basename(metas['image_file'])
    image_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_image.png".format(box_idx)))
    label_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_label.png".format(box_idx)))
    # if os.path.exists(image_path):
    #     print(image_path, "exist! return!")
    #     return
    image.save(image_path)
    image_label.save(label_path)
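
For reading generated labels back, here is a hedged sketch (not part of the repo) of inverting this encoding: match each pixel's (G, B) pair against `color_dict` and read the R channel as the heatmap value. `tol` is an assumed tolerance that absorbs interpolation noise:

import numpy as np
from PIL import Image

def decode_label_image(label_path, color_dict, tol=25):
    """Recover rough per-keypoint heatmaps from a saved *_label.png."""
    rgb = np.asarray(Image.open(label_path).convert('RGB')).astype(np.float32)
    R, GB = rgb[..., 0] / 255.0, rgb[..., 1:]
    heatmaps = np.zeros((len(color_dict),) + R.shape, dtype=np.float32)
    for k, color in color_dict.items():
        mask = np.abs(GB - np.array(color)).max(-1) < tol  # nearest-color match
        heatmaps[k][mask] = R[mask]
    return heatmaps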
data/mmpose_custom/data/pipelines/top_down_transform.py
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import os

import cv2
import numpy as np
from PIL import Image

from mmpose.core.bbox import bbox_xywh2cs
from mmpose.core.post_processing import (affine_transform, fliplr_joints,
                                         get_affine_transform, get_warp_matrix,
                                         warp_affine_joints)
from mmpose.datasets.builder import PIPELINES
from mmpose.datasets.pipelines import TopDownGenerateTarget

from .custom_transform import encode_target_to_image


@PIPELINES.register_module()
class TopDownGenerateTargetCustom(TopDownGenerateTarget):
    """Generate the target heatmap.

    Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
    Modified keys: 'target' and 'target_weight'.

    Args:
        sigma: Sigma of heatmap gaussian for the 'MSRA' approach.
        kernel: Kernel of heatmap gaussian for the 'Megvii' approach.
        encoding (str): Approach to generate target heatmaps.
            Currently supported approaches: 'MSRA', 'Megvii', 'UDP'.
            Default: 'MSRA'
        unbiased_encoding (bool): Option to use unbiased encoding methods.
            Paper ref: Zhang et al. Distribution-Aware Coordinate
            Representation for Human Pose Estimation (CVPR 2020).
        keypoint_pose_distance: Keypoint pose distance for UDP.
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
        target_type (str): supported targets: 'GaussianHeatmap',
            'CombinedTarget'. Default: 'GaussianHeatmap'
            CombinedTarget: The combination of classification target
            (response map) and regression target (offset map).
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
    """

    def __init__(self,
                 sigma=2,
                 kernel=(11, 11),
                 valid_radius_factor=0.0546875,
                 target_type='GaussianHeatmap',
                 encoding='MSRA',
                 unbiased_encoding=False,
                 # the following are custom args
                 target_path=None,
                 dir_name=None,
                 use_gt_bbox=True):
        super().__init__(
            sigma=sigma,
            kernel=kernel,
            valid_radius_factor=valid_radius_factor,
            target_type=target_type,
            encoding=encoding,
            unbiased_encoding=unbiased_encoding)
        self.target_path = target_path
        self.dir_name = dir_name
        self.use_gt_bbox = use_gt_bbox
        target_dir = os.path.join(self.target_path, self.dir_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    def __call__(self, results):
        """Generate the target heatmap."""
        joints_3d = results['joints_3d']
        joints_3d_visible = results['joints_3d_visible']

        assert self.encoding in ['MSRA', 'Megvii', 'UDP']

        if self.encoding == 'MSRA':
            if isinstance(self.sigma, list):
                # stack one heatmap set per sigma along a new leading axis
                num_sigmas = len(self.sigma)
                cfg = results['ann_info']
                num_joints = cfg['num_joints']
                heatmap_size = cfg['heatmap_size']

                target = np.empty(
                    (0, num_joints, heatmap_size[1], heatmap_size[0]),
                    dtype=np.float32)
                target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
                for i in range(num_sigmas):
                    target_i, target_weight_i = self._msra_generate_target(
                        cfg, joints_3d, joints_3d_visible, self.sigma[i])
                    target = np.concatenate([target, target_i[None]], axis=0)
                    target_weight = np.concatenate(
                        [target_weight, target_weight_i[None]], axis=0)
            else:
                target, target_weight = self._msra_generate_target(
                    results['ann_info'], joints_3d, joints_3d_visible,
                    self.sigma)
        elif self.encoding == 'Megvii':
            if isinstance(self.kernel, list):
                num_kernels = len(self.kernel)
                cfg = results['ann_info']
                num_joints = cfg['num_joints']
                W, H = cfg['heatmap_size']

                target = np.empty((0, num_joints, H, W), dtype=np.float32)
                target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
                for i in range(num_kernels):
                    target_i, target_weight_i = self._megvii_generate_target(
                        cfg, joints_3d, joints_3d_visible, self.kernel[i])
                    target = np.concatenate([target, target_i[None]], axis=0)
                    target_weight = np.concatenate(
                        [target_weight, target_weight_i[None]], axis=0)
            else:
                target, target_weight = self._megvii_generate_target(
                    results['ann_info'], joints_3d, joints_3d_visible,
                    self.kernel)
        elif self.encoding == 'UDP':
            if self.target_type.lower() == 'CombinedTarget'.lower():
                factors = self.valid_radius_factor
                channel_factor = 3
            elif self.target_type.lower() == 'GaussianHeatmap'.lower():
                factors = self.sigma
                channel_factor = 1
            else:
                raise ValueError('target_type should be either '
                                 "'GaussianHeatmap' or 'CombinedTarget'")
            if isinstance(factors, list):
                num_factors = len(factors)
                cfg = results['ann_info']
                num_joints = cfg['num_joints']
                W, H = cfg['heatmap_size']

                target = np.empty((0, channel_factor * num_joints, H, W),
                                  dtype=np.float32)
                target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
                for i in range(num_factors):
                    target_i, target_weight_i = self._udp_generate_target(
                        cfg, joints_3d, joints_3d_visible, factors[i],
                        self.target_type)
                    target = np.concatenate([target, target_i[None]], axis=0)
                    target_weight = np.concatenate(
                        [target_weight, target_weight_i[None]], axis=0)
            else:
                target, target_weight = self._udp_generate_target(
                    results['ann_info'], joints_3d, joints_3d_visible, factors,
                    self.target_type)
        else:
            raise ValueError(
                f'Encoding approach {self.encoding} is not supported!')

        results['target'] = target
        results['target_weight'] = target_weight

        target_dir = os.path.join(self.target_path, self.dir_name)
        if not self.use_gt_bbox:
            # detector boxes: only the input crop is saved, named by box id
            box_idx = results['bbox_id']
            image = results['img']
            image = Image.fromarray(image)
            _, filename = os.path.dirname(results['image_file']), os.path.basename(results['image_file'])
            image_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_image.png".format(box_idx)))
            if os.path.exists(image_path):
                print(image_path, "exist! return!")
                return results
            image.save(image_path)
        else:
            # filter all-black targets
            if (target.sum((1, 2)) == 0).all():
                return results
            # encode the target into an image (saving is done inside)
            encode_target_to_image(target, target_weight, target_dir=target_dir, metas=results)

        return results
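
For the configs above (`sigma = [1.5, 3]`, `heatmap_size = [192, 256]`), the MSRA multi-sigma branch stacks one heatmap set per sigma. A small shape sketch; the index convention matches `encode_target_to_image`, which reads `target[0]` (sigma 1.5) as the class map and `target[1]` (sigma 3) as the kernel heatmap:

import numpy as np

# (num_sigmas, num_joints, H, W) after the concatenations in __call__
target = np.zeros((2, 17, 256, 192), dtype=np.float32)
target_class, target_kernel = target[0], target[1]  # sigma 1.5 / sigma 3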
data/mmpose_custom/data/topdown_coco_dataset.py
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
import warnings
from collections import OrderedDict, defaultdict

import json_tricks as json
import numpy as np
from mmcv import Config, deprecated_api_warning
from xtcocotools.cocoeval import COCOeval

from mmpose.core.post_processing import oks_nms, soft_oks_nms
from mmpose.datasets.builder import DATASETS
# from mmpose.datasets.datasets.base import Kpt2dSviewRgbImgTopDownDataset
from mmpose.datasets.datasets.top_down import TopDownCocoDataset


@DATASETS.register_module()
class TopDownCocoDatasetCustom(TopDownCocoDataset):
    """CocoDataset dataset for top-down pose estimation.

    "Microsoft COCO: Common Objects in Context", ECCV'2014.
    More details can be found in the `paper
    <https://arxiv.org/abs/1405.0312>`__ .

    The dataset loads raw features and applies the specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        dataset_info (DatasetInfo): A class containing all dataset info.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 dataset_info=None,
                 test_mode=False):
        super().__init__(
            ann_file,
            img_prefix,
            data_cfg,
            pipeline,
            dataset_info=dataset_info,
            test_mode=test_mode)
        self.imagename_with_boxid = data_cfg.get('imagename_with_boxid', False)

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """load annotation from COCOAPI.

        Note:
            bbox: [x1, y1, w, h]

        Args:
            img_id: coco image id

        Returns:
            dict: db entry
        """
        img_ann = self.coco.loadImgs(img_id)[0]
        width = img_ann['width']
        height = img_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            if 'bbox' not in obj:
                continue
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w))
            y2 = min(height - 1, y1 + max(0, h))
            if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        bbox_id = 0
        rec = []
        for obj in objs:
            if 'keypoints' not in obj:
                continue
            if max(obj['keypoints']) == 0:
                continue
            if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

            image_file = osp.join(self.img_prefix, self.id2name[img_id])
            if self.imagename_with_boxid:
                # gt bbox label example: 000000342971_box0_image.png
                image_file = image_file.replace(
                    ".jpg", "_box{}_image.png".format(bbox_id))
            rec.append({
                'image_file': image_file,
                'bbox': obj['clean_bbox'][:4],
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1

        return rec
    def _load_coco_person_detection_results(self):
        """Load coco person detection results."""
        num_joints = self.ann_info['num_joints']
        all_boxes = None
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            raise ValueError('=> Load %s fail!' % self.bbox_file)

        print(f'=> Total boxes: {len(all_boxes)}')

        kpt_db = []
        bbox_id = 0
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue

            image_file = osp.join(self.img_prefix,
                                  self.id2name[det_res['image_id']])
            box = det_res['bbox']
            score = det_res['score']

            if score < self.det_bbox_thr:
                continue

            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
            if self.imagename_with_boxid:
                image_file = image_file.replace(
                    ".jpg", "_box{}_image.png".format(bbox_id))
            kpt_db.append({
                'image_file': image_file,
                'rotation': 0,
                'bbox': box[:4],
                'bbox_score': score,
                'dataset': self.dataset_name,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1
        print(f'=> Total boxes after filter '
              f'low score@{self.det_bbox_thr}: {bbox_id}')

        return kpt_db

    @deprecated_api_warning(name_dict=dict(outputs='results'))
    def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will be
        saved in ``${res_folder}/result_keypoints.json``.

        Note:
            - batch_size: N
            - num_keypoints: K
            - heatmap height: H
            - heatmap width: W

        Args:
            results (list[dict]): Testing results containing the following
                items:

                - preds (np.ndarray[N,K,3]): The first two dimensions are \
                    coordinates, score is the third dimension of the array.
                - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
                    scale[1], area, score]
                - image_paths (list[str]): For example, ['data/coco/val2017 \
                    /000000393226.jpg']
                - heatmap (np.ndarray[N, K, H, W]): model output heatmap
                - bbox_id (list(int)).
            res_folder (str, optional): The folder to save the testing
                results. If not specified, a temp folder will be created.
                Default: None.
            metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')

        if res_folder is not None:
            tmp_folder = None
            res_file = osp.join(res_folder, 'result_keypoints.json')
        else:
            tmp_folder = tempfile.TemporaryDirectory()
            res_file = osp.join(tmp_folder.name, 'result_keypoints.json')

        kpts = defaultdict(list)

        for result in results:
            preds = result['preds']
            boxes = result['boxes']
            image_paths = result['image_paths']
            if self.imagename_with_boxid:
                # map generated names like 000000342971_box0_image.png back to
                # the original COCO file name before looking up image ids
                for idx, img_path in enumerate(image_paths):
                    image_dir, file_name = os.path.dirname(img_path), os.path.basename(img_path)
                    file_name = file_name.split("_")[0] + ".jpg"
                    img_path = os.path.join(image_dir, file_name)
                    image_paths[idx] = img_path
            bbox_ids = result['bbox_ids']

            batch_size = len(image_paths)
            for i in range(batch_size):
                image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
                kpts[image_id].append({
                    'keypoints': preds[i],
                    'center': boxes[i][0:2],
                    'scale': boxes[i][2:4],
                    'area': boxes[i][4],
                    'score': boxes[i][5],
                    'image_id': image_id,
                    'bbox_id': bbox_ids[i]
                })
        kpts = self._sort_and_unique_bboxes(kpts)

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        valid_kpts = []
        for image_id in kpts.keys():
            img_kpts = kpts[image_id]
            for n_p in img_kpts:
                box_score = n_p['score']
                if kwargs.get('rle_score', False):
                    pose_score = n_p['keypoints'][:, 2]
                    n_p['score'] = float(box_score + np.mean(pose_score) +
                                         np.max(pose_score))
                else:
                    kpt_score = 0
                    valid_num = 0
                    for n_jt in range(0, num_joints):
                        t_s = n_p['keypoints'][n_jt][2]
                        if t_s > vis_thr:
                            kpt_score = kpt_score + t_s
                            valid_num = valid_num + 1
                    if valid_num != 0:
                        kpt_score = kpt_score / valid_num
                    # rescoring
                    n_p['score'] = kpt_score * box_score

            if self.use_nms:
                nms = soft_oks_nms if self.soft_nms else oks_nms
                keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
                valid_kpts.append([img_kpts[_keep] for _keep in keep])
            else:
                valid_kpts.append(img_kpts)

        self._write_coco_keypoint_results(valid_kpts, res_file)

        # do evaluation only if the ground truth keypoint annotations exist
        if 'annotations' in self.coco.dataset:
            info_str = self._do_python_keypoint_eval(res_file)
            name_value = OrderedDict(info_str)

            if tmp_folder is not None:
                tmp_folder.cleanup()
        else:
            warnings.warn(
                f'Due to the absence of ground truth keypoint '
                f'annotations, the quantitative evaluation can not '
                f'be conducted. The prediction results have been '
                f'saved at: {osp.abspath(res_file)}')
            name_value = {}

        return name_value
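
The `_box{i}` naming used throughout round-trips with the stripping logic in `evaluate`; a tiny sketch (the image id comes from the example in the code comment above):

fname = '000000342971.jpg'
boxed = fname.replace('.jpg', '_box{}_image.png'.format(0))  # 000000342971_box0_image.png
restored = boxed.split('_')[0] + '.jpg'
assert restored == fname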
data/mmpose_custom/gen_json_coco_pose.py
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import os
import glob
import json
import argparse

import tqdm


def get_args_parser():
    parser = argparse.ArgumentParser('COCO pose estimation preparation', add_help=False)
    parser.add_argument('--split', type=str, help='dataset split',
                        choices=['train', 'val'], required=True)
    parser.add_argument('--output_dir', type=str, help='path to output dir',
                        default='datasets/coco_pose')
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args_parser()
    split = args.split
    if split == "train":
        aug_list = ["_aug{}".format(i) for i in range(20)]  # _aug0 .. _aug19
    elif split == "val":
        aug_list = ["", "_flip"]
    else:
        raise NotImplementedError

    save_path = os.path.join(args.output_dir,
                             "coco_pose_256x192_{}.json".format(split))
    print(save_path)

    output_dict = []
    for aug_idx in aug_list:
        image_dir = "datasets/coco_pose/data_pair/{}_256x192{}".format(split, aug_idx)
        print(aug_idx, image_dir)
        image_path_list = glob.glob(os.path.join(image_dir, '*image.png'))
        for image_path in tqdm.tqdm(image_path_list):
            label_path = image_path.replace("image.png", "label.png")
            assert label_path != image_path
            assert os.path.isfile(image_path)
            if not os.path.isfile(label_path):
                print("ignoring {}".format(label_path))
                continue
            pair_dict = {}
            pair_dict["image_path"] = image_path.replace('datasets/', '')
            pair_dict["target_path"] = label_path.replace('datasets/', '')
            pair_dict["type"] = "coco_image2pose"
            output_dict.append(pair_dict)

    json.dump(output_dict, open(save_path, 'w'))
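
A hedged usage sketch: running `python data/mmpose_custom/gen_json_coco_pose.py --split val` writes `datasets/coco_pose/coco_pose_256x192_val.json`, a list of entries of the form below (the specific file name is illustrative):

# one element of the dumped JSON list
{
    "image_path": "coco_pose/data_pair/val_256x192/000000000139_box0_image.png",
    "target_path": "coco_pose/data_pair/val_256x192/000000000139_box0_label.png",
    "type": "coco_image2pose"
}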