Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
HRNet_pytorch
Commits
37c8cebc
Commit
37c8cebc
authored
Jun 07, 2023
by
Sugon_ldc
Browse files
add new model
parents
Pipeline
#318
failed with stages
in 0 seconds
Changes
375
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2214 additions
and
0 deletions
+2214
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
...opdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
+175
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
...g/topdown_heatmap/animalpose/res101_animalpose_256x256.py
+144
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
...g/topdown_heatmap/animalpose/res152_animalpose_256x256.py
+144
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
...mg/topdown_heatmap/animalpose/res50_animalpose_256x256.py
+144
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md
...w_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md
+41
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
..._rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
+56
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md
...2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md
+41
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
...d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
+40
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
..._rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
+175
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
..._rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
+175
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
...iew_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
+144
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
...view_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
+144
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md
...d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md
+41
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
..._kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
+40
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md
...l/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md
+40
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
.../2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
+40
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
...ew_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
+173
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
...ew_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
+173
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
...sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
+142
-0
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
...sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
+142
-0
No files found.
Too many changes to show.
To preserve performance only
375 of 375+
files are displayed.
Plain diff
Email patch
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
0 → 100755
View file @
37c8cebc
# Base configs: default runtime settings plus the AnimalPose dataset meta info.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/animalpose.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the best-AP checkpoint.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(type='Adam', lr=5e-4)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step decay at epochs 170 and 200
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# AnimalPose defines 20 keypoints; every channel is used at inference time.
channel_cfg = dict(
    num_output_channels=20,
    dataset_joints=20,
    dataset_channel=[list(range(20))],
    inference_channel=list(range(20)))

# model settings: top-down pose estimator with an HRNet-W48 backbone and a
# simple heatmap head (no deconv layers, 1x1 final conv).
model = dict(
    type='TopDown',
    pretrained='https://download.openmmlab.com/mmpose/'
    'pretrain_models/hrnet_w48-8ef0771d.pth',
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(48, 96)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(48, 96, 192)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(48, 96, 192, 384))),
    ),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=48,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# Shared data settings: 256x256 crops, 64x64 heatmaps, ground-truth boxes.
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

# Training-time augmentation and target generation.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Deterministic pipeline used for both validation and testing.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/animalpose'
# Dataloader settings. NOTE: `{{_base_.dataset_info}}` is an mmcv Config
# placeholder resolved against the inherited base config at load time; this
# file is therefore only loadable through mmcv's config machinery, not as
# plain Python.
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_train.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
0 → 100755
View file @
37c8cebc
# Base configs: default runtime settings plus the AnimalPose dataset meta info.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/animalpose.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the best-AP checkpoint.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(type='Adam', lr=5e-4)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step decay at epochs 170 and 200
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# AnimalPose defines 20 keypoints; every channel is used at inference time.
channel_cfg = dict(
    num_output_channels=20,
    dataset_joints=20,
    dataset_channel=[list(range(20))],
    inference_channel=list(range(20)))

# model settings: top-down pose estimator, SimpleBaseline head on ResNet-101.
model = dict(
    type='TopDown',
    pretrained='torchvision://resnet101',
    backbone=dict(type='ResNet', depth=101),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# Shared data settings: 256x256 crops, 64x64 heatmaps, ground-truth boxes.
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

# Training-time augmentation and target generation.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Deterministic pipeline used for both validation and testing.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/animalpose'
# Dataloader settings. NOTE: `{{_base_.dataset_info}}` is an mmcv Config
# placeholder resolved against the inherited base config at load time; this
# file is therefore only loadable through mmcv's config machinery, not as
# plain Python.
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_train.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
0 → 100755
View file @
37c8cebc
# Base configs: default runtime settings plus the AnimalPose dataset meta info.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/animalpose.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the best-AP checkpoint.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(type='Adam', lr=5e-4)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step decay at epochs 170 and 200
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# AnimalPose defines 20 keypoints; every channel is used at inference time.
channel_cfg = dict(
    num_output_channels=20,
    dataset_joints=20,
    dataset_channel=[list(range(20))],
    inference_channel=list(range(20)))

# model settings: top-down pose estimator, SimpleBaseline head on ResNet-152.
model = dict(
    type='TopDown',
    pretrained='torchvision://resnet152',
    backbone=dict(type='ResNet', depth=152),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# Shared data settings: 256x256 crops, 64x64 heatmaps, ground-truth boxes.
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

# Training-time augmentation and target generation.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Deterministic pipeline used for both validation and testing.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/animalpose'
# Dataloader settings. Training batch is 32 per GPU here (the res50/res101
# variants use 64), matching the larger ResNet-152 memory footprint.
# NOTE: `{{_base_.dataset_info}}` is an mmcv Config placeholder resolved
# against the inherited base config at load time; this file is only loadable
# through mmcv's config machinery, not as plain Python.
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_train.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
0 → 100755
View file @
37c8cebc
# Base configs: default runtime settings plus the AnimalPose dataset meta info.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/animalpose.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the best-AP checkpoint.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(type='Adam', lr=5e-4)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup, then step decay at epochs 170 and 200
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# AnimalPose defines 20 keypoints; every channel is used at inference time.
channel_cfg = dict(
    num_output_channels=20,
    dataset_joints=20,
    dataset_channel=[list(range(20))],
    inference_channel=list(range(20)))

# model settings: top-down pose estimator, SimpleBaseline head on ResNet-50.
model = dict(
    type='TopDown',
    pretrained='torchvision://resnet50',
    backbone=dict(type='ResNet', depth=50),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# Shared data settings: 256x256 crops, 64x64 heatmaps, ground-truth boxes.
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

# Training-time augmentation and target generation.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Deterministic pipeline used for both validation and testing.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/animalpose'
# Dataloader settings. NOTE: `{{_base_.dataset_info}}` is an mmcv Config
# placeholder resolved against the inherited base config at load time; this
# file is therefore only loadable through mmcv's config machinery, not as
# plain Python.
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_train.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalPoseDataset',
        ann_file=f'{data_root}/annotations/animalpose_val.json',
        img_prefix=f'{data_root}/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md
0 → 100755
View file @
37c8cebc
<!-- [ALGORITHM] -->

<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>

```bibtex
@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}
```

</details>

<!-- [DATASET] -->

<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html">Animal-Pose (ICCV'2019)</a></summary>

```bibtex
@InProceedings{Cao_2019_ICCV,
  author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
  title = {Cross-Domain Adaptation for Animal Pose Estimation},
  booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
  month = {October},
  year = {2019}
}
```

</details>

Results on AnimalPose validation set (1117 instances)

| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :--- | :--------: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py) | 256x256 | 0.688 | 0.945 | 0.772 | 0.733 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256_20210426.log.json) |
| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py) | 256x256 | 0.696 | 0.948 | 0.785 | 0.737 | 0.954 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256_20210426.log.json) |
| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py) | 256x256 | 0.709 | 0.948 | 0.797 | 0.749 | 0.951 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256_20210426.log.json) |
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
0 → 100755
View file @
37c8cebc
# Model-index metadata for the SimpleBaseline2D (ResNet) AnimalPose configs.
Collections:
- Name: SimpleBaseline2D
  Paper:
    Title: Simple baselines for human pose estimation and tracking
    URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
  README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    # Anchored so the res101/res152 entries below can reuse the same list.
    Architecture: &id001
    - SimpleBaseline2D
    Training Data: Animal-Pose
  Name: topdown_heatmap_res50_animalpose_256x256
  Results:
  - Dataset: Animal-Pose
    Metrics:
      AP: 0.688
      AP@0.5: 0.945
      AP@0.75: 0.772
      AR: 0.733
      AR@0.5: 0.952
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    Architecture: *id001
    Training Data: Animal-Pose
  Name: topdown_heatmap_res101_animalpose_256x256
  Results:
  - Dataset: Animal-Pose
    Metrics:
      AP: 0.696
      AP@0.5: 0.948
      AP@0.75: 0.785
      AR: 0.737
      AR@0.5: 0.954
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    Architecture: *id001
    Training Data: Animal-Pose
  Name: topdown_heatmap_res152_animalpose_256x256
  Results:
  - Dataset: Animal-Pose
    Metrics:
      AP: 0.709
      AP@0.5: 0.948
      AP@0.75: 0.797
      AR: 0.749
      AR@0.5: 0.951
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md
0 → 100755
View file @
37c8cebc
<!-- [ALGORITHM] -->

<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>

```bibtex
@inproceedings{sun2019deep,
  title={Deep high-resolution representation learning for human pose estimation},
  author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={5693--5703},
  year={2019}
}
```

</details>

<!-- [DATASET] -->

<details>
<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary>

```bibtex
@misc{yu2021ap10k,
  title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
  author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
  year={2021},
  eprint={2108.12617},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
```

</details>

Results on AP-10K validation set

| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log |
| :--- | :--------: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py) | 256x256 | 0.722 | 0.939 | 0.787 | 0.555 | 0.730 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.log.json) |
| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py) | 256x256 | 0.731 | 0.937 | 0.804 | 0.574 | 0.738 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.log.json) |
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
0 → 100755
View file @
37c8cebc
# Model-index metafile for HRNet top-down heatmap models trained on AP-10K.
# Consumed by OpenMMLab's model-index tooling; keys and metric values must
# match the checkpoints referenced under `Weights`.
Collections:
- Name: HRNet
  Paper:
    Title: Deep high-resolution representation learning for human pose estimation
    URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
  README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
  In Collection: HRNet
  Metadata:
    # &id001 anchors the architecture list so the w48 entry below can reuse it.
    Architecture: &id001
    - HRNet
    Training Data: AP-10K
  Name: topdown_heatmap_hrnet_w32_ap10k_256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.722
      AP@0.5: 0.939
      AP@0.75: 0.787
      APL: 0.73
      APM: 0.555
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
  In Collection: HRNet
  Metadata:
    # *id001 dereferences the anchor defined in the w32 entry above.
    Architecture: *id001
    Training Data: AP-10K
  Name: topdown_heatmap_hrnet_w48_ap10k_256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.731
      AP@0.5: 0.937
      AP@0.75: 0.804
      APL: 0.738
      APM: 0.574
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose/mmcv-style config: top-down HRNet-W32 heatmap model on AP-10K (256x256).
# NOTE: this is an mmcv config, not a plain Python module — every top-level
# variable becomes a key of the resulting config dict, and `{{_base_.xxx}}`
# is mmcv's text-substitution syntax resolved before execution.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/ap10k.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the checkpoint with best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
# Step decay at epochs 170/200 with a 500-iteration linear warmup.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# 17 keypoint channels; all channels are used for both training and inference.
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = dict(
    type='TopDown',
    # ImageNet-pretrained HRNet-W32 backbone weights (implicit string concat).
    pretrained='https://download.openmmlab.com/mmpose/'
    'pretrain_models/hrnet_w32-36af842e.pth',
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
    ),
    # Head consumes the highest-resolution (32-channel) HRNet branch directly,
    # so no deconv layers are needed (num_deconv_layers=0).
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=32,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# 256x256 input crops produce 64x64 heatmaps (stride 4).
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    # Evaluate on ground-truth boxes rather than detector output.
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # ImageNet mean/std normalization.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation pipeline: deterministic (no augmentation, no target generation).
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/ap10k'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=4,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        # {{_base_.dataset_info}} is substituted by mmcv from the base config.
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose/mmcv-style config: top-down HRNet-W48 heatmap model on AP-10K (256x256).
# Identical schedule/data settings to the W32 variant; only the backbone widths
# (48/96/192/384), pretrained URL, and head in_channels differ.
# NOTE: this is an mmcv config — top-level variables become config keys and
# `{{_base_.xxx}}` is mmcv's text-substitution syntax, not plain Python.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/ap10k.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the checkpoint with best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
# Step decay at epochs 170/200 with a 500-iteration linear warmup.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# 17 keypoint channels; all channels are used for both training and inference.
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = dict(
    type='TopDown',
    # ImageNet-pretrained HRNet-W48 backbone weights (implicit string concat).
    pretrained='https://download.openmmlab.com/mmpose/'
    'pretrain_models/hrnet_w48-8ef0771d.pth',
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(48, 96)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(48, 96, 192)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(48, 96, 192, 384))),
    ),
    # Head consumes the highest-resolution (48-channel) HRNet branch directly,
    # so no deconv layers are needed (num_deconv_layers=0).
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=48,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# 256x256 input crops produce 64x64 heatmaps (stride 4).
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    # Evaluate on ground-truth boxes rather than detector output.
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # ImageNet mean/std normalization.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation pipeline: deterministic (no augmentation, no target generation).
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/ap10k'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=4,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        # {{_base_.dataset_info}} is substituted by mmcv from the base config.
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose/mmcv-style config: SimpleBaseline2D (ResNet-101) top-down heatmap
# model on AP-10K (256x256). Schedule/data settings match the HRNet AP-10K
# configs; only the backbone and head differ.
# NOTE: this is an mmcv config — top-level variables become config keys and
# `{{_base_.xxx}}` is mmcv's text-substitution syntax, not plain Python.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/ap10k.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the checkpoint with best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
# Step decay at epochs 170/200 with a 500-iteration linear warmup.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# 17 keypoint channels; all channels are used for both training and inference.
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = dict(
    type='TopDown',
    # torchvision ImageNet-pretrained ResNet-101 weights.
    pretrained='torchvision://resnet101',
    backbone=dict(type='ResNet', depth=101),
    # Head takes the 2048-channel ResNet stage-4 output; default deconv stack
    # of the simple head upsamples it to the 64x64 heatmap resolution.
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# 256x256 input crops produce 64x64 heatmaps (stride 4).
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    # Evaluate on ground-truth boxes rather than detector output.
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # ImageNet mean/std normalization.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation pipeline: deterministic (no augmentation, no target generation).
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/ap10k'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=4,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        # {{_base_.dataset_info}} is substituted by mmcv from the base config.
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose/mmcv-style config: SimpleBaseline2D (ResNet-50) top-down heatmap
# model on AP-10K (256x256). Identical to the ResNet-101 AP-10K config except
# for the backbone depth and pretrained weights.
# NOTE: this is an mmcv config — top-level variables become config keys and
# `{{_base_.xxx}}` is mmcv's text-substitution syntax, not plain Python.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/ap10k.py'
]
# Evaluate every 10 epochs with COCO-style mAP; keep the checkpoint with best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
# Step decay at epochs 170/200 with a 500-iteration linear warmup.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# 17 keypoint channels; all channels are used for both training and inference.
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = dict(
    type='TopDown',
    # torchvision ImageNet-pretrained ResNet-50 weights.
    pretrained='torchvision://resnet50',
    backbone=dict(type='ResNet', depth=50),
    # Head takes the 2048-channel ResNet stage-4 output; default deconv stack
    # of the simple head upsamples it to the 64x64 heatmap resolution.
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

# 256x256 input crops produce 64x64 heatmaps (stride 4).
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    # Evaluate on ground-truth boxes rather than detector output.
    use_gt_bbox=True,
    det_bbox_thr=0.0,
    bbox_file='',
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # ImageNet mean/std normalization.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation pipeline: deterministic (no augmentation, no target generation).
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/ap10k'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=4,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        # {{_base_.dataset_info}} is substituted by mmcv from the base config.
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='AnimalAP10KDataset',
        ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
        img_prefix=f'{data_root}/data/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md
0 → 100755
View file @
37c8cebc
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html"
>
SimpleBaseline2D (ECCV'2018)
</a></summary>
```
bibtex
@inproceedings
{
xiao2018simple
,
title
=
{Simple baselines for human pose estimation and tracking}
,
author
=
{Xiao, Bin and Wu, Haiping and Wei, Yichen}
,
booktitle
=
{Proceedings of the European conference on computer vision (ECCV)}
,
pages
=
{466--481}
,
year
=
{2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary
align=
"right"
><a
href=
"https://arxiv.org/abs/2108.12617"
>
AP-10K (NeurIPS'2021)
</a></summary>
```
bibtex
@misc
{
yu2021ap10k
,
title
=
{AP-10K: A Benchmark for Animal Pose Estimation in the Wild}
,
author
=
{Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao}
,
year
=
{2021}
,
eprint
=
{2108.12617}
,
archivePrefix
=
{arXiv}
,
primaryClass
=
{cs.CV}
}
```
</details>
Results on AP-10K validation set
| Arch | Input Size | AP | AP
<sup>
50
</sup>
| AP
<sup>
75
</sup>
| AP
<sup>
M
</sup>
| AP
<sup>
L
</sup>
| ckpt | log |
| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
|
[
pose_resnet_50
](
/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
)
| 256x256 | 0.681 | 0.923 | 0.740 | 0.510 | 0.688 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.log.json
)
|
|
[
pose_resnet_101
](
/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
)
| 256x256 | 0.681 | 0.922 | 0.742 | 0.534 | 0.688 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.log.json
)
|
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
0 → 100755
View file @
37c8cebc
# Model-index metafile for SimpleBaseline2D (ResNet) top-down heatmap models
# trained on AP-10K. Consumed by OpenMMLab's model-index tooling; keys and
# metric values must match the checkpoints referenced under `Weights`.
Collections:
- Name: SimpleBaseline2D
  Paper:
    Title: Simple baselines for human pose estimation and tracking
    URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
  README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    # &id001 anchors the architecture list so the res101 entry below can reuse it.
    Architecture: &id001
    - SimpleBaseline2D
    Training Data: AP-10K
  Name: topdown_heatmap_res50_ap10k_256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.681
      AP@0.5: 0.923
      AP@0.75: 0.74
      APL: 0.688
      APM: 0.51
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    # *id001 dereferences the anchor defined in the res50 entry above.
    Architecture: *id001
    Training Data: AP-10K
  Name: topdown_heatmap_res101_ap10k_256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.681
      AP@0.5: 0.922
      AP@0.75: 0.742
      APL: 0.688
      APM: 0.534
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md
0 → 100755
View file @
37c8cebc
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html"
>
HRNet (CVPR'2019)
</a></summary>
```
bibtex
@inproceedings
{
sun2019deep
,
title
=
{Deep high-resolution representation learning for human pose estimation}
,
author
=
{Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}
,
booktitle
=
{Proceedings of the IEEE conference on computer vision and pattern recognition}
,
pages
=
{5693--5703}
,
year
=
{2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary
align=
"right"
><a
href=
"https://arxiv.org/abs/1906.05586"
>
ATRW (ACM MM'2020)
</a></summary>
```
bibtex
@inproceedings
{
li2020atrw
,
title
=
{ATRW: A Benchmark for Amur Tiger Re-identification in the Wild}
,
author
=
{Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao}
,
booktitle
=
{Proceedings of the 28th ACM International Conference on Multimedia}
,
pages
=
{2590--2598}
,
year
=
{2020}
}
```
</details>
Results on ATRW validation set
| Arch | Input Size | AP | AP
<sup>
50
</sup>
| AP
<sup>
75
</sup>
| AR | AR
<sup>
50
</sup>
| ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
|
[
pose_hrnet_w32
](
/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
)
| 256x256 | 0.912 | 0.973 | 0.959 | 0.938 | 0.985 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256-f027f09a_20210414.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256_20210414.log.json
)
|
|
[
pose_hrnet_w48
](
/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
)
| 256x256 | 0.911 | 0.972 | 0.946 | 0.937 | 0.985 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256-ac088892_20210414.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256_20210414.log.json
)
|
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
0 → 100755
View file @
37c8cebc
Collections
:
-
Name
:
HRNet
Paper
:
Title
:
Deep high-resolution representation learning for human pose estimation
URL
:
http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
README
:
https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models
:
-
Config
:
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
In Collection
:
HRNet
Metadata
:
Architecture
:
&id001
-
HRNet
Training Data
:
ATRW
Name
:
topdown_heatmap_hrnet_w32_atrw_256x256
Results
:
-
Dataset
:
ATRW
Metrics
:
AP
:
0.912
AP@0.5
:
0.973
AP@0.75
:
0.959
AR
:
0.938
AR@0.5
:
0.985
Task
:
Animal 2D Keypoint
Weights
:
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256-f027f09a_20210414.pth
-
Config
:
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
In Collection
:
HRNet
Metadata
:
Architecture
:
*id001
Training Data
:
ATRW
Name
:
topdown_heatmap_hrnet_w48_atrw_256x256
Results
:
-
Dataset
:
ATRW
Metrics
:
AP
:
0.911
AP@0.5
:
0.972
AP@0.75
:
0.946
AR
:
0.937
AR@0.5
:
0.985
Task
:
Animal 2D Keypoint
Weights
:
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256-ac088892_20210414.pth
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose training config: top-down HRNet-W32 heatmap model on the ATRW
# (Amur Tiger Re-identification in the Wild) keypoint dataset, 256x256 input.
# NOTE: this is an mmcv config script, not a plain Python module — the
# `{{_base_.dataset_info}}` placeholders are substituted textually by the
# config loader before the file is parsed.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/atrw.py'
]
# Evaluate mAP every 10 epochs; keep the checkpoint with the best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)  # gradient clipping disabled
# learning policy: linear warmup for 500 iters, then step decay at epochs
# 170 and 200 over a 210-epoch schedule.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# ATRW annotates 15 keypoints; every channel is used for both training
# and inference.
channel_cfg = dict(
    num_output_channels=15,
    dataset_joints=15,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    ],
    inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

# model settings
model = dict(
    type='TopDown',
    # ImageNet-pretrained HRNet-W32 backbone weights.
    pretrained='https://download.openmmlab.com/mmpose/'
    'pretrain_models/hrnet_w32-36af842e.pth',
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
    ),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=32,  # width of the highest-resolution HRNet branch
        out_channels=channel_cfg['num_output_channels'],
        # No deconv layers: HRNet already emits high-resolution features.
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,  # evaluate on ground-truth boxes
    det_bbox_thr=0.0,
    bbox_file='',  # no detector result file needed when use_gt_bbox=True
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.2, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40,
        scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # Standard ImageNet normalization statistics.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation uses no augmentation: just crop, resize and normalize.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/atrw'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_train.json',
        img_prefix=f'{data_root}/images/train/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    # Test split reuses the validation annotations and images.
    test=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose training config: top-down HRNet-W48 heatmap model on the ATRW
# (Amur Tiger Re-identification in the Wild) keypoint dataset, 256x256 input.
# Identical schedule/data setup to the W32 variant; only the backbone widths
# (48/96/192/384), the pretrained checkpoint, and the head input width differ.
# NOTE: this is an mmcv config script — `{{_base_.dataset_info}}` placeholders
# are substituted textually by the config loader before parsing.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/atrw.py'
]
# Evaluate mAP every 10 epochs; keep the checkpoint with the best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)  # gradient clipping disabled
# learning policy: linear warmup for 500 iters, then step decay at epochs
# 170 and 200 over a 210-epoch schedule.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# ATRW annotates 15 keypoints; every channel is used for both training
# and inference.
channel_cfg = dict(
    num_output_channels=15,
    dataset_joints=15,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    ],
    inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

# model settings
model = dict(
    type='TopDown',
    # ImageNet-pretrained HRNet-W48 backbone weights.
    pretrained='https://download.openmmlab.com/mmpose/'
    'pretrain_models/hrnet_w48-8ef0771d.pth',
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(48, 96)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(48, 96, 192)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(48, 96, 192, 384))),
    ),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=48,  # width of the highest-resolution HRNet branch
        out_channels=channel_cfg['num_output_channels'],
        # No deconv layers: HRNet already emits high-resolution features.
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,  # evaluate on ground-truth boxes
    det_bbox_thr=0.0,
    bbox_file='',  # no detector result file needed when use_gt_bbox=True
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.2, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40,
        scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # Standard ImageNet normalization statistics.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation uses no augmentation: just crop, resize and normalize.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/atrw'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_train.json',
        img_prefix=f'{data_root}/images/train/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    # Test split reuses the validation annotations and images.
    test=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose training config: SimpleBaseline2D (ResNet-101 backbone) top-down
# heatmap model on the ATRW keypoint dataset, 256x256 input.
# Same schedule/data setup as the HRNet ATRW configs; only the backbone and
# keypoint head differ.
# NOTE: this is an mmcv config script — `{{_base_.dataset_info}}` placeholders
# are substituted textually by the config loader before parsing.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/atrw.py'
]
# Evaluate mAP every 10 epochs; keep the checkpoint with the best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)  # gradient clipping disabled
# learning policy: linear warmup for 500 iters, then step decay at epochs
# 170 and 200 over a 210-epoch schedule.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# ATRW annotates 15 keypoints; every channel is used for both training
# and inference.
channel_cfg = dict(
    num_output_channels=15,
    dataset_joints=15,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    ],
    inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

# model settings
model = dict(
    type='TopDown',
    # ImageNet-pretrained torchvision ResNet-101 weights.
    pretrained='torchvision://resnet101',
    backbone=dict(type='ResNet', depth=101),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,  # ResNet final-stage output width
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,  # evaluate on ground-truth boxes
    det_bbox_thr=0.0,
    bbox_file='',  # no detector result file needed when use_gt_bbox=True
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.2, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40,
        scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # Standard ImageNet normalization statistics.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation uses no augmentation: just crop, resize and normalize.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/atrw'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_train.json',
        img_prefix=f'{data_root}/images/train/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    # Test split reuses the validation annotations and images.
    test=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
0 → 100755
View file @
37c8cebc
# mmpose training config: SimpleBaseline2D (ResNet-152 backbone) top-down
# heatmap model on the ATRW keypoint dataset, 256x256 input.
# Identical to the ResNet-101 ATRW config apart from the backbone depth and
# pretrained checkpoint.
# NOTE: this is an mmcv config script — `{{_base_.dataset_info}}` placeholders
# are substituted textually by the config loader before parsing.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/atrw.py'
]
# Evaluate mAP every 10 epochs; keep the checkpoint with the best AP.
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)  # gradient clipping disabled
# learning policy: linear warmup for 500 iters, then step decay at epochs
# 170 and 200 over a 210-epoch schedule.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])

# ATRW annotates 15 keypoints; every channel is used for both training
# and inference.
channel_cfg = dict(
    num_output_channels=15,
    dataset_joints=15,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    ],
    inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

# model settings
model = dict(
    type='TopDown',
    # ImageNet-pretrained torchvision ResNet-152 weights.
    pretrained='torchvision://resnet152',
    backbone=dict(type='ResNet', depth=152),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=2048,  # ResNet final-stage output width
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=True,  # evaluate on ground-truth boxes
    det_bbox_thr=0.0,
    bbox_file='',  # no detector result file needed when use_gt_bbox=True
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.2, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40,
        scale_factor=0.5),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    # Standard ImageNet normalization statistics.
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

# Validation uses no augmentation: just crop, resize and normalize.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.0),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'data/atrw'
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=2,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_train.json',
        img_prefix=f'{data_root}/images/train/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    # Test split reuses the validation annotations and images.
    test=dict(
        type='AnimalATRWDataset',
        ann_file=f'{data_root}/annotations/keypoint_val.json',
        img_prefix=f'{data_root}/images/val/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
Prev
1
2
3
4
5
6
7
8
…
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment