Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
mashun1
mmpose-rtmo_pytorch
Commits
ca8a762a
Commit
ca8a762a
authored
Jan 23, 2024
by
chenzk
Browse files
v1.0
parents
Changes
283
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2062 additions
and
0 deletions
+2062
-0
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P2-256x256.py
...ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P2-256x256.py
+146
-0
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_amphibian-256x256.py
...rnet-w32_8xb32-300e_animalkingdom_P3_amphibian-256x256.py
+146
-0
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_bird-256x256.py
...-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_bird-256x256.py
+146
-0
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py
...-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py
+146
-0
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_mammal-256x256.py
...m_hrnet-w32_8xb32-300e_animalkingdom_P3_mammal-256x256.py
+146
-0
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_reptile-256x256.py
..._hrnet-w32_8xb32-300e_animalkingdom_P3_reptile-256x256.py
+146
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md
...d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md
+40
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml
..._keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml
+34
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md
..._keypoint/topdown_heatmap/animalpose/resnet_animalpose.md
+41
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml
...keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml
+51
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
...imalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
+147
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
...imalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
+147
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
.../animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
+118
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
.../animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
+118
-0
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
...p/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
+118
-0
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
...n_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
+220
-0
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md
...al_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md
+58
-0
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml
...l_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml
+19
-0
configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md
...s/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md
+41
-0
configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml
.../animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml
+34
-0
No files found.
Too many changes to show.
To preserve performance only
283 of 283+
files are displayed.
Plain diff
Email patch
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P2-256x256.py
0 → 100644
View file @
ca8a762a
_base_ = ['../../../_base_/default_runtime.py']

# runtime
# NOTE(review): max_epochs=300 but the MultiStepLR below ends at epoch 210
# (milestones 170/200) — looks copied from a 210-epoch schedule; confirm
# whether the LR is intentionally flat for epochs 210-300.
train_cfg = dict(max_epochs=300, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-4,
))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=23,
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalKingdomDataset'
data_mode = 'topdown'
data_root = 'data/ak/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P2/train.json',
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=24,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P2/test.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = [
    dict(type='PCKAccuracy', thr=0.05),
    dict(type='AUC')
]
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_amphibian-256x256.py
0 → 100644
View file @
ca8a762a
_base_ = ['../../../_base_/default_runtime.py']

# runtime
# NOTE(review): max_epochs=300 but the MultiStepLR below ends at epoch 210
# (milestones 170/200) — looks copied from a 210-epoch schedule; confirm
# whether the LR is intentionally flat for epochs 210-300.
train_cfg = dict(max_epochs=300, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-4,
))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=23,
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalKingdomDataset'
data_mode = 'topdown'
data_root = 'data/ak/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_amphibian/train.json',
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=24,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_amphibian/test.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = [
    dict(type='PCKAccuracy', thr=0.05),
    dict(type='AUC')
]
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_bird-256x256.py
0 → 100644
View file @
ca8a762a
_base_ = ['../../../_base_/default_runtime.py']

# runtime
# NOTE(review): max_epochs=300 but the MultiStepLR below ends at epoch 210
# (milestones 170/200) — looks copied from a 210-epoch schedule; confirm
# whether the LR is intentionally flat for epochs 210-300.
train_cfg = dict(max_epochs=300, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-4,
))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=23,
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalKingdomDataset'
data_mode = 'topdown'
data_root = 'data/ak/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_bird/train.json',
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=24,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_bird/test.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = [
    dict(type='PCKAccuracy', thr=0.05),
    dict(type='AUC')
]
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py
0 → 100644
View file @
ca8a762a
_base_ = ['../../../_base_/default_runtime.py']

# runtime
# NOTE(review): max_epochs=300 but the MultiStepLR below ends at epoch 210
# (milestones 170/200) — looks copied from a 210-epoch schedule; confirm
# whether the LR is intentionally flat for epochs 210-300.
train_cfg = dict(max_epochs=300, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-4,
))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=23,
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalKingdomDataset'
data_mode = 'topdown'
data_root = 'data/ak/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_fish/train.json',
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=24,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_fish/test.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = [
    dict(type='PCKAccuracy', thr=0.05),
    dict(type='AUC')
]
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_mammal-256x256.py
0 → 100644
View file @
ca8a762a
_base_ = ['../../../_base_/default_runtime.py']

# runtime
# NOTE(review): max_epochs=300 but the MultiStepLR below ends at epoch 210
# (milestones 170/200) — looks copied from a 210-epoch schedule; confirm
# whether the LR is intentionally flat for epochs 210-300.
train_cfg = dict(max_epochs=300, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-4,
))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=23,
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalKingdomDataset'
data_mode = 'topdown'
data_root = 'data/ak/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_mammal/train.json',
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=24,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/ak_P3_mammal/test.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = [
    dict(type='PCKAccuracy', thr=0.05),
    dict(type='AUC')
]
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_reptile-256x256.py
0 → 100644
View file @
ca8a762a
_base_
=
[
'../../../_base_/default_runtime.py'
]
# runtime
train_cfg
=
dict
(
max_epochs
=
300
,
val_interval
=
10
)
# optimizer
optim_wrapper
=
dict
(
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
5e-4
,
))
# learning policy
param_scheduler
=
[
dict
(
type
=
'LinearLR'
,
begin
=
0
,
end
=
500
,
start_factor
=
0.001
,
by_epoch
=
False
),
# warm-up
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
210
,
milestones
=
[
170
,
200
],
gamma
=
0.1
,
by_epoch
=
True
)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr
=
dict
(
base_batch_size
=
512
)
# hooks
default_hooks
=
dict
(
checkpoint
=
dict
(
save_best
=
'PCK'
,
rule
=
'greater'
))
# codec settings
codec
=
dict
(
type
=
'MSRAHeatmap'
,
input_size
=
(
256
,
256
),
heatmap_size
=
(
64
,
64
),
sigma
=
2
)
# model settings
model
=
dict
(
type
=
'TopdownPoseEstimator'
,
data_preprocessor
=
dict
(
type
=
'PoseDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
),
backbone
=
dict
(
type
=
'HRNet'
,
in_channels
=
3
,
extra
=
dict
(
stage1
=
dict
(
num_modules
=
1
,
num_branches
=
1
,
block
=
'BOTTLENECK'
,
num_blocks
=
(
4
,
),
num_channels
=
(
64
,
)),
stage2
=
dict
(
num_modules
=
1
,
num_branches
=
2
,
block
=
'BASIC'
,
num_blocks
=
(
4
,
4
),
num_channels
=
(
32
,
64
)),
stage3
=
dict
(
num_modules
=
4
,
num_branches
=
3
,
block
=
'BASIC'
,
num_blocks
=
(
4
,
4
,
4
),
num_channels
=
(
32
,
64
,
128
)),
stage4
=
dict
(
num_modules
=
3
,
num_branches
=
4
,
block
=
'BASIC'
,
num_blocks
=
(
4
,
4
,
4
,
4
),
num_channels
=
(
32
,
64
,
128
,
256
))),
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth'
),
),
head
=
dict
(
type
=
'HeatmapHead'
,
in_channels
=
32
,
out_channels
=
23
,
deconv_out_channels
=
None
,
loss
=
dict
(
type
=
'KeypointMSELoss'
,
use_target_weight
=
True
),
decoder
=
codec
),
test_cfg
=
dict
(
flip_test
=
True
,
flip_mode
=
'heatmap'
,
shift_heatmap
=
True
,
))
# base dataset settings
dataset_type
=
'AnimalKingdomDataset'
data_mode
=
'topdown'
data_root
=
'data/ak/'
# pipelines
train_pipeline
=
[
dict
(
type
=
'LoadImage'
),
dict
(
type
=
'GetBBoxCenterScale'
),
dict
(
type
=
'RandomFlip'
,
direction
=
'horizontal'
),
dict
(
type
=
'RandomHalfBody'
),
dict
(
type
=
'RandomBBoxTransform'
),
dict
(
type
=
'TopdownAffine'
,
input_size
=
codec
[
'input_size'
]),
dict
(
type
=
'GenerateTarget'
,
encoder
=
codec
),
dict
(
type
=
'PackPoseInputs'
)
]
val_pipeline
=
[
dict
(
type
=
'LoadImage'
),
dict
(
type
=
'GetBBoxCenterScale'
),
dict
(
type
=
'TopdownAffine'
,
input_size
=
codec
[
'input_size'
]),
dict
(
type
=
'PackPoseInputs'
)
]
# data loaders
train_dataloader
=
dict
(
batch_size
=
32
,
num_workers
=
2
,
persistent_workers
=
True
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
data_mode
=
data_mode
,
ann_file
=
'annotations/ak_P3_reptile/train.json'
,
data_prefix
=
dict
(
img
=
'images/'
),
pipeline
=
train_pipeline
,
))
val_dataloader
=
dict
(
batch_size
=
24
,
num_workers
=
2
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
,
round_up
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
data_mode
=
data_mode
,
ann_file
=
'annotations/ak_P3_reptile/test.json'
,
data_prefix
=
dict
(
img
=
'images/'
),
test_mode
=
True
,
pipeline
=
val_pipeline
,
))
test_dataloader
=
val_dataloader
# evaluators
val_evaluator
=
[
dict
(
type
=
'PCKAccuracy'
,
thr
=
0.05
),
dict
(
type
=
'AUC'
)]
test_evaluator
=
val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md
0 → 100644
View file @
ca8a762a
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html"
>
HRNet (CVPR'2019)
</a></summary>
```
bibtex
@inproceedings
{
sun2019deep
,
title
=
{Deep high-resolution representation learning for human pose estimation}
,
author
=
{Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}
,
booktitle
=
{Proceedings of the IEEE conference on computer vision and pattern recognition}
,
pages
=
{5693--5703}
,
year
=
{2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html"
>
Animal-Pose (ICCV'2019)
</a></summary>
```
bibtex
@InProceedings
{
Cao_2019_ICCV
,
author
=
{Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing}
,
title
=
{Cross-Domain Adaptation for Animal Pose Estimation}
,
booktitle
=
{The IEEE International Conference on Computer Vision (ICCV)}
,
month
=
{October}
,
year
=
{2019}
}
```
</details>
Results on AnimalPose validation set (1117 instances)
| Arch | Input Size | AP | AP
<sup>
50
</sup>
| AP
<sup>
75
</sup>
| AR | AR
<sup>
50
</sup>
| ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
|
[
pose_hrnet_w32
](
/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
)
| 256x256 | 0.740 | 0.959 | 0.833 | 0.780 | 0.965 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256_20210426.log.json
)
|
|
[
pose_hrnet_w48
](
/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
)
| 256x256 | 0.738 | 0.958 | 0.831 | 0.778 | 0.962 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256_20210426.log.json
)
|
configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml
0 → 100644
View file @
ca8a762a
Models
:
-
Config
:
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
In Collection
:
HRNet
Metadata
:
Architecture
:
&id001
-
HRNet
Training Data
:
Animal-Pose
Name
:
td-hm_hrnet-w32_8xb64-210e_animalpose-256x256
Results
:
-
Dataset
:
Animal-Pose
Metrics
:
AP
:
0.740
AP@0.5
:
0.959
AP@0.75
:
0.833
AR
:
0.780
AR@0.5
:
0.965
Task
:
Animal 2D Keypoint
Weights
:
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
-
Config
:
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
In Collection
:
HRNet
Metadata
:
Architecture
:
*id001
Training Data
:
Animal-Pose
Name
:
td-hm_hrnet-w48_8xb64-210e_animalpose-256x256
Results
:
-
Dataset
:
Animal-Pose
Metrics
:
AP
:
0.738
AP@0.5
:
0.958
AP@0.75
:
0.831
AR
:
0.778
AR@0.5
:
0.962
Task
:
Animal 2D Keypoint
Weights
:
https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth
configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md
0 → 100644
View file @
ca8a762a
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html"
>
SimpleBaseline2D (ECCV'2018)
</a></summary>
```
bibtex
@inproceedings
{
xiao2018simple
,
title
=
{Simple baselines for human pose estimation and tracking}
,
author
=
{Xiao, Bin and Wu, Haiping and Wei, Yichen}
,
booktitle
=
{Proceedings of the European conference on computer vision (ECCV)}
,
pages
=
{466--481}
,
year
=
{2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html"
>
Animal-Pose (ICCV'2019)
</a></summary>
```
bibtex
@InProceedings
{
Cao_2019_ICCV
,
author
=
{Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing}
,
title
=
{Cross-Domain Adaptation for Animal Pose Estimation}
,
booktitle
=
{The IEEE International Conference on Computer Vision (ICCV)}
,
month
=
{October}
,
year
=
{2019}
}
```
</details>
Results on AnimalPose validation set (1117 instances)
| Arch | Input Size | AP | AP
<sup>
50
</sup>
| AP
<sup>
75
</sup>
| AR | AR
<sup>
50
</sup>
| ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
|
[
pose_resnet_50
](
/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
)
| 256x256 | 0.691 | 0.947 | 0.770 | 0.736 | 0.955 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256_20210426.log.json
)
|
|
[
pose_resnet_101
](
/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
)
| 256x256 | 0.696 | 0.948 | 0.774 | 0.736 | 0.951 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256_20210426.log.json
)
|
|
[
pose_resnet_152
](
/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
)
| 256x256 | 0.704 | 0.938 | 0.786 | 0.748 | 0.946 |
[
ckpt
](
https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256_20210426.log.json
)
|
configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml
0 → 100644
View file @
ca8a762a
# Model-index metafile for SimpleBaseline2D (ResNet) models on Animal-Pose.
Models:
- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    Architecture: &id001
    - SimpleBaseline2D
    - ResNet
    Training Data: Animal-Pose
  Name: td-hm_res50_8xb64-210e_animalpose-256x256
  Results:
  - Dataset: Animal-Pose
    Metrics:
      AP: 0.691
      AP@0.5: 0.947
      AP@0.75: 0.770
      AR: 0.736
      AR@0.5: 0.955
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth
- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    # re-use the architecture list defined by the anchor above
    Architecture: *id001
    Training Data: Animal-Pose
  Name: td-hm_res101_8xb64-210e_animalpose-256x256
  Results:
  - Dataset: Animal-Pose
    Metrics:
      AP: 0.696
      AP@0.5: 0.948
      AP@0.75: 0.774
      AR: 0.736
      AR@0.5: 0.951
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth
- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
  In Collection: SimpleBaseline2D
  Metadata:
    Architecture: *id001
    Training Data: Animal-Pose
  Name: td-hm_res152_8xb32-210e_animalpose-256x256
  Results:
  - Dataset: Animal-Pose
    Metrics:
      AP: 0.704
      AP@0.5: 0.938
      AP@0.75: 0.786
      AR: 0.748
      AR@0.5: 0.946
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
0 → 100644
View file @
ca8a762a
# Top-down heatmap config: HRNet-W32 on Animal-Pose, 256x256 input,
# 8 GPUs x batch 64, 210 epochs.
_base_ = ['../../../_base_/default_runtime.py']

# runtime: train for 210 epochs, validate every 10
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))

# learning policy: linear warm-up (first 500 iters), then step decay
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks: keep the checkpoint with the best COCO-style AP
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=20,  # 20 Animal-Pose keypoints
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalPoseDataset'
data_mode = 'topdown'
data_root = 'data/animalpose/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_train.json',
        data_prefix=dict(img=''),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_val.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/animalpose_val.json')
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
0 → 100644
View file @
ca8a762a
# Top-down heatmap config: HRNet-W48 on Animal-Pose, 256x256 input,
# 8 GPUs x batch 64, 210 epochs.
_base_ = ['../../../_base_/default_runtime.py']

# runtime: train for 210 epochs, validate every 10
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))

# learning policy: linear warm-up (first 500 iters), then step decay
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks: keep the checkpoint with the best COCO-style AP
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(48, 96)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(48, 96, 192)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(48, 96, 192, 384))),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w48-8ef0771d.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=48,
        out_channels=20,  # 20 Animal-Pose keypoints
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalPoseDataset'
data_mode = 'topdown'
data_root = 'data/animalpose/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_train.json',
        data_prefix=dict(img=''),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_val.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/animalpose_val.json')
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
0 → 100644
View file @
ca8a762a
# Top-down heatmap config: SimpleBaseline2D (ResNet-101) on Animal-Pose,
# 256x256 input, 8 GPUs x batch 64, 210 epochs.
_base_ = ['../../../_base_/default_runtime.py']

# runtime: train for 210 epochs, validate every 10
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))

# learning policy: linear warm-up (first 500 iters), then step decay
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks: keep the checkpoint with the best COCO-style AP
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='ResNet',
        depth=101,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='torchvision://resnet101'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=2048,
        out_channels=20,  # 20 Animal-Pose keypoints
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalPoseDataset'
data_mode = 'topdown'
data_root = 'data/animalpose/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_train.json',
        data_prefix=dict(img=''),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_val.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/animalpose_val.json')
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
0 → 100644
View file @
ca8a762a
# Top-down heatmap config: SimpleBaseline2D (ResNet-152) on Animal-Pose,
# 256x256 input, 8 GPUs x batch 32, 210 epochs.
_base_ = ['../../../_base_/default_runtime.py']

# runtime: train for 210 epochs, validate every 10
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))

# learning policy: linear warm-up (first 500 iters), then step decay
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
# (8 GPUs x 32 per GPU = 256)
auto_scale_lr = dict(base_batch_size=256)

# hooks: keep the checkpoint with the best COCO-style AP
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='ResNet',
        depth=152,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='torchvision://resnet152'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=2048,
        out_channels=20,  # 20 Animal-Pose keypoints
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalPoseDataset'
data_mode = 'topdown'
data_root = 'data/animalpose/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_train.json',
        data_prefix=dict(img=''),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_val.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/animalpose_val.json')
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
0 → 100644
View file @
ca8a762a
# Top-down heatmap config: SimpleBaseline2D (ResNet-50) on Animal-Pose,
# 256x256 input, 8 GPUs x batch 64, 210 epochs.
_base_ = ['../../../_base_/default_runtime.py']

# runtime: train for 210 epochs, validate every 10
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))

# learning policy: linear warm-up (first 500 iters), then step decay
param_scheduler = [
    dict(
        type='LinearLR',
        begin=0,
        end=500,
        start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks: keep the checkpoint with the best COCO-style AP
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='ResNet',
        depth=50,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='torchvision://resnet50'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=2048,
        out_channels=20,  # 20 Animal-Pose keypoints
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'AnimalPoseDataset'
data_mode = 'topdown'
data_root = 'data/animalpose/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_train.json',
        data_prefix=dict(img=''),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/animalpose_val.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/animalpose_val.json')
test_evaluator = val_evaluator
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
0 → 100644
View file @
ca8a762a
_base_
=
[
'../../../_base_/default_runtime.py'
]
# runtime
max_epochs
=
210
stage2_num_epochs
=
30
base_lr
=
4e-3
train_cfg
=
dict
(
max_epochs
=
max_epochs
,
val_interval
=
10
)
randomness
=
dict
(
seed
=
21
)
# optimizer
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
base_lr
,
weight_decay
=
0.05
),
paramwise_cfg
=
dict
(
norm_decay_mult
=
0
,
bias_decay_mult
=
0
,
bypass_duplicate
=
True
))
# learning policy
param_scheduler
=
[
dict
(
type
=
'LinearLR'
,
start_factor
=
1.0e-5
,
by_epoch
=
False
,
begin
=
0
,
end
=
1000
),
dict
(
# use cosine lr from 105 to 210 epoch
type
=
'CosineAnnealingLR'
,
eta_min
=
base_lr
*
0.05
,
begin
=
max_epochs
//
2
,
end
=
max_epochs
,
T_max
=
max_epochs
//
2
,
by_epoch
=
True
,
convert_to_iter_based
=
True
),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr
=
dict
(
base_batch_size
=
1024
)
# codec settings
codec
=
dict
(
type
=
'UDPHeatmap'
,
input_size
=
(
256
,
256
),
heatmap_size
=
(
64
,
64
),
sigma
=
2
)
# model settings
model
=
dict
(
type
=
'TopdownPoseEstimator'
,
data_preprocessor
=
dict
(
type
=
'PoseDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
),
backbone
=
dict
(
_scope_
=
'mmdet'
,
type
=
'CSPNeXt'
,
arch
=
'P5'
,
expand_ratio
=
0.5
,
deepen_factor
=
0.67
,
widen_factor
=
0.75
,
out_indices
=
(
4
,
),
channel_attention
=
True
,
norm_cfg
=
dict
(
type
=
'SyncBN'
),
act_cfg
=
dict
(
type
=
'SiLU'
),
init_cfg
=
dict
(
type
=
'Pretrained'
,
prefix
=
'backbone.'
,
checkpoint
=
'https://download.openmmlab.com/mmdetection/v3.0/'
'rtmdet/cspnext_rsb_pretrain/'
'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth'
)),
head
=
dict
(
type
=
'HeatmapHead'
,
in_channels
=
768
,
out_channels
=
17
,
loss
=
dict
(
type
=
'KeypointMSELoss'
,
use_target_weight
=
True
),
decoder
=
codec
),
test_cfg
=
dict
(
flip_test
=
True
,
flip_mode
=
'heatmap'
,
shift_heatmap
=
False
,
))
# base dataset settings
dataset_type
=
'AP10KDataset'
data_mode
=
'topdown'
data_root
=
'data/ap10k/'
# pipelines
train_pipeline
=
[
dict
(
type
=
'LoadImage'
),
dict
(
type
=
'GetBBoxCenterScale'
),
dict
(
type
=
'RandomFlip'
,
direction
=
'horizontal'
),
dict
(
type
=
'RandomHalfBody'
),
dict
(
type
=
'RandomBBoxTransform'
,
scale_factor
=
[
0.6
,
1.4
],
rotate_factor
=
80
),
dict
(
type
=
'TopdownAffine'
,
input_size
=
codec
[
'input_size'
],
use_udp
=
True
),
dict
(
type
=
'mmdet.YOLOXHSVRandomAug'
),
dict
(
type
=
'Albumentation'
,
transforms
=
[
dict
(
type
=
'Blur'
,
p
=
0.1
),
dict
(
type
=
'MedianBlur'
,
p
=
0.1
),
dict
(
type
=
'CoarseDropout'
,
max_holes
=
1
,
max_height
=
0.4
,
max_width
=
0.4
,
min_holes
=
1
,
min_height
=
0.2
,
min_width
=
0.2
,
p
=
1.
),
]),
dict
(
type
=
'GenerateTarget'
,
encoder
=
codec
),
dict
(
type
=
'PackPoseInputs'
)
]
val_pipeline
=
[
dict
(
type
=
'LoadImage'
),
dict
(
type
=
'GetBBoxCenterScale'
),
dict
(
type
=
'TopdownAffine'
,
input_size
=
codec
[
'input_size'
],
use_udp
=
True
),
dict
(
type
=
'PackPoseInputs'
)
]
train_pipeline_stage2
=
[
dict
(
type
=
'LoadImage'
),
dict
(
type
=
'GetBBoxCenterScale'
),
dict
(
type
=
'RandomFlip'
,
direction
=
'horizontal'
),
dict
(
type
=
'RandomHalfBody'
),
dict
(
type
=
'RandomBBoxTransform'
,
shift_factor
=
0.
,
scale_factor
=
[
0.75
,
1.25
],
rotate_factor
=
60
),
dict
(
type
=
'TopdownAffine'
,
input_size
=
codec
[
'input_size'
],
use_udp
=
True
),
dict
(
type
=
'mmdet.YOLOXHSVRandomAug'
),
dict
(
type
=
'Albumentation'
,
transforms
=
[
dict
(
type
=
'Blur'
,
p
=
0.1
),
dict
(
type
=
'MedianBlur'
,
p
=
0.1
),
dict
(
type
=
'CoarseDropout'
,
max_holes
=
1
,
max_height
=
0.4
,
max_width
=
0.4
,
min_holes
=
1
,
min_height
=
0.2
,
min_width
=
0.2
,
p
=
0.5
),
]),
dict
(
type
=
'GenerateTarget'
,
encoder
=
codec
),
dict
(
type
=
'PackPoseInputs'
)
]
# data loaders
train_dataloader
=
dict
(
batch_size
=
64
,
num_workers
=
10
,
persistent_workers
=
True
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
data_mode
=
data_mode
,
ann_file
=
'annotations/ap10k-train-split1.json'
,
data_prefix
=
dict
(
img
=
'data/'
),
pipeline
=
train_pipeline
,
))
val_dataloader
=
dict
(
batch_size
=
32
,
num_workers
=
10
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
,
round_up
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
data_mode
=
data_mode
,
ann_file
=
'annotations/ap10k-val-split1.json'
,
data_prefix
=
dict
(
img
=
'data/'
),
test_mode
=
True
,
pipeline
=
val_pipeline
,
))
test_dataloader
=
dict
(
batch_size
=
32
,
num_workers
=
10
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
,
round_up
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
data_mode
=
data_mode
,
ann_file
=
'annotations/ap10k-test-split1.json'
,
data_prefix
=
dict
(
img
=
'data/'
),
test_mode
=
True
,
pipeline
=
val_pipeline
,
))
# hooks
default_hooks
=
dict
(
checkpoint
=
dict
(
save_best
=
'coco/AP'
,
rule
=
'greater'
,
max_keep_ckpts
=
1
))
custom_hooks
=
[
dict
(
type
=
'EMAHook'
,
ema_type
=
'ExpMomentumEMA'
,
momentum
=
0.0002
,
update_buffers
=
True
,
priority
=
49
),
dict
(
type
=
'mmdet.PipelineSwitchHook'
,
switch_epoch
=
max_epochs
-
stage2_num_epochs
,
switch_pipeline
=
train_pipeline_stage2
)
]
# evaluators
val_evaluator
=
dict
(
type
=
'CocoMetric'
,
ann_file
=
data_root
+
'annotations/ap10k-val-split1.json'
)
test_evaluator
=
dict
(
type
=
'CocoMetric'
,
ann_file
=
data_root
+
'annotations/ap10k-test-split1.json'
)
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md
0 → 100644
View file @
ca8a762a
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"https://arxiv.org/abs/2212.07784"
>
RTMDet (arXiv'2022)
</a></summary>
```
bibtex
@misc
{
lyu2022rtmdet
,
title
=
{RTMDet: An Empirical Study of Designing Real-Time Object Detectors}
,
author
=
{Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}
,
year
=
{2022}
,
eprint
=
{2212.07784}
,
archivePrefix
=
{arXiv}
,
primaryClass
=
{cs.CV}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html"
>
UDP (CVPR'2020)
</a></summary>
```
bibtex
@InProceedings
{
Huang_2020_CVPR
,
author
=
{Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}
,
title
=
{The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}
,
booktitle
=
{The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}
,
month
=
{June}
,
year
=
{2020}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary
align=
"right"
><a
href=
"https://arxiv.org/abs/2108.12617"
>
AP-10K (NeurIPS'2021)
</a></summary>
```
bibtex
@misc
{
yu2021ap10k
,
title
=
{AP-10K: A Benchmark for Animal Pose Estimation in the Wild}
,
author
=
{Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao}
,
year
=
{2021}
,
eprint
=
{2108.12617}
,
archivePrefix
=
{arXiv}
,
primaryClass
=
{cs.CV}
}
```
</details>
Results on AP-10K validation set
| Arch | Input Size | AP | AP
<sup>
50
</sup>
| AP
<sup>
75
</sup>
| AP
<sup>
M
</sup>
| AP
<sup>
L
</sup>
| ckpt | log |
| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
|
[
pose_cspnext_m
](
/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
)
| 256x256 | 0.703 | 0.944 | 0.776 | 0.513 | 0.710 |
[
ckpt
](
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.pth
)
|
[
log
](
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.json
)
|
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml
0 → 100644
View file @
ca8a762a
# Model-index metafile for CSPNeXt-M + UDP on AP-10K.
Models:
- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
  In Collection: UDP
  Metadata:
    Architecture: &id001
    - UDP
    # Fixed: the backbone in this config is CSPNeXt, not HRNet
    # (copy-paste error from the HRNet metafile).
    - CSPNeXt
    Training Data: AP-10K
  Name: cspnext-m_udp_8xb64-210e_ap10k-256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.703
      AP@0.5: 0.944
      AP@0.75: 0.776
      AP (L): 0.71
      AP (M): 0.513
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.pth
configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md
0 → 100644
View file @
ca8a762a
<!-- [ALGORITHM] -->
<details>
<summary
align=
"right"
><a
href=
"http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html"
>
HRNet (CVPR'2019)
</a></summary>
```
bibtex
@inproceedings
{
sun2019deep
,
title
=
{Deep high-resolution representation learning for human pose estimation}
,
author
=
{Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}
,
booktitle
=
{Proceedings of the IEEE conference on computer vision and pattern recognition}
,
pages
=
{5693--5703}
,
year
=
{2019}
}
```
</details>
<!-- [DATASET] -->

<details>
<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary>

```bibtex
@misc{yu2021ap10k,
  title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
  author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
  year={2021},
  eprint={2108.12617},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
```

</details>
Results on AP-10K validation set
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log |
| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
| [pose_hrnet_w32](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.722 | 0.935 | 0.789 | 0.557 | 0.729 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.log.json) |
| [pose_hrnet_w48](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.728 | 0.936 | 0.802 | 0.577 | 0.735 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.log.json) |
configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml
0 → 100644
View file @
ca8a762a
Models:
- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py
  In Collection: HRNet
  Metadata:
    Architecture: &id001
    - HRNet
    Training Data: AP-10K
  Name: td-hm_hrnet-w32_8xb64-210e_ap10k-256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.722
      AP@0.5: 0.935
      AP@0.75: 0.789
      AP (L): 0.729
      AP (M): 0.557
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth
- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py
  In Collection: HRNet
  Metadata:
    Architecture: *id001
    Training Data: AP-10K
  Name: td-hm_hrnet-w48_8xb64-210e_ap10k-256x256
  Results:
  - Dataset: AP-10K
    Metrics:
      AP: 0.728
      AP@0.5: 0.936
      AP@0.75: 0.802
      AP (L): 0.735
      AP (M): 0.577
    Task: Animal 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth
Prev
1
2
3
4
5
6
7
8
9
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment