Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: MacaquePose
Name: topdown_heatmap_res50_macaque_256x192
Results:
- Dataset: MacaquePose
Metrics:
AP: 0.799
AP@0.5: 0.952
AP@0.75: 0.919
AR: 0.837
AR@0.5: 0.964
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192-98f1dd3a_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res101_macaque_256x192.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: MacaquePose
Name: topdown_heatmap_res101_macaque_256x192
Results:
- Dataset: MacaquePose
Metrics:
AP: 0.79
AP@0.5: 0.953
AP@0.75: 0.908
AR: 0.828
AR@0.5: 0.967
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_macaque_256x192-e3b9c6bb_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: MacaquePose
Name: topdown_heatmap_res152_macaque_256x192
Results:
- Dataset: MacaquePose
Metrics:
AP: 0.794
AP@0.5: 0.951
AP@0.75: 0.915
AR: 0.834
AR@0.5: 0.968
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_macaque_256x192-c42abc02_20210407.pth
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/zebra.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=9,
dataset_joints=9,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[160, 160],
heatmap_size=[40, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=0.8),
dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.25, prob=0.3),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=0.8),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/zebra'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
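A quick way to sanity-check a config like the one above is to load it with mmcv and build the model. A minimal sketch, assuming MMPose 0.x and mmcv-full are installed and the file sits at the path recorded in the zebra model index later in this commit:

```python
# Minimal sketch, assuming MMPose 0.x / mmcv-full; the config path matches
# the zebra model index in this commit.
from mmcv import Config
from mmpose.models import build_posenet

cfg = Config.fromfile(
    'configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/'
    'res101_zebra_160x160.py')

# Build the TopDown pose model (ResNet-101 backbone) defined by cfg.model.
model = build_posenet(cfg.model)
print(type(model).__name__)  # expected: TopDown
```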
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/zebra.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=9,
dataset_joints=9,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[160, 160],
heatmap_size=[40, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=0.8),
dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.25, prob=0.3),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=0.8),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/zebra'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/zebra.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=9,
dataset_joints=9,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[160, 160],
heatmap_size=[40, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=0.8),
dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.25, prob=0.3),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=0.8),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/zebra'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalZebraDataset',
ann_file=f'{data_root}/annotations/zebra_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://elifesciences.org/articles/47994">Grévy’s Zebra (Elife'2019)</a></summary>
```bibtex
@article{graving2019deepposekit,
title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
journal={Elife},
volume={8},
pages={e47994},
year={2019},
publisher={eLife Sciences Publications Limited}
}
```
</details>
Results on Grévy’s Zebra test set
| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py) | 160x160 | 1.000 | 0.914 | 1.86 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160_20210407.log.json) |
| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py) | 160x160 | 1.000 | 0.916 | 1.82 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160_20210407.log.json) |
| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py) | 160x160 | 1.000 | 0.921 | 1.66 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160_20210407.log.json) |
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: "Gr\xE9vy\u2019s Zebra"
Name: topdown_heatmap_res50_zebra_160x160
Results:
- Dataset: "Gr\xE9vy\u2019s Zebra"
Metrics:
AUC: 0.914
EPE: 1.86
PCK@0.2: 1.0
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: "Gr\xE9vy\u2019s Zebra"
Name: topdown_heatmap_res101_zebra_160x160
Results:
- Dataset: "Gr\xE9vy\u2019s Zebra"
Metrics:
AUC: 0.916
EPE: 1.82
PCK@0.2: 1.0
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: "Gr\xE9vy\u2019s Zebra"
Name: topdown_heatmap_res152_zebra_160x160
Results:
- Dataset: "Gr\xE9vy\u2019s Zebra"
Metrics:
AUC: 0.921
EPE: 1.66
PCK@0.2: 1.0
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth
# Image-based Human Body 2D Pose Estimation
Multi-person human pose estimation is the task of detecting the poses (i.e. the keypoints) of all people in an input image.
Existing approaches can be categorized into top-down and bottom-up methods.
Top-down methods (e.g. DeepPose) divide the task into two stages: human detection and pose estimation. They first detect the people in the image, then perform single-person pose estimation within each detected bounding box.
Bottom-up approaches (e.g. AE) first detect all keypoints in the image and then group/associate them into person instances.
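As a concrete illustration of the two-stage top-down flow, MMPose 0.x exposes high-level inference APIs. The sketch below assumes a local config/checkpoint pair and a person box already produced by some detector; the paths and the hard-coded box are placeholders, not project defaults:

```python
# Minimal top-down inference sketch (MMPose 0.x API). Config/checkpoint
# paths and the person box are placeholders.
from mmpose.apis import init_pose_model, inference_top_down_pose_model

pose_model = init_pose_model(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
    'hrnet_w32_coco_256x192.py',
    'hrnet_w32_coco_256x192.pth')  # placeholder checkpoint path

# Stage 1 (human detection) is assumed done elsewhere; one xyxy box here.
person_results = [{'bbox': [50, 50, 250, 400]}]

# Stage 2: single-person pose estimation inside each detected box.
pose_results, _ = inference_top_down_pose_model(
    pose_model, 'demo.jpg', person_results, format='xyxy')
```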
## Data preparation
Please follow [DATA Preparation](/docs/en/tasks/2d_body_keypoint.md) to prepare data.
## Demo
Please follow [Demo](/demo/docs/2d_human_pose_demo.md#2d-human-pose-demo) to run demos.
<img src="demo/resources/demo_coco.gif" width="600px" alt>
# Associative embedding: End-to-end learning for joint detection and grouping (AE)
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1611.05424">Associative Embedding (NIPS'2017)</a></summary>
```bibtex
@inproceedings{newell2017associative,
title={Associative embedding: End-to-end learning for joint detection and grouping},
author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
booktitle={Advances in neural information processing systems},
pages={2277--2287},
year={2017}
}
```
</details>
AE is one of the most popular 2D bottom-up pose estimation approaches: it first detects all keypoints and then groups/associates them into person instances.
To group the predicted keypoints into individuals, a tag is also predicted for each detected keypoint.
Tags of the same person are similar, while tags of different people differ, so the keypoints can be grouped according to their tags.
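As a toy illustration of this grouping idea (not the actual MMPose decoding code), keypoints whose predicted tags are close can be clustered greedily, with a threshold playing the role of `tag_threshold` in the test configs below:

```python
# Toy sketch of associative-embedding grouping: assign each detected keypoint
# to the person whose mean tag is closest, within a threshold. Illustrative
# only; the real AE decoder also uses heatmap scores and Hungarian matching.
import numpy as np

def group_by_tags(keypoints, tags, tag_threshold=1.0):
    """keypoints: (N, 2) coords; tags: (N,) embedding per keypoint."""
    groups = []  # each group: {'idx': [...], 'tag_sum': float}
    for i, tag in enumerate(tags):
        means = [g['tag_sum'] / len(g['idx']) for g in groups]
        dists = [abs(tag - m) for m in means]
        if dists and min(dists) < tag_threshold:
            g = groups[int(np.argmin(dists))]
            g['idx'].append(i)
            g['tag_sum'] += float(tag)
        else:
            groups.append({'idx': [i], 'tag_sum': float(tag)})
    return [keypoints[g['idx']] for g in groups]

kpts = np.array([[10, 10], [12, 40], [100, 15], [103, 42]], float)
tags = np.array([0.05, 0.10, 1.90, 2.02])
print([len(p) for p in group_by_tags(kpts, tags)])  # -> [2, 2]
```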
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1611.05424">Associative Embedding (NIPS'2017)</a></summary>
```bibtex
@inproceedings{newell2017associative,
title={Associative embedding: End-to-end learning for joint detection and grouping},
author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
booktitle={Advances in neural information processing systems},
pages={2277--2287},
year={2017}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html">HigherHRNet (CVPR'2020)</a></summary>
```bibtex
@inproceedings{cheng2020higherhrnet,
title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={5386--5395},
year={2020}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1711.06475">AI Challenger (ArXiv'2017)</a></summary>
```bibtex
@article{wu2017ai,
title={Ai challenger: A large-scale dataset for going deeper in image understanding},
author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
journal={arXiv preprint arXiv:1711.06475},
year={2017}
}
```
</details>
Results on AIC validation set without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py) | 512x512 | 0.315 | 0.710 | 0.243 | 0.379 | 0.757 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512_20210130.log.json) |
Results on AIC validation set with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py) | 512x512 | 0.323 | 0.718 | 0.254 | 0.379 | 0.758 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512_20210130.log.json) |
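The multi-scale rows above evaluate the same checkpoint at three test scales instead of one. The bottom-up configs in this commit hard-code single-scale testing (`test_scale_factor=[1]`, `scale_factor=[1]`); a minimal sketch of the corresponding 3-scale override, reusing the same field names, might look like this (illustrative, not a shipped config file):

```python
# Sketch of the 3-scale test setting ([2, 1, 0.5]) behind the multi-scale
# rows. Field names follow the configs in this commit; illustrative only.
multi_scale = [2, 1, 0.5]

val_pipeline_head = [
    dict(type='LoadImageFromFile'),
    dict(type='BottomUpGetImgSize', test_scale_factor=multi_scale),
    # ... remaining transforms identical to the single-scale val_pipeline
]

test_cfg_overrides = dict(scale_factor=multi_scale)
```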
Collections:
- Name: HigherHRNet
Paper:
Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
Estimation'
URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
Models:
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py
In Collection: HigherHRNet
Metadata:
Architecture: &id001
- Associative Embedding
- HigherHRNet
Training Data: AI Challenger
Name: associative_embedding_higherhrnet_w32_aic_512x512
Results:
- Dataset: AI Challenger
Metrics:
AP: 0.315
AP@0.5: 0.71
AP@0.75: 0.243
AR: 0.379
AR@0.5: 0.757
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: AI Challenger
Name: associative_embedding_higherhrnet_w32_aic_512x512
Results:
- Dataset: AI Challenger
Metrics:
AP: 0.323
AP@0.5: 0.718
AP@0.75: 0.254
AR: 0.379
AR@0.5: 0.758
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/aic.py'
]
checkpoint_config = dict(interval=50)
evaluation = dict(interval=50, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=0.0015,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[200, 260])
total_epochs = 300
channel_cfg = dict(
num_output_channels=14,
dataset_joints=14,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
data_cfg = dict(
image_size=512,
base_size=256,
base_sigma=2,
heatmap_size=[128, 256],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
num_scales=2,
scale_aware_sigma=False,
)
# model settings
model = dict(
type='AssociativeEmbedding',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='AEHigherResolutionHead',
in_channels=32,
num_joints=14,
tag_per_joint=True,
extra=dict(final_conv_kernel=1, ),
num_deconv_layers=1,
num_deconv_filters=[32],
num_deconv_kernels=[4],
num_basic_blocks=4,
cat_output=[True],
with_ae_loss=[True, False],
loss_keypoint=dict(
type='MultiLossFactory',
num_joints=14,
num_stages=2,
ae_loss_type='exp',
with_ae_loss=[True, False],
push_loss_factor=[0.01, 0.01],
pull_loss_factor=[0.001, 0.001],
with_heatmaps_loss=[True, True],
heatmaps_loss_factor=[1.0, 1.0])),
train_cfg=dict(),
test_cfg=dict(
num_joints=channel_cfg['dataset_joints'],
max_num_people=30,
scale_factor=[1],
with_heatmaps=[True, True],
with_ae=[True, False],
project2image=True,
align_corners=False,
nms_kernel=5,
nms_padding=2,
tag_per_joint=True,
detection_threshold=0.1,
tag_threshold=1,
use_detection_val=True,
ignore_too_much=False,
adjust=True,
refine=True,
flip_test=True))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='BottomUpRandomAffine',
rot_factor=30,
scale_factor=[0.75, 1.5],
scale_type='short',
trans_factor=40),
dict(type='BottomUpRandomFlip', flip_prob=0.5),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='BottomUpGenerateTarget',
sigma=2,
max_num_people=30,
),
dict(
type='Collect',
keys=['img', 'joints', 'targets', 'masks'],
meta_keys=[]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
dict(
type='BottomUpResizeAlign',
transforms=[
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'aug_data', 'test_scale_factor', 'base_size',
'center', 'scale', 'flip_index'
]),
]
test_pipeline = val_pipeline
data_root = 'data/aic'
data = dict(
workers_per_gpu=2,
train_dataloader=dict(samples_per_gpu=24),
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_train.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
'keypoint_train_images_20170902/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_val.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
'keypoint_validation_images_20170911/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_val.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
'keypoint_validation_images_20170911/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/aic.py'
]
checkpoint_config = dict(interval=50)
evaluation = dict(interval=50, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=0.0015,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[200, 260])
total_epochs = 300
channel_cfg = dict(
num_output_channels=14,
dataset_joints=14,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
data_cfg = dict(
image_size=512,
base_size=256,
base_sigma=2,
heatmap_size=[128, 256],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
num_scales=2,
scale_aware_sigma=False,
)
# model settings
model = dict(
type='AssociativeEmbedding',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='AEHigherResolutionHead',
in_channels=32,
num_joints=14,
tag_per_joint=True,
extra=dict(final_conv_kernel=1, ),
num_deconv_layers=1,
num_deconv_filters=[32],
num_deconv_kernels=[4],
num_basic_blocks=4,
cat_output=[True],
with_ae_loss=[True, False],
loss_keypoint=dict(
type='MultiLossFactory',
num_joints=14,
num_stages=2,
ae_loss_type='exp',
with_ae_loss=[True, False],
push_loss_factor=[0.01, 0.01],
pull_loss_factor=[0.001, 0.001],
with_heatmaps_loss=[True, True],
heatmaps_loss_factor=[1.0, 1.0])),
train_cfg=dict(),
test_cfg=dict(
num_joints=channel_cfg['dataset_joints'],
max_num_people=30,
scale_factor=[1],
with_heatmaps=[True, True],
with_ae=[True, False],
project2image=False,
nms_kernel=5,
nms_padding=2,
tag_per_joint=True,
detection_threshold=0.1,
tag_threshold=1,
use_detection_val=True,
ignore_too_much=False,
adjust=True,
refine=True,
flip_test=True,
use_udp=True))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='BottomUpRandomAffine',
rot_factor=30,
scale_factor=[0.75, 1.5],
scale_type='short',
trans_factor=40,
use_udp=True),
dict(type='BottomUpRandomFlip', flip_prob=0.5),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='BottomUpGenerateTarget',
sigma=2,
max_num_people=30,
use_udp=True,
),
dict(
type='Collect',
keys=['img', 'joints', 'targets', 'masks'],
meta_keys=[]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
dict(
type='BottomUpResizeAlign',
transforms=[
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
],
use_udp=True),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'aug_data', 'test_scale_factor', 'base_size',
'center', 'scale', 'flip_index'
]),
]
test_pipeline = val_pipeline
data_root = 'data/aic'
data = dict(
workers_per_gpu=2,
train_dataloader=dict(samples_per_gpu=24),
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_train.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
'keypoint_train_images_20170902/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_val.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
'keypoint_validation_images_20170911/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_val.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
'keypoint_validation_images_20170911/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1611.05424">Associative Embedding (NIPS'2017)</a></summary>
```bibtex
@inproceedings{newell2017associative,
title={Associative embedding: End-to-end learning for joint detection and grouping},
author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
booktitle={Advances in neural information processing systems},
pages={2277--2287},
year={2017}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1711.06475">AI Challenger (ArXiv'2017)</a></summary>
```bibtex
@article{wu2017ai,
title={Ai challenger: A large-scale dataset for going deeper in image understanding},
author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
journal={arXiv preprint arXiv:1711.06475},
year={2017}
}
```
</details>
Results on AIC validation set without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py) | 512x512 | 0.303 | 0.697 | 0.225 | 0.373 | 0.755 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512_20210131.log.json) |
Results on AIC validation set with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py) | 512x512 | 0.318 | 0.717 | 0.246 | 0.379 | 0.764 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512_20210131.log.json) |
Collections:
- Name: HRNet
Paper:
Title: Deep high-resolution representation learning for human pose estimation
URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models:
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py
In Collection: HRNet
Metadata:
Architecture: &id001
- Associative Embedding
- HRNet
Training Data: AI Challenger
Name: associative_embedding_hrnet_w32_aic_512x512
Results:
- Dataset: AI Challenger
Metrics:
AP: 0.303
AP@0.5: 0.697
AP@0.75: 0.225
AR: 0.373
AR@0.5: 0.755
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: AI Challenger
Name: associative_embedding_hrnet_w32_aic_512x512
Results:
- Dataset: AI Challenger
Metrics:
AP: 0.318
AP@0.5: 0.717
AP@0.75: 0.246
AR: 0.379
AR@0.5: 0.764
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/aic.py'
]
checkpoint_config = dict(interval=50)
evaluation = dict(interval=50, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=0.0015,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[200, 260])
total_epochs = 300
channel_cfg = dict(
num_output_channels=14,
dataset_joints=14,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
data_cfg = dict(
image_size=512,
base_size=256,
base_sigma=2,
heatmap_size=[128],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
num_scales=1,
scale_aware_sigma=False,
)
# model settings
model = dict(
type='AssociativeEmbedding',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='AESimpleHead',
in_channels=32,
num_joints=14,
num_deconv_layers=0,
tag_per_joint=True,
with_ae_loss=[True],
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(
type='MultiLossFactory',
num_joints=14,
num_stages=1,
ae_loss_type='exp',
with_ae_loss=[True],
push_loss_factor=[0.01],
pull_loss_factor=[0.001],
with_heatmaps_loss=[True],
heatmaps_loss_factor=[1.0])),
train_cfg=dict(),
test_cfg=dict(
num_joints=channel_cfg['dataset_joints'],
max_num_people=30,
scale_factor=[1],
with_heatmaps=[True],
with_ae=[True],
project2image=True,
align_corners=False,
nms_kernel=5,
nms_padding=2,
tag_per_joint=True,
detection_threshold=0.1,
tag_threshold=1,
use_detection_val=True,
ignore_too_much=False,
adjust=True,
refine=True,
flip_test=True))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='BottomUpRandomAffine',
rot_factor=30,
scale_factor=[0.75, 1.5],
scale_type='short',
trans_factor=40),
dict(type='BottomUpRandomFlip', flip_prob=0.5),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='BottomUpGenerateTarget',
sigma=2,
max_num_people=30,
),
dict(
type='Collect',
keys=['img', 'joints', 'targets', 'masks'],
meta_keys=[]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
dict(
type='BottomUpResizeAlign',
transforms=[
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'aug_data', 'test_scale_factor', 'base_size',
'center', 'scale', 'flip_index'
]),
]
test_pipeline = val_pipeline
data_root = 'data/aic'
data = dict(
workers_per_gpu=2,
train_dataloader=dict(samples_per_gpu=24),
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_train.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
'keypoint_train_images_20170902/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_val.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
'keypoint_validation_images_20170911/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='BottomUpAicDataset',
ann_file=f'{data_root}/annotations/aic_val.json',
img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
'keypoint_validation_images_20170911/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1611.05424">Associative Embedding (NIPS'2017)</a></summary>
```bibtex
@inproceedings{newell2017associative,
title={Associative embedding: End-to-end learning for joint detection and grouping},
author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
booktitle={Advances in neural information processing systems},
pages={2277--2287},
year={2017}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html">HigherHRNet (CVPR'2020)</a></summary>
```bibtex
@inproceedings{cheng2020higherhrnet,
title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={5386--5395},
year={2020}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```
</details>
Results on COCO val2017 without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py) | 512x512 | 0.677 | 0.870 | 0.738 | 0.723 | 0.890 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_20200713.log.json) |
| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py) | 640x640 | 0.686 | 0.871 | 0.747 | 0.733 | 0.898 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640_20200712.log.json) |
| [HigherHRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py) | 512x512 | 0.686 | 0.873 | 0.741 | 0.731 | 0.892 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_20200712.log.json) |
Results on COCO val2017 with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py) | 512x512 | 0.706 | 0.881 | 0.771 | 0.747 | 0.901 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_20200713.log.json) |
| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py) | 640x640 | 0.706 | 0.880 | 0.770 | 0.749 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640_20200712.log.json) |
| [HigherHRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py) | 512x512 | 0.716 | 0.884 | 0.775 | 0.755 | 0.901 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_20200712.log.json) |
Collections:
- Name: HigherHRNet
Paper:
Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
Estimation'
URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
Models:
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py
In Collection: HigherHRNet
Metadata:
Architecture: &id001
- Associative Embedding
- HigherHRNet
Training Data: COCO
Name: associative_embedding_higherhrnet_w32_coco_512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.677
AP@0.5: 0.87
AP@0.75: 0.738
AR: 0.723
AR@0.5: 0.89
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: COCO
Name: associative_embedding_higherhrnet_w32_coco_640x640
Results:
- Dataset: COCO
Metrics:
AP: 0.686
AP@0.5: 0.871
AP@0.75: 0.747
AR: 0.733
AR@0.5: 0.898
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: COCO
Name: associative_embedding_higherhrnet_w48_coco_512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.686
AP@0.5: 0.873
AP@0.75: 0.741
AR: 0.731
AR@0.5: 0.892
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: COCO
Name: associative_embedding_higherhrnet_w32_coco_512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.706
AP@0.5: 0.881
AP@0.75: 0.771
AR: 0.747
AR@0.5: 0.901
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: COCO
Name: associative_embedding_higherhrnet_w32_coco_640x640
Results:
- Dataset: COCO
Metrics:
AP: 0.706
AP@0.5: 0.88
AP@0.75: 0.77
AR: 0.749
AR@0.5: 0.902
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: COCO
Name: associative_embedding_higherhrnet_w48_coco_512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.716
AP@0.5: 0.884
AP@0.75: 0.775
AR: 0.755
AR@0.5: 0.901
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1611.05424">Associative Embedding (NIPS'2017)</a></summary>
```bibtex
@inproceedings{newell2017associative,
title={Associative embedding: End-to-end learning for joint detection and grouping},
author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
booktitle={Advances in neural information processing systems},
pages={2277--2287},
year={2017}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html">HigherHRNet (CVPR'2020)</a></summary>
```bibtex
@inproceedings{cheng2020higherhrnet,
title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={5386--5395},
year={2020}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary>
```bibtex
@InProceedings{Huang_2020_CVPR,
author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2020}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```
</details>
Results on COCO val2017 without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HigherHRNet-w32_udp](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py) | 512x512 | 0.678 | 0.862 | 0.736 | 0.724 | 0.890 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_udp-8cc64794_20210222.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_udp_20210222.log.json) |
| [HigherHRNet-w48_udp](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py) | 512x512 | 0.690 | 0.872 | 0.750 | 0.734 | 0.891 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_udp-7cad61ef_20210222.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_udp_20210222.log.json) |
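The `_udp` variants evaluated above differ from the plain associative-embedding configs only in a handful of UDP flags, all visible in the `higherhrnet_w32_aic_512x512_udp` config earlier in this commit. Summarized as a standalone sketch (a summary of the deltas, not a config file):

```python
# Where UDP is switched on in the *_udp configs of this commit (see the
# higherhrnet_w32_aic_512x512_udp config above).
udp_flags = dict(
    test_cfg=dict(use_udp=True),                # UDP-aware decoding
    BottomUpRandomAffine=dict(use_udp=True),    # unbiased train-time warping
    BottomUpGenerateTarget=dict(use_udp=True),  # unbiased target generation
    BottomUpGetImgSize=dict(use_udp=True),      # test-time size computation
    BottomUpResizeAlign=dict(use_udp=True),     # test-time resize/align
)
```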
Collections:
- Name: HigherHRNet
Paper:
Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
Estimation'
URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
Models:
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py
In Collection: HigherHRNet
Metadata:
Architecture: &id001
- Associative Embedding
- HigherHRNet
- UDP
Training Data: COCO
Name: associative_embedding_higherhrnet_w32_coco_512x512_udp
Results:
- Dataset: COCO
Metrics:
AP: 0.678
AP@0.5: 0.862
AP@0.75: 0.736
AR: 0.724
AR@0.5: 0.89
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_udp-8cc64794_20210222.pth
- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py
In Collection: HigherHRNet
Metadata:
Architecture: *id001
Training Data: COCO
Name: associative_embedding_higherhrnet_w48_coco_512x512_udp
Results:
- Dataset: COCO
Metrics:
AP: 0.69
AP@0.5: 0.872
AP@0.75: 0.75
AR: 0.734
AR@0.5: 0.891
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_udp-7cad61ef_20210222.pth
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/coco.py'
]
checkpoint_config = dict(interval=50)
evaluation = dict(interval=50, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=0.0015,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[200, 260])
total_epochs = 300
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
data_cfg = dict(
image_size=512,
base_size=256,
base_sigma=2,
heatmap_size=[128, 256],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
num_scales=2,
scale_aware_sigma=False,
)
# model settings
model = dict(
type='AssociativeEmbedding',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='AEHigherResolutionHead',
in_channels=32,
num_joints=17,
tag_per_joint=True,
extra=dict(final_conv_kernel=1, ),
num_deconv_layers=1,
num_deconv_filters=[32],
num_deconv_kernels=[4],
num_basic_blocks=4,
cat_output=[True],
with_ae_loss=[True, False],
loss_keypoint=dict(
type='MultiLossFactory',
num_joints=17,
num_stages=2,
ae_loss_type='exp',
with_ae_loss=[True, False],
push_loss_factor=[0.001, 0.001],
pull_loss_factor=[0.001, 0.001],
with_heatmaps_loss=[True, True],
heatmaps_loss_factor=[1.0, 1.0])),
train_cfg=dict(),
test_cfg=dict(
num_joints=channel_cfg['dataset_joints'],
max_num_people=30,
scale_factor=[1],
with_heatmaps=[True, True],
with_ae=[True, False],
project2image=True,
align_corners=False,
nms_kernel=5,
nms_padding=2,
tag_per_joint=True,
detection_threshold=0.1,
tag_threshold=1,
use_detection_val=True,
ignore_too_much=False,
adjust=True,
refine=True,
flip_test=True))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='BottomUpRandomAffine',
rot_factor=30,
scale_factor=[0.75, 1.5],
scale_type='short',
trans_factor=40),
dict(type='BottomUpRandomFlip', flip_prob=0.5),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='BottomUpGenerateTarget',
sigma=2,
max_num_people=30,
),
dict(
type='Collect',
keys=['img', 'joints', 'targets', 'masks'],
meta_keys=[]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
dict(
type='BottomUpResizeAlign',
transforms=[
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'aug_data', 'test_scale_factor', 'base_size',
'center', 'scale', 'flip_index'
]),
]
test_pipeline = val_pipeline
data_root = 'data/coco'
data = dict(
workers_per_gpu=2,
train_dataloader=dict(samples_per_gpu=24),
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='BottomUpCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
img_prefix=f'{data_root}/train2017/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='BottomUpCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='BottomUpCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)