Collections:
- Name: AE
Paper:
Title: "Associative embedding: End-to-end learning for joint detection and grouping"
URL: https://arxiv.org/abs/1611.05424
README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/associative_embedding.md
Models:
- Config: configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py
In Collection: AE
Metadata:
Architecture:
- AE
- HRNet
Training Data: COCO
Name: ae_hrnet-w32_8xb24-300e_coco-512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.656
AP@0.5: 0.864
AP@0.75: 0.719
AR: 0.711
AR@0.5: 0.893
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=140, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=140,
milestones=[90, 120],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=160)
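# (when auto-scale LR is enabled, the optimizer LR is multiplied by
#  actual_total_batch_size / base_batch_size; this config trains with
#  8 GPUs x 20 samples each = 160, matching base_batch_size)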
# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
# codec settings
codec = dict(
type='DecoupledHeatmap', input_size=(512, 512), heatmap_size=(128, 128))
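# the coupled and decoupled heatmaps are predicted at 1/4 of the input
# resolution (512 -> 128)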
# model settings
model = dict(
type='BottomupPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256),
multiscale_output=True)),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth'),
),
neck=dict(
type='FeatureMapProcessor',
concat=True,
),
head=dict(
type='CIDHead',
in_channels=480,
num_keypoints=17,
gfd_channels=32,
coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0),
decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0),
contrastive_loss=dict(
type='InfoNCELoss', temperature=0.05, loss_weight=1.0),
decoder=codec,
),
train_cfg=dict(max_train_instances=200),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
shift_heatmap=False,
align_corners=False))
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'bottomup'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='BottomupRandomAffine', input_size=codec['input_size']),
dict(type='RandomFlip', direction='horizontal'),
dict(type='GenerateTarget', encoder=codec),
dict(type='BottomupGetHeatmapMask'),
dict(type='PackPoseInputs'),
]
val_pipeline = [
dict(type='LoadImage'),
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=64,
resize_mode='expand'),
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=20,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/person_keypoints_val2017.json',
nms_thr=0.8,
score_mode='keypoint',
)
test_evaluator = val_evaluator
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=140, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=140,
milestones=[90, 120],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=160)
# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
# codec settings
codec = dict(
type='DecoupledHeatmap', input_size=(512, 512), heatmap_size=(128, 128))
# model settings
model = dict(
type='BottomupPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384),
multiscale_output=True)),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth'),
),
neck=dict(
type='FeatureMapProcessor',
concat=True,
),
head=dict(
type='CIDHead',
in_channels=720,
num_keypoints=17,
gfd_channels=48,
coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0),
decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0),
contrastive_loss=dict(
type='InfoNCELoss', temperature=0.05, loss_weight=1.0),
decoder=codec,
),
train_cfg=dict(max_train_instances=200),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
shift_heatmap=False,
align_corners=False))
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'bottomup'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='BottomupRandomAffine', input_size=codec['input_size']),
dict(type='RandomFlip', direction='horizontal'),
dict(type='GenerateTarget', encoder=codec),
dict(type='BottomupGetHeatmapMask'),
dict(type='PackPoseInputs'),
]
val_pipeline = [
dict(type='LoadImage'),
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=64,
resize_mode='expand'),
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=20,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/person_keypoints_val2017.json',
nms_thr=0.8,
score_mode='keypoint',
)
test_evaluator = val_evaluator
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html">CID (CVPR'2022)</a></summary>
```bibtex
@InProceedings{Wang_2022_CVPR,
author = {Wang, Dongkai and Zhang, Shiliang},
title = {Contextual Instance Decoupling for Robust Multi-Person Pose Estimation},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {11060-11068}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```
</details>
Results on COCO val2017 without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py) | 512x512 | 0.704 | 0.894 | 0.775 | 0.753 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_20230207.json) |
| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py) | 512x512 | 0.715 | 0.900 | 0.782 | 0.765 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_20230207.json) |
Collections:
- Name: CID
Paper:
Title: Contextual Instance Decoupling for Robust Multi-Person Pose Estimation
URL: https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html
README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/cid.md
Models:
- Config: configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py
In Collection: CID
Metadata:
Architecture: &id001
- CID
- HRNet
Training Data: COCO
Name: cid_hrnet-w32_8xb20-140e_coco-512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.704
AP@0.5: 0.894
AP@0.75: 0.775
AR: 0.753
AR@0.5: 0.928
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth
- Config: configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py
In Collection: CID
Metadata:
Architecture: *id001
Training Data: COCO
Name: cid_hrnet-w48_8xb20-140e_coco-512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.715
AP@0.5: 0.9
AP@0.75: 0.782
AR: 0.765
AR@0.5: 0.935
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth
# Bottom-up Human Pose Estimation via Disentangled Keypoint Regression (DEKR)
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/2104.02300">DEKR (CVPR'2021)</a></summary>
```bibtex
@inproceedings{geng2021bottom,
title={Bottom-up human pose estimation via disentangled keypoint regression},
author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={14676--14686},
year={2021}
}
```
</details>
DEKR is a popular 2D bottom-up pose estimation approach that simultaneously detects all the instances and regresses the offsets from each instance center to its joints.
To predict the offsets more accurately, the offsets of different joints are regressed by separate branches equipped with deformable convolutional layers, so that convolution kernels with adaptive shapes extract features dedicated to the corresponding joint (see the sketch below).
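The core idea can be illustrated with a minimal, self-contained sketch (an illustration, not mmpose's actual `DEKRHead`): one offset-regression branch per keypoint, each with its own deformable convolution so the sampling grid can adapt to that joint. The channel sizes and branch layout are assumptions for demonstration.
```python
import torch
from torch import nn
from torchvision.ops import DeformConv2d


class PerKeypointOffsetBranches(nn.Module):
    """One offset branch per keypoint; each branch's deformable conv
    learns its own sampling locations for that joint."""

    def __init__(self, in_channels=480, branch_channels=15, num_keypoints=17):
        super().__init__()
        self.branches = nn.ModuleList(
            nn.ModuleDict(dict(
                # regresses the 2 * 3 * 3 = 18 sampling offsets of a 3x3 kernel
                offset=nn.Conv2d(in_channels, 18, 3, padding=1),
                deform=DeformConv2d(in_channels, branch_channels, 3, padding=1),
                out=nn.Conv2d(branch_channels, 2, 1),  # (dx, dy) per pixel
            )) for _ in range(num_keypoints))

    def forward(self, x):
        outs = []
        for branch in self.branches:
            offsets = branch['offset'](x)         # joint-specific sampling grid
            feats = branch['deform'](x, offsets)  # joint-specific features
            outs.append(branch['out'](feats))     # displacements for this joint
        return torch.cat(outs, dim=1)             # (B, 2 * num_keypoints, H, W)


feats = torch.randn(1, 480, 128, 128)
print(PerKeypointOffsetBranches()(feats).shape)  # torch.Size([1, 34, 128, 128])
```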
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=140, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=140,
milestones=[90, 120],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=80)
# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
# codec settings
codec = dict(
type='SPR',
input_size=(512, 512),
heatmap_size=(128, 128),
sigma=(4, 2),
minimal_diagonal_length=32**0.5,
generate_keypoint_heatmaps=True,
decode_max_instances=30)
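# sigma holds two Gaussian sigmas: one for the instance-center (root) heatmap
# and one for the keypoint heatmaps generated alongside it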
# model settings
model = dict(
type='BottomupPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256),
multiscale_output=True)),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth'),
),
neck=dict(
type='FeatureMapProcessor',
concat=True,
),
head=dict(
type='DEKRHead',
in_channels=480,
num_keypoints=17,
heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
displacement_loss=dict(
type='SoftWeightSmoothL1Loss',
use_target_weight=True,
supervise_empty=False,
beta=1 / 9,
loss_weight=0.002,
),
decoder=codec,
# This rescore net is adapted from the official repo.
# If you are not using the original COCO dataset for training,
# please make sure to remove the `rescore_cfg` item
rescore_cfg=dict(
in_channels=74,
norm_indexes=(5, 6),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/kpt_rescore_coco-33d58c5c.pth')),
),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
nms_dist_thr=0.05,
shift_heatmap=True,
align_corners=False))
# enable DDP training when rescore net is used
find_unused_parameters = True
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'bottomup'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='BottomupRandomAffine', input_size=codec['input_size']),
dict(type='RandomFlip', direction='horizontal'),
dict(type='GenerateTarget', encoder=codec),
dict(type='BottomupGetHeatmapMask'),
dict(type='PackPoseInputs'),
]
val_pipeline = [
dict(type='LoadImage'),
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=32,
resize_mode='expand'),
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=10,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/person_keypoints_val2017.json',
nms_mode='none',
score_mode='keypoint',
)
test_evaluator = val_evaluator
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=140, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=140,
milestones=[90, 120],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=80)
# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
# codec settings
codec = dict(
type='SPR',
input_size=(640, 640),
heatmap_size=(160, 160),
sigma=(4, 2),
minimal_diagonal_length=32**0.5,
generate_keypoint_heatmaps=True,
decode_max_instances=30)
# model settings
model = dict(
type='BottomupPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384),
multiscale_output=True)),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth'),
),
neck=dict(
type='FeatureMapProcessor',
concat=True,
),
head=dict(
type='DEKRHead',
in_channels=720,
num_keypoints=17,
num_heatmap_filters=48,
heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
displacement_loss=dict(
type='SoftWeightSmoothL1Loss',
use_target_weight=True,
supervise_empty=False,
beta=1 / 9,
loss_weight=0.002,
),
decoder=codec,
# This rescore net is adapted from the official repo.
# If you are not using the original COCO dataset for training,
# please make sure to remove the `rescore_cfg` item
rescore_cfg=dict(
in_channels=74,
norm_indexes=(5, 6),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/kpt_rescore_coco-33d58c5c.pth')),
),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
nms_dist_thr=0.05,
shift_heatmap=True,
align_corners=False))
# enable DDP training when rescore net is used
find_unused_parameters = True
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'bottomup'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='BottomupRandomAffine', input_size=codec['input_size']),
dict(type='RandomFlip', direction='horizontal'),
dict(type='GenerateTarget', encoder=codec),
dict(type='BottomupGetHeatmapMask'),
dict(type='PackPoseInputs'),
]
val_pipeline = [
dict(type='LoadImage'),
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=32,
resize_mode='expand'),
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=10,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/person_keypoints_val2017.json',
nms_mode='none',
score_mode='keypoint',
)
test_evaluator = val_evaluator
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/2104.02300">DEKR (CVPR'2021)</a></summary>
```bibtex
@inproceedings{geng2021bottom,
title={Bottom-up human pose estimation via disentangled keypoint regression},
author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={14676--14686},
year={2021}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```
</details>
Results on COCO val2017 without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [HRNet-w32](/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py) | 512x512 | 0.686 | 0.868 | 0.750 | 0.735 | 0.898 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_20221228.json) |
| [HRNet-w48](/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py) | 640x640 | 0.714 | 0.883 | 0.777 | 0.762 | 0.915 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_74796c32-20230124.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_20230124.json) |
Collections:
- Name: DEKR
Paper:
Title: Bottom-up human pose estimation via disentangled keypoint regression
URL: https://arxiv.org/abs/2104.02300
README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/dekr.md
Models:
- Config: configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py
In Collection: DEKR
Metadata:
Architecture: &id001
- DEKR
- HRNet
Training Data: COCO
Name: dekr_hrnet-w32_8xb10-140e_coco-512x512
Results:
- Dataset: COCO
Metrics:
AP: 0.686
AP@0.5: 0.868
AP@0.75: 0.750
AR: 0.735
AR@0.5: 0.898
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth
- Config: configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py
In Collection: DEKR
Metadata:
Architecture: *id001
Training Data: COCO
Name: dekr_hrnet-w48_8xb10-140e_coco-640x640
Results:
- Dataset: COCO
Metrics:
AP: 0.714
AP@0.5: 0.883
AP@0.75: 0.777
AR: 0.762
AR@0.5: 0.915
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_74796c32-20230124.pth
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=300, val_interval=20)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=300,
milestones=[200, 260],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=80)
# hooks
default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
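# 'crowdpose/AP' is the metric key produced by the evaluator defined below
# (prefix='crowdpose')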
# codec settings
codec = dict(
type='SPR',
input_size=(512, 512),
heatmap_size=(128, 128),
sigma=(4, 2),
minimal_diagonal_length=32**0.5,
generate_keypoint_heatmaps=True,
decode_max_instances=30)
# model settings
model = dict(
type='BottomupPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256),
multiscale_output=True)),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth'),
),
neck=dict(
type='FeatureMapProcessor',
concat=True,
),
head=dict(
type='DEKRHead',
in_channels=480,
num_keypoints=14,
heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
displacement_loss=dict(
type='SoftWeightSmoothL1Loss',
use_target_weight=True,
supervise_empty=False,
beta=1 / 9,
loss_weight=0.004,
),
decoder=codec,
# This rescore net is adapted from the official repo.
# If you are not using the original CrowdPose dataset for training,
# please make sure to remove the `rescore_cfg` item
rescore_cfg=dict(
in_channels=59,
norm_indexes=(0, 1),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/kpt_rescore_crowdpose-300c7efe.pth')),
),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
nms_dist_thr=0.05,
shift_heatmap=True,
align_corners=False))
# enable DDP training when rescore net is used
find_unused_parameters = True
# base dataset settings
dataset_type = 'CrowdPoseDataset'
data_mode = 'bottomup'
data_root = 'data/crowdpose/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='BottomupRandomAffine', input_size=codec['input_size']),
dict(type='RandomFlip', direction='horizontal'),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs'),
]
val_pipeline = [
dict(type='LoadImage'),
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=32,
resize_mode='expand'),
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=10,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/mmpose_crowdpose_trainval.json',
data_prefix=dict(img='images/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/mmpose_crowdpose_test.json',
data_prefix=dict(img='images/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
nms_mode='none',
score_mode='keypoint',
use_area=False,
iou_type='keypoints_crowd',
prefix='crowdpose')
test_evaluator = val_evaluator
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=300, val_interval=20)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=300,
milestones=[200, 260],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=40)
# hooks
default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
# codec settings
codec = dict(
type='SPR',
input_size=(640, 640),
heatmap_size=(160, 160),
sigma=(4, 2),
minimal_diagonal_length=32**0.5,
generate_keypoint_heatmaps=True,
decode_max_instances=30)
# model settings
model = dict(
type='BottomupPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384),
multiscale_output=True)),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth'),
),
neck=dict(
type='FeatureMapProcessor',
concat=True,
),
head=dict(
type='DEKRHead',
in_channels=720,
num_keypoints=14,
num_heatmap_filters=48,
heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
displacement_loss=dict(
type='SoftWeightSmoothL1Loss',
use_target_weight=True,
supervise_empty=False,
beta=1 / 9,
loss_weight=0.004,
),
decoder=codec,
# This rescore net is adapted from the official repo.
# If you are not using the original CrowdPose dataset for training,
# please make sure to remove the `rescore_cfg` item
rescore_cfg=dict(
in_channels=59,
norm_indexes=(0, 1),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/kpt_rescore_crowdpose-300c7efe.pth')),
),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
nms_dist_thr=0.05,
shift_heatmap=True,
align_corners=False))
# enable DDP training when rescore net is used
find_unused_parameters = True
# base dataset settings
dataset_type = 'CrowdPoseDataset'
data_mode = 'bottomup'
data_root = 'data/crowdpose/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='BottomupRandomAffine', input_size=codec['input_size']),
dict(type='RandomFlip', direction='horizontal'),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs'),
]
val_pipeline = [
dict(type='LoadImage'),
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=32,
resize_mode='expand'),
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=5,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/mmpose_crowdpose_trainval.json',
data_prefix=dict(img='images/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/mmpose_crowdpose_test.json',
data_prefix=dict(img='images/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
nms_mode='none',
score_mode='keypoint',
use_area=False,
iou_type='keypoints_crowd',
prefix='crowdpose')
test_evaluator = val_evaluator
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/2104.02300">DEKR (CVPR'2021)</a></summary>
```bibtex
@inproceedings{geng2021bottom,
title={Bottom-up human pose estimation via disentangled keypoint regression},
author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={14676--14686},
year={2021}
}
```
</details>
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Li_CrowdPose_Efficient_Crowded_Scenes_Pose_Estimation_and_a_New_Benchmark_CVPR_2019_paper.html">CrowdPose (CVPR'2019)</a></summary>
```bibtex
@article{li2018crowdpose,
title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
journal={arXiv preprint arXiv:1812.00324},
year={2018}
}
```
</details>
Results on CrowdPose test without multi-scale test
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP (E) | AP (M) | AP (H) | ckpt | log |
| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: |
| [HRNet-w32](/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py) | 512x512 | 0.663 | 0.857 | 0.714 | 0.740 | 0.671 | 0.576 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512_147bae97-20221228.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512_20221228.json) |
| [HRNet-w48](/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py) | 640x640 | 0.679 | 0.869 | 0.731 | 0.753 | 0.688 | 0.593 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_4ea6031e-20230128.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_20230128.json) |
Models:
- Config: configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py
In Collection: DEKR
Metadata:
Architecture: &id001
- DEKR
- HRNet
Training Data: CrowdPose
Name: dekr_hrnet-w32_8xb10-300e_crowdpose-512x512
Results:
- Dataset: CrowdPose
Metrics:
AP: 0.663
AP@0.5: 0.857
AP@0.75: 0.714
AP (E): 0.74
AP (M): 0.671
AP (H): 0.576
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512_147bae97-20221228.pth
- Config: configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py
In Collection: DEKR
Metadata:
Architecture: *id001
Training Data: CrowdPose
Name: dekr_hrnet-w48_8xb5-300e_crowdpose-640x640
Results:
- Dataset: CrowdPose
Metrics:
AP: 0.679
AP@0.5: 0.869
AP@0.75: 0.731
AP (E): 0.753
AP (M): 0.688
AP (H): 0.593
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_4ea6031e-20230128.pth
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="https://arxiv.org/pdf/2302.01593.pdf">ED-Pose (ICLR'2023)</a></summary>
```bibtex
@inproceedings{
yang2023explicit,
title={Explicit Box Detection Unifies End-to-End Multi-Person Pose Estimation},
author={Jie Yang and Ailing Zeng and Shilong Liu and Feng Li and Ruimao Zhang and Lei Zhang},
booktitle={International Conference on Learning Representations},
year={2023},
url={https://openreview.net/forum?id=s4WVupnJjmX}
}
```
</details>
<!-- [BACKBONE] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary>
```bibtex
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```
</details>
Results on COCO val2017.
| Arch | BackBone | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :-------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :--------------------------------------------: | :-------------------------------------------: |
| [edpose_res50_coco](/configs/body_2d_keypoint/edpose/coco/edpose_res50_8xb2-50e_coco-800x1333.py) | ResNet-50 | 0.716 | 0.897 | 0.783 | 0.793 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/edpose/coco/edpose_res50_coco_3rdparty.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/edpose/coco/edpose_res50_coco_3rdparty.json) |
The checkpoint is converted from the official repo. Training ED-Pose is not supported yet; it will be added in future updates.
The above config follows [Pure Python style](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta). Please install `mmengine>=0.8.2` to use this config.
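Until training is supported, a hedged usage sketch for running the converted checkpoint through the high-level `MMPoseInferencer`; the `edpose` alias comes from the model metafile below, and `person.jpg` is a placeholder input path:
```python
# assumes mmpose 1.x is installed and the 'edpose' alias is registered
from mmpose.apis import MMPoseInferencer

inferencer = MMPoseInferencer(pose2d='edpose')  # resolves config + weights
results = list(inferencer('person.jpg', show=False))  # consume the generator
```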
Collections:
- Name: ED-Pose
Paper:
Title: Explicit Box Detection Unifies End-to-End Multi-Person Pose Estimation
URL: https://arxiv.org/pdf/2302.01593.pdf
README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/edpose.md
Models:
- Config: configs/body_2d_keypoint/edpose/coco/edpose_res50_8xb2-50e_coco-800x1333.py
In Collection: ED-Pose
Alias: edpose
Metadata:
Architecture: &id001
- ED-Pose
- ResNet
Training Data: COCO
Name: edpose_res50_8xb2-50e_coco-800x1333
Results:
- Dataset: COCO
Metrics:
AP: 0.716
AP@0.5: 0.897
AP@0.75: 0.783
AR: 0.793
AR@0.5: 0.943
Task: Body 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/edpose/coco/edpose_res50_coco_3rdparty.pth
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
with read_base():
from mmpose.configs._base_.default_runtime import * # noqa
from mmcv.transforms import RandomChoice, RandomChoiceResize
from mmengine.dataset import DefaultSampler
from mmengine.model import PretrainedInit
from mmengine.optim import LinearLR, MultiStepLR
from torch.nn import GroupNorm
from torch.optim import Adam
from mmpose.codecs import EDPoseLabel
from mmpose.datasets import (BottomupRandomChoiceResize, BottomupRandomCrop,
CocoDataset, LoadImage, PackPoseInputs,
RandomFlip)
from mmpose.evaluation import CocoMetric
from mmpose.models import (BottomupPoseEstimator, ChannelMapper, EDPoseHead,
PoseDataPreprocessor, ResNet)
from mmpose.models.utils import FrozenBatchNorm2d
# runtime
train_cfg.update(max_epochs=50, val_interval=10) # noqa
# optimizer
optim_wrapper = dict(optimizer=dict(
type=Adam,
lr=1e-3,
))
# learning policy
param_scheduler = [
dict(type=LinearLR, begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type=MultiStepLR,
begin=0,
end=140,
milestones=[33, 45],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=80)
# hooks
default_hooks.update( # noqa
checkpoint=dict(save_best='coco/AP', rule='greater'))
# codec settings
codec = dict(type=EDPoseLabel, num_select=50, num_keypoints=17)
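# num_select: number of top-scoring queries kept when decoding predictions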
# model settings
model = dict(
type=BottomupPoseEstimator,
data_preprocessor=dict(
type=PoseDataPreprocessor,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=1),
backbone=dict(
type=ResNet,
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type=FrozenBatchNorm2d, requires_grad=False),
norm_eval=True,
style='pytorch',
init_cfg=dict(
type=PretrainedInit, checkpoint='torchvision://resnet50')),
neck=dict(
type=ChannelMapper,
in_channels=[512, 1024, 2048],
kernel_size=1,
out_channels=256,
act_cfg=None,
norm_cfg=dict(type=GroupNorm, num_groups=32),
num_outs=4),
head=dict(
type=EDPoseHead,
num_queries=900,
num_feature_levels=4,
num_keypoints=17,
as_two_stage=True,
encoder=dict(
num_layers=6,
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
self_attn_cfg=dict( # MultiScaleDeformableAttention
embed_dims=256,
num_heads=8,
num_levels=4,
num_points=4,
batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.0))),
decoder=dict(
num_layers=6,
embed_dims=256,
layer_cfg=dict( # DeformableDetrTransformerDecoderLayer
self_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
batch_first=True),
cross_attn_cfg=dict( # MultiScaleDeformableAttention
embed_dims=256,
batch_first=True),
ffn_cfg=dict(
embed_dims=256, feedforward_channels=2048, ffn_drop=0.1)),
query_dim=4,
num_feature_levels=4,
num_group=100,
num_dn=100,
num_box_decoder_layers=2,
return_intermediate=True),
out_head=dict(num_classes=2),
positional_encoding=dict(
num_pos_feats=128,
temperatureH=20,
temperatureW=20,
normalize=True),
denosing_cfg=dict(
dn_box_noise_scale=0.4,
dn_label_noise_ratio=0.5,
dn_labelbook_size=100,
dn_attn_mask_type_list=['match2dn', 'dn2dn', 'group2group']),
data_decoder=codec),
    test_cfg=dict(multiscale_test=False, flip_test=False, num_select=50),
train_cfg=dict())
# enable DDP training when rescore net is used
find_unused_parameters = True
# base dataset settings
dataset_type = CocoDataset
data_mode = 'bottomup'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type=LoadImage),
dict(type=RandomFlip, direction='horizontal'),
dict(
type=RandomChoice,
transforms=[
[
dict(
type=RandomChoiceResize,
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type=BottomupRandomChoiceResize,
                    # the aspect ratio of all images in the train dataset is < 7;
                    # follow the original implementation
scales=[(400, 4200), (500, 4200), (600, 4200)],
keep_ratio=True),
dict(
type=BottomupRandomCrop,
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type=BottomupRandomChoiceResize,
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
]
]),
dict(type=PackPoseInputs),
]
val_pipeline = [
dict(type=LoadImage),
dict(
type=BottomupRandomChoiceResize,
scales=[(800, 1333)],
keep_ratio=True,
backend='pillow'),
dict(
type=PackPoseInputs,
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]
# data loaders
train_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type=DefaultSampler, shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=1,
num_workers=8,
persistent_workers=True,
drop_last=False,
sampler=dict(type=DefaultSampler, shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
type=CocoMetric,
nms_mode='none',
score_mode='keypoint',
)
test_evaluator = val_evaluator
# Top-down integral-regression-based pose estimation
Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given the object bounding boxes. At the second stage, integral-regression-based methods use a simple integral operation that relates and unifies the heatmap representation and joint regression in a differentiable way, obtaining the keypoint coordinates from the features extracted within the bounding-box area, following the paradigm introduced in [Integral Human Pose Regression](https://arxiv.org/abs/1711.08229). The integral operation is sketched below.
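As a minimal illustration (plain PyTorch, not mmpose's codec implementation), the integral operation is a soft-argmax: softmax-normalize each heatmap into a distribution, then take the expectation of the pixel-coordinate grid under it, which keeps the whole mapping differentiable.
```python
import torch


def soft_argmax_2d(heatmaps: torch.Tensor) -> torch.Tensor:
    """Integral regression over heatmaps: (B, K, H, W) -> (B, K, 2) pixel coords."""
    b, k, h, w = heatmaps.shape
    # softmax over each H x W map turns it into a probability distribution
    probs = heatmaps.flatten(2).softmax(dim=-1).reshape(b, k, h, w)
    ys = torch.arange(h, dtype=probs.dtype).view(1, 1, h, 1)
    xs = torch.arange(w, dtype=probs.dtype).view(1, 1, 1, w)
    # expected (x, y) location under the distribution -- differentiable,
    # unlike a hard argmax over the heatmap
    x = (probs * xs).sum(dim=(2, 3))
    y = (probs * ys).sum(dim=(2, 3))
    return torch.stack([x, y], dim=-1)


coords = soft_argmax_2d(torch.randn(2, 17, 64, 64))
print(coords.shape)  # torch.Size([2, 17, 2])
```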
## Results and Models
### COCO Dataset
Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
| Model | Input Size | AP | AR | Details and Download |
| :------------------: | :--------: | :---: | :---: | :---------------------------------------------------: |
| ResNet-50+Debias-IPR | 256x256 | 0.675 | 0.765 | [resnet_debias_coco.md](./coco/resnet_debias_coco.md) |
| ResNet-50+DSNT | 256x256 | 0.674 | 0.764 | [resnet_dsnt_coco.md](./coco/resnet_dsnt_coco.md) |
| ResNet-50+IPR | 256x256 | 0.633 | 0.730 | [resnet_ipr_coco.md](./coco/resnet_ipr_coco.md) |
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=210, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=5e-4,
))
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='IntegralRegressionLabel',
input_size=(256, 256),
heatmap_size=(64, 64),
sigma=2.0,
normalize=True)
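# targets pair coordinate labels with Gaussian heatmaps (sigma=2) rendered at
# 1/4 of the input resolution (256 -> 64)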
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='ResNet',
depth=50,
),
head=dict(
type='DSNTHead',
in_channels=2048,
in_featuremap_size=(8, 8),
num_joints=17,
loss=dict(
type='MultipleLossWrapper',
losses=[
dict(type='SmoothL1Loss', use_target_weight=True),
dict(type='KeypointMSELoss', use_target_weight=True)
]),
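        # the wrapped losses supervise the integral-regressed coordinates
        # (SmoothL1) and the intermediate heatmaps (MSE), respectively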
decoder=codec),
test_cfg=dict(
flip_test=True,
shift_coords=True,
shift_heatmap=True,
),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'))
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(type='RandomBBoxTransform'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
test_pipeline = [
dict(type='LoadImage'),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
# data loaders
train_dataloader = dict(
batch_size=64,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=32,
num_workers=2,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
bbox_file=f'{data_root}person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=test_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator
_base_ = ['../../../_base_/default_runtime.py']
# runtime
train_cfg = dict(max_epochs=210, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict(
type='Adam',
lr=5e-4,
))
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=train_cfg['max_epochs'],
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='IntegralRegressionLabel',
input_size=(256, 256),
heatmap_size=(64, 64),
sigma=2.0,
normalize=True)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='ResNet',
depth=50,
),
head=dict(
type='DSNTHead',
in_channels=2048,
in_featuremap_size=(8, 8),
num_joints=17,
debias=True,
beta=10.,
loss=dict(
type='MultipleLossWrapper',
losses=[
dict(type='SmoothL1Loss', use_target_weight=True),
dict(type='JSDiscretLoss', use_target_weight=True)
]),
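        # here the heatmap term is a Jensen-Shannon divergence against the
        # discrete Gaussian target, matching the debiased variant (debias=True)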
decoder=codec),
test_cfg=dict(
flip_test=True,
shift_coords=True,
shift_heatmap=True,
),
init_cfg=dict(
type='Pretrained',
checkpoint='https://download.openmmlab.com/mmpose/'
'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'))
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = 'data/coco/'
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(type='RandomBBoxTransform'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
test_pipeline = [
dict(type='LoadImage'),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
# data loaders
train_dataloader = dict(
batch_size=64,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=32,
num_workers=2,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
bbox_file=f'{data_root}person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=test_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator