_base_ = '../common/ms-poly-90k_coco-instance.py'
# model settings
model = dict(
type='CondInst',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_mask=True,
pad_size_divisor=32),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output', # use P5
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='CondInstBboxHead',
num_params=169,
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
strides=[8, 16, 32, 64, 128],
norm_on_bbox=True,
centerness_on_reg=True,
dcn_on_last_conv=False,
center_sampling=True,
conv_bias=True,
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
mask_head=dict(
type='CondInstMaskHead',
num_layers=3,
feat_channels=8,
size_of_interest=8,
mask_out_stride=4,
max_masks_to_train=300,
mask_feature_head=dict(
in_channels=256,
feat_channels=128,
start_level=0,
end_level=2,
out_channels=8,
mask_stride=8,
num_stacked_convs=4,
norm_cfg=dict(type='BN', requires_grad=True)),
loss_mask=dict(
type='DiceLoss',
use_sigmoid=True,
activate=True,
eps=5e-6,
loss_weight=1.0)),
# model training and testing settings
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100,
mask_thr=0.5))
# optimizer
optim_wrapper = dict(optimizer=dict(lr=0.01))
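# Sketch (not part of this config): the merged result can be inspected with
# MMEngine, assuming the path listed in the metafile below, e.g.
#   from mmengine.config import Config
#   cfg = Config.fromfile(
#       'configs/condinst/condinst_r50_fpn_ms-poly-90k_coco_instance.py')
#   print(cfg.model.bbox_head.type)        # 'CondInstBboxHead'
#   print(cfg.optim_wrapper.optimizer.lr)  # 0.01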
Collections:
- Name: CondInst
Metadata:
Training Data: COCO
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x A100 GPUs
Architecture:
- FPN
- FCOS
- ResNet
Paper:
URL: https://arxiv.org/abs/2003.05664
Title: 'Conditional Convolutions for Instance Segmentation'
README: configs/condinst/README.md
Models:
- Name: condinst_r50_fpn_ms-poly-90k_coco_instance
In Collection: CondInst
Config: configs/condinst/condinst_r50_fpn_ms-poly-90k_coco_instance.py
Metadata:
Training Memory (GB): 4.4
Iterations: 90000
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 39.8
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 36.0
Weights: https://download.openmmlab.com/mmdetection/v3.0/condinst/condinst_r50_fpn_ms-poly-90k_coco_instance/condinst_r50_fpn_ms-poly-90k_coco_instance_20221129_125223-4c186406.pth
_base_ = ['../detr/detr_r50_8xb2-150e_coco.py']
model = dict(
type='ConditionalDETR',
num_queries=300,
decoder=dict(
num_layers=6,
layer_cfg=dict(
self_attn_cfg=dict(
_delete_=True,
embed_dims=256,
num_heads=8,
attn_drop=0.1,
cross_attn=False),
cross_attn_cfg=dict(
_delete_=True,
embed_dims=256,
num_heads=8,
attn_drop=0.1,
cross_attn=True))),
bbox_head=dict(
type='ConditionalDETRHead',
loss_cls=dict(
_delete_=True,
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0)),
# training and testing settings
train_cfg=dict(
assigner=dict(
type='HungarianAssigner',
match_costs=[
dict(type='FocalLossCost', weight=2.0),
dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
dict(type='IoUCost', iou_mode='giou', weight=2.0)
])))
# learning policy
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=50, val_interval=1)
param_scheduler = [dict(type='MultiStepLR', end=50, milestones=[40])]
Collections:
- Name: Conditional DETR
Metadata:
Training Data: COCO
Training Techniques:
- AdamW
- Multi Scale Train
- Gradient Clip
Training Resources: 8x A100 GPUs
Architecture:
- ResNet
- Transformer
Paper:
URL: https://arxiv.org/abs/2108.06152
Title: 'Conditional DETR for Fast Training Convergence'
README: configs/conditional_detr/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/f4112c9e5611468ffbd57cfba548fd1289264b52/mmdet/models/detectors/conditional_detr.py#L14
Version: v3.0.0rc6
Models:
- Name: conditional-detr_r50_8xb2-50e_coco
In Collection: Conditional DETR
Config: configs/conditional_detr/conditional-detr_r50_8xb2-50e_coco.py
Metadata:
Epochs: 50
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.9
Weights: https://download.openmmlab.com/mmdetection/v3.0/conditional_detr/conditional-detr_r50_8xb2-50e_coco/conditional-detr_r50_8xb2-50e_coco_20221121_180202-c83a1dc0.pth
_base_ = './cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py' # noqa
# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
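# With this import registered, backbone types prefixed with 'mmpretrain.'
# (e.g. 'mmpretrain.ConvNeXt' below) resolve against mmpretrain's model
# registry instead of mmdet's.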
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa
model = dict(
backbone=dict(
_delete_=True,
type='mmpretrain.ConvNeXt',
arch='small',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.6,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')))
optim_wrapper = dict(paramwise_cfg={
'decay_rate': 0.7,
'decay_type': 'layer_wise',
'num_layers': 12
})
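# Note: 'layer_wise' decay gives earlier backbone layers smaller learning
# rates. Roughly (the exact grouping is done by the
# LearningRateDecayOptimizerConstructor), layer i out of num_layers ends up
# with lr ~= base_lr * decay_rate ** (num_layers - i); with the values above,
# the deepest layer keeps 2e-4 while layer 0 gets about
# 2e-4 * 0.7 ** 12 ~= 2.8e-6.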
_base_ = [
'../_base_/models/cascade-mask-rcnn_r50_fpn.py',
'../_base_/datasets/coco_instance.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa
model = dict(
backbone=dict(
_delete_=True,
type='mmpretrain.ConvNeXt',
arch='tiny',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.4,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')),
neck=dict(in_channels=[96, 192, 384, 768]),
roi_head=dict(bbox_head=[
dict(
type='ConvFCBBoxHead',
num_shared_convs=4,
num_shared_fcs=1,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
reg_decoded_bbox=True,
norm_cfg=dict(type='SyncBN', requires_grad=True),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
dict(
type='ConvFCBBoxHead',
num_shared_convs=4,
num_shared_fcs=1,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=False,
reg_decoded_bbox=True,
norm_cfg=dict(type='SyncBN', requires_grad=True),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
dict(
type='ConvFCBBoxHead',
num_shared_convs=4,
num_shared_fcs=1,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=False,
reg_decoded_bbox=True,
norm_cfg=dict(type='SyncBN', requires_grad=True),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=10.0))
]))
# augmentation strategy originates from DETR / Sparse RCNN
train_pipeline = [
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
scales=[(400, 1333), (500, 1333), (600, 1333)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
keep_ratio=True)
]]),
dict(type='PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
max_epochs = 36
train_cfg = dict(max_epochs=max_epochs)
# learning rate
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
end=1000),
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[27, 33],
gamma=0.1)
]
# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(
type='AmpOptimWrapper',
constructor='LearningRateDecayOptimizerConstructor',
paramwise_cfg={
'decay_rate': 0.7,
'decay_type': 'layer_wise',
'num_layers': 6
},
optimizer=dict(
_delete_=True,
type='AdamW',
lr=0.0002,
betas=(0.9, 0.999),
weight_decay=0.05))
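# Note: AmpOptimWrapper wraps the optimizer with PyTorch automatic mixed
# precision, which is what the metafile below lists as
# 'Mixed Precision Training'.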
_base_ = [
'../_base_/models/mask-rcnn_r50_fpn.py',
'../_base_/datasets/coco_instance.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa
model = dict(
backbone=dict(
_delete_=True,
type='mmpretrain.ConvNeXt',
arch='tiny',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.4,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')),
neck=dict(in_channels=[96, 192, 384, 768]))
# augmentation strategy originates from DETR / Sparse RCNN
train_pipeline = [
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
scales=[(400, 1333), (500, 1333), (600, 1333)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
keep_ratio=True)
]]),
dict(type='PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
max_epochs = 36
train_cfg = dict(max_epochs=max_epochs)
# learning rate
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
end=1000),
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[27, 33],
gamma=0.1)
]
# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(
type='AmpOptimWrapper',
constructor='LearningRateDecayOptimizerConstructor',
paramwise_cfg={
'decay_rate': 0.95,
'decay_type': 'layer_wise',
'num_layers': 6
},
optimizer=dict(
_delete_=True,
type='AdamW',
lr=0.0001,
betas=(0.9, 0.999),
weight_decay=0.05,
))
Models:
- Name: mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco
In Collection: Mask R-CNN
Config: configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py
Metadata:
Training Memory (GB): 7.3
Epochs: 36
Training Data: COCO
Training Techniques:
- AdamW
- Mixed Precision Training
Training Resources: 8x A100 GPUs
Architecture:
- ConvNeXt
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 46.2
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 41.7
Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco_20220426_154953-050731f4.pth
Paper:
URL: https://arxiv.org/abs/2201.03545
Title: 'A ConvNet for the 2020s'
README: configs/convnext/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465
Version: v2.16.0
- Name: cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco
In Collection: Cascade Mask R-CNN
Config: configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
Metadata:
Training Memory (GB): 9.0
Epochs: 36
Training Data: COCO
Training Techniques:
- AdamW
- Mixed Precision Training
Training Resources: 8x A100 GPUs
Architecture:
- ConvNeXt
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 50.3
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 43.6
Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220509_204200-8f07c40b.pth
Paper:
URL: https://arxiv.org/abs/2201.03545
Title: 'A ConvNet for the 2020s'
README: configs/convnext/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465
Version: v2.25.0
- Name: cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco
In Collection: Cascade Mask R-CNN
Config: configs/convnext/cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
Metadata:
Training Memory (GB): 12.3
Epochs: 36
Training Data: COCO
Training Techniques:
- AdamW
- Mixed Precision Training
Training Resources: 8x A100 GPUs
Architecture:
- ConvNeXt
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 51.8
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 44.8
Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220510_201004-3d24f5a4.pth
Paper:
URL: https://arxiv.org/abs/2201.03545
Title: 'A ConvNet for the 2020s'
README: configs/convnext/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465
Version: v2.25.0
_base_ = './cornernet_hourglass104_8xb6-210e-mstest_coco.py'
train_dataloader = dict(batch_size=5)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (10 GPUs) x (5 samples per GPU)
auto_scale_lr = dict(base_batch_size=50)
_base_ = './cornernet_hourglass104_8xb6-210e-mstest_coco.py'
train_dataloader = dict(batch_size=3)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (32 GPUs) x (3 samples per GPU)
auto_scale_lr = dict(base_batch_size=96)
_base_ = [
'../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'
]
data_preprocessor = dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True)
# model settings
model = dict(
type='CornerNet',
data_preprocessor=data_preprocessor,
backbone=dict(
type='HourglassNet',
downsample_times=5,
num_stacks=2,
stage_channels=[256, 256, 384, 384, 384, 512],
stage_blocks=[2, 2, 2, 2, 2, 4],
norm_cfg=dict(type='BN', requires_grad=True)),
neck=None,
bbox_head=dict(
type='CornerHead',
num_classes=80,
in_channels=256,
num_feat_levels=2,
corner_emb_channels=1,
loss_heatmap=dict(
type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),
loss_embedding=dict(
type='AssociativeEmbeddingLoss',
pull_weight=0.10,
push_weight=0.10),
loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)),
# training and testing settings
train_cfg=None,
test_cfg=dict(
corner_topk=100,
local_maximum_kernel=3,
distance_threshold=0.5,
score_thr=0.05,
max_per_img=100,
nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))
# data settings
train_pipeline = [
dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PhotoMetricDistortion',
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18),
dict(
# The cropped images are padded into squares during training,
# but may be smaller than crop_size.
type='RandomCenterCropPad',
crop_size=(511, 511),
ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
test_mode=False,
test_pad_mode=None,
mean=data_preprocessor['mean'],
std=data_preprocessor['std'],
# Image data is not converted to rgb.
to_rgb=data_preprocessor['bgr_to_rgb']),
# Make sure the output is always crop_size.
dict(type='Resize', scale=(511, 511), keep_ratio=False),
dict(type='RandomFlip', prob=0.5),
dict(type='PackDetInputs'),
]
test_pipeline = [
dict(
type='LoadImageFromFile',
to_float32=True,
backend_args=_base_.backend_args,
),
# don't need Resize
dict(
type='RandomCenterCropPad',
crop_size=None,
ratios=None,
border=None,
test_mode=True,
test_pad_mode=['logical_or', 127],
mean=data_preprocessor['mean'],
std=data_preprocessor['std'],
# Image data is not converted to rgb.
to_rgb=data_preprocessor['bgr_to_rgb']),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'border'))
]
train_dataloader = dict(
batch_size=6,
num_workers=3,
batch_sampler=None,
dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='Adam', lr=0.0005),
clip_grad=dict(max_norm=35, norm_type=2))
max_epochs = 210
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0 / 3,
by_epoch=False,
begin=0,
end=500),
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[180],
gamma=0.1)
]
train_cfg = dict(
type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (6 samples per GPU)
auto_scale_lr = dict(base_batch_size=48)
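# For reference: when LR auto-scaling is enabled, the linear scaling rule
# multiplies the optimizer lr by (actual total batch size / base_batch_size),
# e.g. an assumed 4 GPUs x 6 samples per GPU would give
# 0.0005 * 24 / 48 = 0.00025.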
tta_model = dict(
type='DetTTAModel',
tta_cfg=dict(
nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'),
max_per_img=100))
tta_pipeline = [
dict(
type='LoadImageFromFile',
to_float32=True,
backend_args=_base_.backend_args),
dict(
type='TestTimeAug',
transforms=[
[
# ``RandomFlip`` must be placed before ``RandomCenterCropPad``,
# otherwise bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type='RandomFlip', prob=1.),
dict(type='RandomFlip', prob=0.)
],
[
dict(
type='RandomCenterCropPad',
crop_size=None,
ratios=None,
border=None,
test_mode=True,
test_pad_mode=['logical_or', 127],
mean=data_preprocessor['mean'],
std=data_preprocessor['std'],
# Image data is not converted to rgb.
to_rgb=data_preprocessor['bgr_to_rgb'])
],
[dict(type='LoadAnnotations', with_bbox=True)],
[
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'flip', 'flip_direction', 'border'))
]
])
]
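# Sketch (assumed wiring, mirroring what the TTA switch of the test script is
# expected to do): test-time augmentation replaces the plain model and test
# pipeline with the tta_model and tta_pipeline defined above, e.g.
#   cfg.model = dict(cfg.tta_model, module=cfg.model)
#   cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline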
Collections:
- Name: CornerNet
Metadata:
Training Data: COCO
Training Techniques:
- Adam
Training Resources: 8x V100 GPUs
Architecture:
- Corner Pooling
- Stacked Hourglass Network
Paper:
URL: https://arxiv.org/abs/1808.01244
Title: 'CornerNet: Detecting Objects as Paired Keypoints'
README: configs/cornernet/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.3.0/mmdet/models/detectors/cornernet.py#L9
Version: v2.3.0
Models:
- Name: cornernet_hourglass104_10xb5-crop511-210e-mstest_coco
In Collection: CornerNet
Config: configs/cornernet/cornernet_hourglass104_10xb5-crop511-210e-mstest_coco.py
Metadata:
Training Resources: 10x V100 GPUs
Batch Size: 50
Training Memory (GB): 13.9
inference time (ms/im):
- value: 238.1
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (800, 1333)
Epochs: 210
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.2
Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth
- Name: cornernet_hourglass104_8xb6-210e-mstest_coco
In Collection: CornerNet
Config: configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py
Metadata:
Batch Size: 48
Training Memory (GB): 15.9
inference time (ms/im):
- value: 238.1
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (800, 1333)
Epochs: 210
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.2
Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth
- Name: cornernet_hourglass104_32xb3-210e-mstest_coco
In Collection: CornerNet
Config: configs/cornernet/cornernet_hourglass104_32xb3-210e-mstest_coco.py
Metadata:
Training Resources: 32x V100 GPUs
Batch Size: 96
Training Memory (GB): 9.5
inference time (ms/im):
- value: 256.41
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (800, 1333)
Epochs: 210
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.4
Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth
_base_ = ['../_base_/default_runtime.py']
model = dict(
type='CrowdDet',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
bgr_to_rgb=False,
pad_size_divisor=64,
# This option is set according to https://github.com/Purkialo/CrowdDet/
# blob/master/lib/data/CrowdHuman.py. The images in the entire batch are
# resized together.
batch_augments=[
dict(type='BatchResize', scale=(1400, 800), pad_size_divisor=64)
]),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5,
upsample_cfg=dict(mode='bilinear', align_corners=False)),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[1.0, 2.0, 3.0],
strides=[4, 8, 16, 32, 64],
centers=[(8, 8), (8, 8), (8, 8), (8, 8), (8, 8)]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0],
clip_border=False),
loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='MultiInstanceRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
output_size=7,
sampling_ratio=-1,
aligned=True,
use_torchvision=True),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='MultiInstanceBBoxHead',
with_refine=False,
num_shared_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss',
loss_weight=1.0,
use_sigmoid=False,
reduction='none'),
loss_bbox=dict(
type='SmoothL1Loss', loss_weight=1.0, reduction='none'))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=(0.3, 0.7),
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2400,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=2),
rcnn=dict(
assigner=dict(
type='MultiInstanceAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.3,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='MultiInsRandomSampler',
num=512,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1200,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=2),
rcnn=dict(
nms=dict(type='nms', iou_threshold=0.5),
score_thr=0.01,
max_per_img=500)))
dataset_type = 'CrowdHumanDataset'
data_root = 'data/CrowdHuman/'
# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix
# (LMDB and Memcached are not supported yet)
# data_root = 's3://openmmlab/datasets/tracking/CrowdHuman/'
# Method 2: use `backend_args` (named `file_client_args` in versions
# before 3.0.0rc6), e.g.
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/tracking/',
# 'data/': 's3://openmmlab/datasets/tracking/'
# }))
backend_args = None
train_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
test_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='Resize', scale=(1400, 800), keep_ratio=True),
# avoid bboxes being resized
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader = dict(
batch_size=2,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
batch_sampler=None, # The 'batch_sampler' may decrease the precision
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotation_train.odgt',
data_prefix=dict(img='Images/'),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline,
backend_args=backend_args))
val_dataloader = dict(
batch_size=1,
num_workers=2,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotation_val.odgt',
data_prefix=dict(img='Images/'),
test_mode=True,
pipeline=test_pipeline,
backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(
type='CrowdHumanMetric',
ann_file=data_root + 'annotation_val.odgt',
metric=['AP', 'MR', 'JI'],
backend_args=backend_args)
test_evaluator = val_evaluator
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=800),
dict(
type='MultiStepLR',
begin=0,
end=30,
by_epoch=True,
milestones=[24, 27],
gamma=0.1)
]
# optimizer
auto_scale_lr = dict(base_batch_size=16)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001))
_base_ = './crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py'
model = dict(roi_head=dict(bbox_head=dict(with_refine=True)))
Collections:
- Name: CrowdDet
Metadata:
Training Data: CrowdHuman
Training Techniques:
- SGD
- EMD Loss
Training Resources: 8x A100 GPUs
Architecture:
- FPN
- RPN
- ResNet
- RoIPool
Paper:
URL: https://arxiv.org/abs/2003.09163
Title: 'Detection in Crowded Scenes: One Proposal, Multiple Predictions'
README: configs/crowddet/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v3.0.0rc3/mmdet/models/detectors/crowddet.py
Version: v3.0.0rc3
Models:
- Name: crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman
In Collection: CrowdDet
Config: configs/crowddet/crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman.py
Metadata:
Training Memory (GB): 4.8
Epochs: 30
Results:
- Task: Object Detection
Dataset: CrowdHuman
Metrics:
box AP: 90.32
Weights: https://download.openmmlab.com/mmdetection/v3.0/crowddet/crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman/crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman_20221024_215917-45602806.pth
- Name: crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman
In Collection: CrowdDet
Config: configs/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py
Metadata:
Training Memory (GB): 4.4
Epochs: 30
Results:
- Task: Object Detection
Dataset: CrowdHuman
Metrics:
box AP: 90.0
Weights: https://download.openmmlab.com/mmdetection/v3.0/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman_20221023_174954-dc319c2d.pth
_base_ = [
'../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
]
model = dict(
type='DABDETR',
num_queries=300,
with_random_refpoints=False,
num_patterns=0,
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=1),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(3, ),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='ChannelMapper',
in_channels=[2048],
kernel_size=1,
out_channels=256,
act_cfg=None,
norm_cfg=None,
num_outs=1),
encoder=dict(
num_layers=6,
layer_cfg=dict(
self_attn_cfg=dict(
embed_dims=256, num_heads=8, dropout=0., batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.,
act_cfg=dict(type='PReLU')))),
decoder=dict(
num_layers=6,
query_dim=4,
query_scale_type='cond_elewise',
with_modulated_hw_attn=True,
layer_cfg=dict(
self_attn_cfg=dict(
embed_dims=256,
num_heads=8,
attn_drop=0.,
proj_drop=0.,
cross_attn=False),
cross_attn_cfg=dict(
embed_dims=256,
num_heads=8,
attn_drop=0.,
proj_drop=0.,
cross_attn=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.,
act_cfg=dict(type='PReLU'))),
return_intermediate=True),
positional_encoding=dict(num_feats=128, temperature=20, normalize=True),
bbox_head=dict(
type='DABDETRHead',
num_classes=80,
embed_dims=256,
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=5.0),
loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
# training and testing settings
train_cfg=dict(
assigner=dict(
type='HungarianAssigner',
match_costs=[
dict(type='FocalLossCost', weight=2., eps=1e-8),
dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
dict(type='IoUCost', iou_mode='giou', weight=2.0)
])),
test_cfg=dict(max_per_img=300))
# train_pipeline, NOTE the img_scale and the Pad's size_divisor are different
# from the default settings in mmdet.
train_pipeline = [
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
scales=[(400, 1333), (500, 1333), (600, 1333)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
keep_ratio=True)
]]),
dict(type='PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
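# Note: with custom_keys={'backbone': dict(lr_mult=0.1, ...)}, every parameter
# whose name contains 'backbone' is optimized at 0.1 * 0.0001 = 1e-05, while
# the remaining parameters keep the full AdamW lr of 1e-04.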
# learning policy
max_epochs = 50
train_cfg = dict(
type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[40],
gamma=0.1)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (2 samples per GPU)
auto_scale_lr = dict(base_batch_size=16, enable=False)
Collections:
- Name: DAB-DETR
Metadata:
Training Data: COCO
Training Techniques:
- AdamW
- Multi Scale Train
- Gradient Clip
Training Resources: 8x A100 GPUs
Architecture:
- ResNet
- Transformer
Paper:
URL: https://arxiv.org/abs/2201.12329
Title: 'DAB-DETR: Dynamic Anchor Boxes are Better Queries for DETR'
README: configs/dab_detr/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/f4112c9e5611468ffbd57cfba548fd1289264b52/mmdet/models/detectors/dab_detr.py#L15
Version: v3.0.0rc6
Models:
- Name: dab-detr_r50_8xb2-50e_coco
In Collection: DAB-DETR
Config: configs/dab_detr/dab-detr_r50_8xb2-50e_coco.py
Metadata:
Epochs: 50
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.3
Weights: https://download.openmmlab.com/mmdetection/v3.0/dab_detr/dab-detr_r50_8xb2-50e_coco/dab-detr_r50_8xb2-50e_coco_20221122_120837-c1035c8c.pth
_base_ = '../cascade_rcnn/cascade-mask-rcnn_r101_fpn_1x_coco.py'
model = dict(
backbone=dict(
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
_base_ = '../cascade_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py'
model = dict(
backbone=dict(
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
_base_ = '../cascade_rcnn/cascade-mask-rcnn_x101-32x4d_fpn_1x_coco.py'
model = dict(
backbone=dict(
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
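# Note on the three DCN variants above: each only patches the backbone of its
# _base_ config. stage_with_dcn=(False, True, True, True) enables deformable
# convolution in the last three ResNet stages and keeps regular convolution in
# the first one; the merged result can be checked with, e.g.
#   from mmengine.config import Config
#   cfg = Config.fromfile(<dcn config path>)  # path assumed
#   print(cfg.model.backbone.stage_with_dcn)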