"docs/source/git@developer.sourcefind.cn:Fzc7075/nunchaku.git" did not exist on "2ec15e4833b7128ec50178b97d7fdc98a1252af1"
Commit a8562a56 authored by luopl's avatar luopl
Browse files

Initial commit

parents
Pipeline #1564 canceled with stages
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from ..common.ms_poly_3x_coco_instance import *
from .._base_.models.mask_rcnn_r50_fpn import *
from mmdet.models.backbones import ResNeXt
model = dict(
# The ResNeXt-101-32x8d model was trained with Caffe2 at FB,
# so the mean and std need to be changed.
data_preprocessor=dict(
mean=[103.530, 116.280, 123.675],
std=[57.375, 57.120, 58.395],
bgr_to_rgb=False),
backbone=dict(
type=ResNeXt,
depth=101,
groups=32,
base_width=8,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
style='pytorch',
init_cfg=dict(
type=PretrainedInit,
checkpoint='open-mmlab://detectron2/resnext101_32x8d')))
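# Usage note (illustrative, not part of the config above): a pure-Python style
# config like this one is loaded and run through mmengine's Config/Runner
# APIs. A minimal sketch, assuming the file is saved under the standard
# mmdet/configs/mask_rcnn/ layout; the work_dir is a hypothetical example:
#
#     from mmengine.config import Config
#     from mmengine.runner import Runner
#
#     cfg = Config.fromfile(
#         'mmdet/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_ms_poly_3x_coco.py')
#     cfg.work_dir = './work_dirs/mask_rcnn_x101_32x8d'  # assumed output dir
#     runner = Runner.from_cfg(cfg)
#     runner.train()
#
# Names such as ResNeXt, BatchNorm2d and PretrainedInit are expected to
# resolve through the `import *` of the base configs inside `with read_base():`.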
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .mask_rcnn_x101_32x4d_fpn_1x_coco import *
model = dict(
backbone=dict(
type=ResNeXt,
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
style='pytorch',
init_cfg=dict(
type=PretrainedInit, checkpoint='open-mmlab://resnext101_64x4d')))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .mask_rcnn_x101_32x4d_fpn_2x_coco import *
model = dict(
backbone=dict(
type=ResNeXt,
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
style='pytorch',
init_cfg=dict(
type=PretrainedInit, checkpoint='open-mmlab://resnext101_64x4d')))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from ..common.ms_poly_3x_coco_instance import *
from .._base_.models.mask_rcnn_r50_fpn import *
from mmdet.models.backbones import ResNeXt
model = dict(
backbone=dict(
type=ResNeXt,
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
style='pytorch',
init_cfg=dict(
type=PretrainedInit, checkpoint='open-mmlab://resnext101_64x4d')))
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms import RandomChoice, RandomChoiceResize
from mmengine.config import read_base
from mmengine.model.weight_init import PretrainedInit
from mmengine.optim.optimizer import OptimWrapper
from mmengine.optim.scheduler import MultiStepLR
from mmengine.runner import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.nn.modules.activation import ReLU
from torch.nn.modules.batchnorm import BatchNorm2d
from torch.nn.modules.normalization import GroupNorm
from torch.optim.adamw import AdamW
from mmdet.datasets.transforms.transforms import RandomCrop
from mmdet.models import MaskFormer
from mmdet.models.backbones import ResNet
from mmdet.models.data_preprocessors.data_preprocessor import \
DetDataPreprocessor
from mmdet.models.dense_heads.maskformer_head import MaskFormerHead
from mmdet.models.layers.pixel_decoder import TransformerEncoderPixelDecoder
from mmdet.models.losses import CrossEntropyLoss, DiceLoss, FocalLoss
from mmdet.models.seg_heads.panoptic_fusion_heads import MaskFormerFusionHead
from mmdet.models.task_modules.assigners.hungarian_assigner import \
HungarianAssigner
from mmdet.models.task_modules.assigners.match_cost import (ClassificationCost,
DiceCost,
FocalLossCost)
from mmdet.models.task_modules.samplers import MaskPseudoSampler
with read_base():
from .._base_.datasets.coco_panoptic import *
from .._base_.default_runtime import *
data_preprocessor = dict(
type=DetDataPreprocessor,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=1,
pad_mask=True,
mask_pad_value=0,
pad_seg=True,
seg_pad_value=255)
num_things_classes = 80
num_stuff_classes = 53
num_classes = num_things_classes + num_stuff_classes
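# Note: with 80 "thing" and 53 "stuff" categories, num_classes evaluates to
# 133; the class_weight of loss_cls below therefore has 134 entries, the
# trailing 0.1 being the weight of the extra "no object" class that MaskFormer
# appends.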
model = dict(
type=MaskFormer,
data_preprocessor=data_preprocessor,
backbone=dict(
type=ResNet,
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
norm_eval=True,
style='pytorch',
init_cfg=dict(
type=PretrainedInit, checkpoint='torchvision://resnet50')),
panoptic_head=dict(
type=MaskFormerHead,
in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
feat_channels=256,
out_channels=256,
num_things_classes=num_things_classes,
num_stuff_classes=num_stuff_classes,
num_queries=100,
pixel_decoder=dict(
type=TransformerEncoderPixelDecoder,
norm_cfg=dict(type=GroupNorm, num_groups=32),
act_cfg=dict(type=ReLU),
encoder=dict( # DetrTransformerEncoder
num_layers=6,
layer_cfg=dict( # DetrTransformerEncoderLayer
self_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
dropout=0.1,
batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.1,
act_cfg=dict(type=ReLU, inplace=True)))),
positional_encoding=dict(num_feats=128, normalize=True)),
enforce_decoder_input_project=False,
positional_encoding=dict(num_feats=128, normalize=True),
transformer_decoder=dict( # DetrTransformerDecoder
num_layers=6,
layer_cfg=dict( # DetrTransformerDecoderLayer
self_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
dropout=0.1,
batch_first=True),
cross_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
dropout=0.1,
batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.1,
act_cfg=dict(type=ReLU, inplace=True))),
return_intermediate=True),
loss_cls=dict(
type=CrossEntropyLoss,
use_sigmoid=False,
loss_weight=1.0,
reduction='mean',
class_weight=[1.0] * num_classes + [0.1]),
loss_mask=dict(
type=FocalLoss,
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
reduction='mean',
loss_weight=20.0),
loss_dice=dict(
type=DiceLoss,
use_sigmoid=True,
activate=True,
reduction='mean',
naive_dice=True,
eps=1.0,
loss_weight=1.0)),
panoptic_fusion_head=dict(
type=MaskFormerFusionHead,
num_things_classes=num_things_classes,
num_stuff_classes=num_stuff_classes,
loss_panoptic=None,
init_cfg=None),
train_cfg=dict(
assigner=dict(
type=HungarianAssigner,
match_costs=[
dict(type=ClassificationCost, weight=1.0),
dict(type=FocalLossCost, weight=20.0, binary_input=True),
dict(type=DiceCost, weight=1.0, pred_act=True, eps=1.0)
]),
sampler=dict(type=MaskPseudoSampler)),
test_cfg=dict(
panoptic_on=True,
# For now, the dataset does not support
# evaluating the semantic segmentation metric.
semantic_on=False,
instance_on=False,
# max_per_image is for instance segmentation.
max_per_image=100,
object_mask_thr=0.8,
iou_thr=0.8,
# In MaskFormer's panoptic post-processing,
# masks whose score is smaller than 0.5 are not filtered out.
filter_low_score=False),
init_cfg=None)
# dataset settings
train_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=LoadPanopticAnnotations,
with_bbox=True,
with_mask=True,
with_seg=True),
dict(type=RandomFlip, prob=0.5),
# dict(type=Resize, scale=(1333, 800), keep_ratio=True),
dict(
type=RandomChoice,
transforms=[[
dict(
type=RandomChoiceResize,
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
resize_type=Resize,
keep_ratio=True)
],
[
dict(
type=RandomChoiceResize,
scales=[(400, 1333), (500, 1333), (600, 1333)],
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type=RandomChoiceResize,
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
resize_type=Resize,
keep_ratio=True)
]]),
dict(type=PackDetInputs)
]
train_dataloader.update(
dict(batch_size=1, num_workers=1, dataset=dict(pipeline=train_pipeline)))
val_dataloader.update(dict(batch_size=1, num_workers=1))
test_dataloader = val_dataloader
# optimizer
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(
type=AdamW,
lr=0.0001,
weight_decay=0.0001,
eps=1e-8,
betas=(0.9, 0.999)),
paramwise_cfg=dict(
custom_keys={
'backbone': dict(lr_mult=0.1, decay_mult=1.0),
'query_embed': dict(lr_mult=1.0, decay_mult=0.0)
},
norm_decay_mult=0.0),
clip_grad=dict(max_norm=0.01, norm_type=2))
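# Note (illustrative): with the AdamW base lr of 1e-4 above, `custom_keys`
# matches parameter names by substring, so every 'backbone' parameter is
# optimized with lr = 1e-4 * 0.1 = 1e-5 (decay unchanged), 'query_embed'
# keeps the full lr but gets zero weight decay, and `norm_decay_mult=0.0`
# disables weight decay for all normalization layers.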
max_epochs = 75
# learning rate
param_scheduler = dict(
type=MultiStepLR,
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[50],
gamma=0.1)
train_cfg = dict(
type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
# Default setting for scaling LR automatically
# - `enable`: whether to enable automatic LR scaling by default.
# - `base_batch_size` = (16 GPUs) x (1 sample per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
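# Note (illustrative): when automatic LR scaling is enabled (for example via
# the `--auto-scale-lr` flag of the training script), the optimizer lr is
# scaled linearly by real_batch_size / base_batch_size, e.g. 8 GPUs x 1 sample
# each would give 1e-4 * 8 / 16 = 5e-5.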
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
from mmengine.optim.scheduler import LinearLR
from mmdet.models.backbones import SwinTransformer
from mmdet.models.layers import PixelDecoder
with read_base():
from .maskformer_r50_ms_16xb1_75e_coco import *
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa
depths = [2, 2, 18, 2]
model.update(
dict(
backbone=dict(
_delete_=True,
type=SwinTransformer,
pretrain_img_size=384,
embed_dims=192,
patch_size=4,
window_size=12,
mlp_ratio=4,
depths=depths,
num_heads=[6, 12, 24, 48],
qkv_bias=True,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.3,
patch_norm=True,
out_indices=(0, 1, 2, 3),
with_cp=False,
convert_weights=True,
init_cfg=dict(type=PretrainedInit, checkpoint=pretrained)),
panoptic_head=dict(
in_channels=[192, 384, 768, 1536], # pass to pixel_decoder inside
pixel_decoder=dict(
_delete_=True,
type=PixelDecoder,
norm_cfg=dict(type=GroupNorm, num_groups=32),
act_cfg=dict(type=ReLU)),
enforce_decoder_input_project=True)))
# optimizer
# weight_decay = 0.01
# norm_weight_decay = 0.0
# embed_weight_decay = 0.0
embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
norm_multi = dict(lr_mult=1.0, decay_mult=0.0)
custom_keys = {
'norm': norm_multi,
'absolute_pos_embed': embed_multi,
'relative_position_bias_table': embed_multi,
'query_embed': embed_multi
}
optim_wrapper.update(
dict(
optimizer=dict(lr=6e-5, weight_decay=0.01),
paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)))
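# Note (illustrative): the decay_mult=0.0 entries above exempt normalization
# weights, the Swin absolute/relative position embedding tables and the query
# embeddings from the 0.01 weight decay, matching the commented intent
# (norm_weight_decay = 0.0, embed_weight_decay = 0.0); the keys are matched as
# substrings of parameter names.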
max_epochs = 300
# learning rate
param_scheduler = [
dict(type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
dict(
type=MultiStepLR,
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[250],
gamma=0.1)
]
train_cfg.update(dict(max_epochs=max_epochs))
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (64 GPUs) x (1 sample per GPU)
auto_scale_lr.update(dict(base_batch_size=64))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
from mmengine.model.weight_init import PretrainedInit
with read_base():
from .panoptic_fpn_r50_fpn_1x_coco import *
model.update(
dict(
backbone=dict(
depth=101,
init_cfg=dict(
type=PretrainedInit, checkpoint='torchvision://resnet101'))))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
from mmengine.model.weight_init import PretrainedInit
with read_base():
from .panoptic_fpn_r50_fpn_ms_3x_coco import *
model.update(
dict(
backbone=dict(
depth=101,
init_cfg=dict(
type=PretrainedInit, checkpoint='torchvision://resnet101'))))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .._base_.models.mask_rcnn_r50_fpn import *
from .._base_.datasets.coco_panoptic import *
from .._base_.schedules.schedule_1x import *
from .._base_.default_runtime import *
from mmcv.ops import nms
from torch.nn import GroupNorm
from mmdet.models.data_preprocessors.data_preprocessor import \
DetDataPreprocessor
from mmdet.models.detectors.panoptic_fpn import PanopticFPN
from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
from mmdet.models.seg_heads.panoptic_fpn_head import PanopticFPNHead
from mmdet.models.seg_heads.panoptic_fusion_heads import HeuristicFusionHead
model.update(
dict(
type=PanopticFPN,
data_preprocessor=dict(
type=DetDataPreprocessor,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32,
pad_mask=True,
mask_pad_value=0,
pad_seg=True,
seg_pad_value=255),
semantic_head=dict(
type=PanopticFPNHead,
num_things_classes=80,
num_stuff_classes=53,
in_channels=256,
inner_channels=128,
start_level=0,
end_level=4,
norm_cfg=dict(type=GroupNorm, num_groups=32, requires_grad=True),
conv_cfg=None,
loss_seg=dict(
type=CrossEntropyLoss, ignore_index=255, loss_weight=0.5)),
panoptic_fusion_head=dict(
type=HeuristicFusionHead,
num_things_classes=80,
num_stuff_classes=53),
test_cfg=dict(
rcnn=dict(
score_thr=0.6,
nms=dict(type=nms, iou_threshold=0.5, class_agnostic=True),
max_per_img=100,
mask_thr_binary=0.5),
# used in HeuristicFusionHead
panoptic=dict(mask_overlap=0.5, stuff_area_limit=4096))))
# Forcibly remove NumClassCheckHook
custom_hooks = []
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
with read_base():
from .panoptic_fpn_r50_fpn_1x_coco import *
from mmcv.transforms import RandomResize
from mmcv.transforms.loading import LoadImageFromFile
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
from mmdet.datasets.transforms.transforms import RandomFlip
# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=LoadPanopticAnnotations,
with_bbox=True,
with_mask=True,
with_seg=True),
dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
dict(type=RandomFlip, prob=0.5),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
# TODO: Use RepeatDataset to speed up training
# training schedule for 3x
train_cfg.update(dict(max_epochs=36, val_interval=3))
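# One possible shape for the RepeatDataset TODO above (a sketch with assumed
# values, not the repository's implementation): the existing dataset dict is
# wrapped so each "epoch" iterates the data several times, e.g.
#
#     from mmengine.dataset import RepeatDataset
#     train_dataloader['dataset'] = dict(
#         type=RepeatDataset,
#         times=3,  # assumed repeat factor
#         dataset=train_dataloader['dataset'])
#
# with max_epochs reduced accordingly (e.g. 36 -> 12) to keep the same total
# number of iterations.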
# learning rate
param_scheduler = [
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
dict(
type=MultiStepLR,
begin=0,
end=36,
by_epoch=True,
milestones=[24, 33],
gamma=0.1)
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
with read_base():
from .._base_.models.faster_rcnn_r50_fpn import *
from .._base_.models.faster_rcnn_r50_fpn import model
from .._base_.default_runtime import *
from mmcv.ops import RoIAlign
from mmengine.hooks import LoggerHook, SyncBuffersHook
from mmengine.model.weight_init import PretrainedInit
from mmengine.optim import MultiStepLR, OptimWrapper
from mmengine.runner.runner import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.nn.modules.batchnorm import BatchNorm2d
from torch.nn.modules.normalization import GroupNorm
from torch.optim import SGD
from mmdet.engine.hooks import TrackVisualizationHook
from mmdet.models import (QDTrack, QuasiDenseEmbedHead, QuasiDenseTracker,
QuasiDenseTrackHead, SingleRoIExtractor,
TrackDataPreprocessor)
from mmdet.models.losses import (L1Loss, MarginL2Loss,
MultiPosCrossEntropyLoss, SmoothL1Loss)
from mmdet.models.task_modules import (CombinedSampler,
InstanceBalancedPosSampler,
MaxIoUAssigner, RandomSampler)
from mmdet.visualization import TrackLocalVisualizer
detector = model
detector.pop('data_preprocessor')
detector['backbone'].update(
dict(
norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
style='caffe',
init_cfg=dict(
type=PretrainedInit,
checkpoint='open-mmlab://detectron2/resnet50_caffe')))
detector.rpn_head.loss_bbox.update(
dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0))
detector.rpn_head.bbox_coder.update(dict(clip_border=False))
detector.roi_head.bbox_head.update(dict(num_classes=1))
detector.roi_head.bbox_head.bbox_coder.update(dict(clip_border=False))
detector['init_cfg'] = dict(
type=PretrainedInit,
checkpoint= # noqa: E251
'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/'
'faster_rcnn_r50_fpn_1x_coco-person/'
'faster_rcnn_r50_fpn_1x_coco-person_20201216_175929-d022e227.pth'
# noqa: E501
)
del model
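# Note: the block above reuses the Faster R-CNN model imported from the base
# config as the `detector` sub-module of QDTrack. Its data_preprocessor is
# dropped (QDTrack provides a TrackDataPreprocessor below), the bbox head is
# narrowed to a single 'person' class, and init_cfg points at a COCO-person
# Faster R-CNN checkpoint so tracking starts from detection weights; `del
# model` then removes the base name before the QDTrack model dict is defined.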
model = dict(
type=QDTrack,
data_preprocessor=dict(
type=TrackDataPreprocessor,
mean=[103.530, 116.280, 123.675],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
detector=detector,
track_head=dict(
type=QuasiDenseTrackHead,
roi_extractor=dict(
type=SingleRoIExtractor,
roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
embed_head=dict(
type=QuasiDenseEmbedHead,
num_convs=4,
num_fcs=1,
embed_channels=256,
norm_cfg=dict(type=GroupNorm, num_groups=32),
loss_track=dict(type=MultiPosCrossEntropyLoss, loss_weight=0.25),
loss_track_aux=dict(
type=MarginL2Loss,
neg_pos_ub=3,
pos_margin=0,
neg_margin=0.1,
hard_mining=True,
loss_weight=1.0)),
loss_bbox=dict(type=L1Loss, loss_weight=1.0),
train_cfg=dict(
assigner=dict(
type=MaxIoUAssigner,
pos_iou_thr=0.7,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type=CombinedSampler,
num=256,
pos_fraction=0.5,
neg_pos_ub=3,
add_gt_as_proposals=True,
pos_sampler=dict(type=InstanceBalancedPosSampler),
neg_sampler=dict(type=RandomSampler)))),
tracker=dict(
type=QuasiDenseTracker,
init_score_thr=0.9,
obj_score_thr=0.5,
match_score_thr=0.5,
memo_tracklet_frames=30,
memo_backdrop_frames=1,
memo_momentum=0.8,
nms_conf_thr=0.5,
nms_backdrop_iou_thr=0.3,
nms_class_iou_thr=0.7,
with_cats=True,
match_metric='bisoftmax'))
# optimizer
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001),
clip_grad=dict(max_norm=35, norm_type=2))
# learning policy
param_scheduler = [
dict(type=MultiStepLR, begin=0, end=4, by_epoch=True, milestones=[3])
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=4, val_interval=4)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
default_hooks.update(
logger=dict(type=LoggerHook, interval=50),
visualization=dict(type=TrackVisualizationHook, draw=False))
visualizer.update(
type=TrackLocalVisualizer, vis_backends=vis_backends, name='visualizer')
# custom hooks
custom_hooks = [
# Synchronize model buffers such as running_mean and running_var in BN
# at the end of each epoch
dict(type=SyncBuffersHook)
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
with read_base():
from .._base_.datasets.mot_challenge import *
from .qdtrack_faster_rcnn_r50_fpn_4e_base import *
from mmdet.evaluation import CocoVideoMetric, MOTChallengeMetric
# evaluator
val_evaluator = [
dict(type=CocoVideoMetric, metric=['bbox'], classwise=True),
dict(type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .._base_.models.retinanet_r50_fpn import *
from .._base_.datasets.coco_detection import *
from .._base_.schedules.schedule_1x import *
from .._base_.default_runtime import *
from .retinanet_tta import *
from torch.optim.sgd import SGD
# optimizer
optim_wrapper.update(
dict(optimizer=dict(type=SGD, lr=0.01, momentum=0.9, weight_decay=0.0001)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import TestTimeAug
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
from mmdet.models.test_time_augs.det_tta import DetTTAModel
tta_model = dict(
type=DetTTAModel,
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))
img_scales = [(1333, 800), (666, 400), (2000, 1200)]
tta_pipeline = [
dict(type=LoadImageFromFile, backend_args=None),
dict(
type=TestTimeAug,
transforms=[
[dict(type=Resize, scale=s, keep_ratio=True) for s in img_scales],
[dict(type=RandomFlip, prob=1.),
dict(type=RandomFlip, prob=0.)],
[dict(type=LoadAnnotations, with_bbox=True)],
[
dict(
type=PackDetInputs,
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]
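# Note (illustrative): TestTimeAug enumerates the cross product of the
# transform lists above, so each image is processed at 3 scales x 2 flip
# states = 6 augmented views; DetTTAModel then merges the per-view detections
# with the configured NMS (iou_threshold=0.5, max_per_img=100).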
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_l_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from torch.nn.modules.activation import SiLU
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.dense_heads.rtmdet_ins_head import RTMDetInsSepBNHead
from mmdet.models.layers.ema import ExpMomentumEMA
from mmdet.models.losses.dice_loss import DiceLoss
from mmdet.models.losses.gfocal_loss import QualityFocalLoss
from mmdet.models.losses.iou_loss import GIoULoss
from mmdet.models.task_modules.coders.distance_point_bbox_coder import \
DistancePointBBoxCoder
from mmdet.models.task_modules.prior_generators.point_generator import \
MlvlPointGenerator
model.merge(
dict(
bbox_head=dict(
_delete_=True,
type=RTMDetInsSepBNHead,
num_classes=80,
in_channels=256,
stacked_convs=2,
share_conv=True,
pred_kernel_size=1,
feat_channels=256,
act_cfg=dict(type=SiLU, inplace=True),
norm_cfg=dict(type='SyncBN', requires_grad=True),
anchor_generator=dict(
type=MlvlPointGenerator, offset=0, strides=[8, 16, 32]),
bbox_coder=dict(type=DistancePointBBoxCoder),
loss_cls=dict(
type=QualityFocalLoss,
use_sigmoid=True,
beta=2.0,
loss_weight=1.0),
loss_bbox=dict(type=GIoULoss, loss_weight=2.0),
loss_mask=dict(
type=DiceLoss, loss_weight=2.0, eps=5e-6, reduction='mean')),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100,
mask_thr_binary=0.5),
))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]
train_dataloader.update(
dict(pin_memory=True, dataset=dict(pipeline=train_pipeline)))
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]
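# Note (illustrative): EMAHook maintains an exponential moving average of the
# model weights (momentum=0.0002) that is used for validation and checkpoints,
# while PipelineSwitchHook swaps the dataloader to train_pipeline_stage2 at
# epoch 280, dropping the heavy Mosaic/MixUp augmentations for the final 20
# epochs of the 300-epoch schedule.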
val_evaluator.update(dict(metric=['bbox', 'segm']))
test_evaluator = val_evaluator
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *
model.update(
dict(
backbone=dict(deepen_factor=0.67, widen_factor=0.75),
neck=dict(
in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),
bbox_head=dict(in_channels=192, feat_channels=192)))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.layers.ema import ExpMomentumEMA
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
model.update(
dict(
backbone=dict(
deepen_factor=0.33,
widen_factor=0.5,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1),
bbox_head=dict(in_channels=128, feat_channels=128)))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_s_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
model.update(
dict(
backbone=dict(
deepen_factor=0.167,
widen_factor=0.375,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1),
bbox_head=dict(in_channels=96, feat_channels=96)))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=CachedMosaic,
img_scale=(640, 640),
pad_val=114.0,
max_cached_images=20,
random_pop=False),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=10,
random_pop=False,
pad_val=(114, 114, 114),
prob=0.5),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
model.update(
dict(
backbone=dict(deepen_factor=1.33, widen_factor=1.25),
neck=dict(
in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4),
bbox_head=dict(in_channels=320, feat_channels=320)))
base_lr = 0.002
# optimizer
optim_wrapper.update(dict(optimizer=dict(lr=base_lr)))
# learning rate
param_scheduler = [
dict(
type=LinearLR, start_factor=1.0e-5, by_epoch=False, begin=0, end=1000),
dict(
# use cosine lr from epoch 150 to 300
type=CosineAnnealingLR,
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .._base_.default_runtime import *
from .._base_.schedules.schedule_1x import *
from .._base_.datasets.coco_detection import *
from .rtmdet_tta import *
from mmcv.ops import nms
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from torch.nn import SyncBatchNorm
from torch.nn.modules.activation import SiLU
from torch.optim.adamw import AdamW
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.backbones.cspnext import CSPNeXt
from mmdet.models.data_preprocessors.data_preprocessor import \
DetDataPreprocessor
from mmdet.models.dense_heads.rtmdet_head import RTMDetSepBNHead
from mmdet.models.detectors.rtmdet import RTMDet
from mmdet.models.layers.ema import ExpMomentumEMA
from mmdet.models.losses.gfocal_loss import QualityFocalLoss
from mmdet.models.losses.iou_loss import GIoULoss
from mmdet.models.necks.cspnext_pafpn import CSPNeXtPAFPN
from mmdet.models.task_modules.assigners.dynamic_soft_label_assigner import \
DynamicSoftLabelAssigner
from mmdet.models.task_modules.coders.distance_point_bbox_coder import \
DistancePointBBoxCoder
from mmdet.models.task_modules.prior_generators.point_generator import \
MlvlPointGenerator
model = dict(
type=RTMDet,
data_preprocessor=dict(
type=DetDataPreprocessor,
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
bgr_to_rgb=False,
batch_augments=None),
backbone=dict(
type=CSPNeXt,
arch='P5',
expand_ratio=0.5,
deepen_factor=1,
widen_factor=1,
channel_attention=True,
norm_cfg=dict(type=SyncBatchNorm),
act_cfg=dict(type=SiLU, inplace=True)),
neck=dict(
type=CSPNeXtPAFPN,
in_channels=[256, 512, 1024],
out_channels=256,
num_csp_blocks=3,
expand_ratio=0.5,
norm_cfg=dict(type=SyncBatchNorm),
act_cfg=dict(type=SiLU, inplace=True)),
bbox_head=dict(
type=RTMDetSepBNHead,
num_classes=80,
in_channels=256,
stacked_convs=2,
feat_channels=256,
anchor_generator=dict(
type=MlvlPointGenerator, offset=0, strides=[8, 16, 32]),
bbox_coder=dict(type=DistancePointBBoxCoder),
loss_cls=dict(
type=QualityFocalLoss, use_sigmoid=True, beta=2.0,
loss_weight=1.0),
loss_bbox=dict(type=GIoULoss, loss_weight=2.0),
with_objectness=False,
exp_on_reg=True,
share_conv=True,
pred_kernel_size=1,
norm_cfg=dict(type=SyncBatchNorm),
act_cfg=dict(type=SiLU, inplace=True)),
train_cfg=dict(
assigner=dict(type=DynamicSoftLabelAssigner, topk=13),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=30000,
min_bbox_size=0,
score_thr=0.001,
nms=dict(type=nms, iou_threshold=0.65),
max_per_img=300),
)
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=PackDetInputs)
]
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
test_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=Resize, scale=(640, 640), keep_ratio=True),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=LoadAnnotations, with_bbox=True),
dict(
type=PackDetInputs,
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader.update(
dict(
batch_size=32,
num_workers=10,
batch_sampler=None,
pin_memory=True,
dataset=dict(pipeline=train_pipeline)))
val_dataloader.update(
dict(batch_size=5, num_workers=10, dataset=dict(pipeline=test_pipeline)))
test_dataloader = val_dataloader
max_epochs = 300
stage2_num_epochs = 20
base_lr = 0.004
interval = 10
train_cfg.update(
dict(
max_epochs=max_epochs,
val_interval=interval,
dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)]))
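# Note: with interval = 10, max_epochs = 300 and stage2_num_epochs = 20, the
# dynamic_intervals entry (280, 1) above means validation runs every 10 epochs
# until epoch 280 and then after every epoch during the final stage-2 epochs.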
val_evaluator.update(dict(proposal_nums=(100, 1, 10)))
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=AdamW, lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type=LinearLR, start_factor=1.0e-5, by_epoch=False, begin=0, end=1000),
dict(
# use cosine lr from epoch 150 to 300
type=CosineAnnealingLR,
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
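# Note: the schedule above warms the lr up linearly over the first 1000
# iterations (from base_lr * 1e-5 to base_lr = 0.004), keeps it constant until
# epoch 150, then decays it along a cosine curve over epochs 150-300 down to
# eta_min = 0.004 * 0.05 = 2e-4.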
# hooks
default_hooks.update(
dict(
checkpoint=dict(
interval=interval,
max_keep_ckpts=3 # only keep latest 3 checkpoints
)))
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]