# Commit 26b83c4a by dengjb: "update codes" (parent 2f6baaee)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
with read_base():
from .panoptic_fpn_r50_fpn_1x_coco import *
from mmcv.transforms import RandomResize
from mmcv.transforms.loading import LoadImageFromFile
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
from mmdet.datasets.transforms.transforms import RandomFlip
# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
# multiscale_mode='range'
train_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=LoadPanopticAnnotations,
with_bbox=True,
with_mask=True,
with_seg=True),
dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
dict(type=RandomFlip, prob=0.5),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
# TODO: Use RepeatDataset to speed up training
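# A hedged sketch of what the TODO above could look like (not part of this
# commit): wrap the dataset in mmengine's RepeatDataset so each epoch
# iterates the data several times and fewer epochs are needed, e.g.
# from mmengine.dataset import RepeatDataset
# train_dataloader.update(
#     dict(dataset=dict(
#         type=RepeatDataset, times=3,
#         dataset=dict(pipeline=train_pipeline))))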
# training schedule for 3x
train_cfg.update(dict(max_epochs=36, val_interval=3))
# learning rate
param_scheduler = [
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
dict(
type=MultiStepLR,
begin=0,
end=36,
by_epoch=True,
milestones=[24, 33],
gamma=0.1)
]
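# Schedule recap (from the values above): the lr warms up linearly from
# 0.001 x the base lr to the base lr over the first 500 iterations, then
# decays by gamma=0.1 at epochs 24 and 33 of the 36-epoch (3x) run.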
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
with read_base():
from .._base_.models.faster_rcnn_r50_fpn import *
from .._base_.models.faster_rcnn_r50_fpn import model
from .._base_.default_runtime import *
from mmcv.ops import RoIAlign
from mmengine.hooks import LoggerHook, SyncBuffersHook
from mmengine.model.weight_init import PretrainedInit
from mmengine.optim import MultiStepLR, OptimWrapper
from mmengine.runner.runner import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.nn.modules.batchnorm import BatchNorm2d
from torch.nn.modules.normalization import GroupNorm
from torch.optim import SGD
from mmdet.engine.hooks import TrackVisualizationHook
from mmdet.models import (QDTrack, QuasiDenseEmbedHead, QuasiDenseTracker,
QuasiDenseTrackHead, SingleRoIExtractor,
TrackDataPreprocessor)
from mmdet.models.losses import (L1Loss, MarginL2Loss,
MultiPosCrossEntropyLoss, SmoothL1Loss)
from mmdet.models.task_modules import (CombinedSampler,
InstanceBalancedPosSampler,
MaxIoUAssigner, RandomSampler)
from mmdet.visualization import TrackLocalVisualizer
detector = model
detector.pop('data_preprocessor')
detector['backbone'].update(
dict(
norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
style='caffe',
init_cfg=dict(
type=PretrainedInit,
checkpoint='open-mmlab://detectron2/resnet50_caffe')))
detector.rpn_head.loss_bbox.update(
dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0))
detector.rpn_head.bbox_coder.update(dict(clip_border=False))
detector.roi_head.bbox_head.update(dict(num_classes=1))
detector.roi_head.bbox_head.bbox_coder.update(dict(clip_border=False))
detector['init_cfg'] = dict(
type=PretrainedInit,
checkpoint= # noqa: E251
'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/'
'faster_rcnn_r50_fpn_1x_coco-person/'
'faster_rcnn_r50_fpn_1x_coco-person_20201216_175929-d022e227.pth'
# noqa: E501
)
del model
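# The inherited Faster R-CNN dict now serves as the detector branch of
# QDTrack; deleting the inherited `model` name above lets the tracker
# definition below rebind it cleanly.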
model = dict(
type=QDTrack,
data_preprocessor=dict(
type=TrackDataPreprocessor,
mean=[103.530, 116.280, 123.675],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
detector=detector,
track_head=dict(
type=QuasiDenseTrackHead,
roi_extractor=dict(
type=SingleRoIExtractor,
roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
embed_head=dict(
type=QuasiDenseEmbedHead,
num_convs=4,
num_fcs=1,
embed_channels=256,
norm_cfg=dict(type=GroupNorm, num_groups=32),
loss_track=dict(type=MultiPosCrossEntropyLoss, loss_weight=0.25),
loss_track_aux=dict(
type=MarginL2Loss,
neg_pos_ub=3,
pos_margin=0,
neg_margin=0.1,
hard_mining=True,
loss_weight=1.0)),
loss_bbox=dict(type=L1Loss, loss_weight=1.0),
train_cfg=dict(
assigner=dict(
type=MaxIoUAssigner,
pos_iou_thr=0.7,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type=CombinedSampler,
num=256,
pos_fraction=0.5,
neg_pos_ub=3,
add_gt_as_proposals=True,
pos_sampler=dict(type=InstanceBalancedPosSampler),
neg_sampler=dict(type=RandomSampler)))),
tracker=dict(
type=QuasiDenseTracker,
init_score_thr=0.9,
obj_score_thr=0.5,
match_score_thr=0.5,
memo_tracklet_frames=30,
memo_backdrop_frames=1,
memo_momentum=0.8,
nms_conf_thr=0.5,
nms_backdrop_iou_thr=0.3,
nms_class_iou_thr=0.7,
with_cats=True,
match_metric='bisoftmax'))
# optimizer
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001),
clip_grad=dict(max_norm=35, norm_type=2))
# learning policy
param_scheduler = [
dict(type=MultiStepLR, begin=0, end=4, by_epoch=True, milestones=[3])
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=4, val_interval=4)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
default_hooks.update(
logger=dict(type=LoggerHook, interval=50),
visualization=dict(type=TrackVisualizationHook, draw=False))
visualizer.update(
type=TrackLocalVisualizer, vis_backends=vis_backends, name='visualizer')
# custom hooks
custom_hooks = [
# Synchronize model buffers such as running_mean and running_var in BN
# at the end of each epoch
dict(type=SyncBuffersHook)
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
with read_base():
from .._base_.datasets.mot_challenge import *
from .qdtrack_faster_rcnn_r50_fpn_4e_base import *
from mmdet.evaluation import CocoVideoMetric, MOTChallengeMetric
# evaluator
val_evaluator = [
dict(type=CocoVideoMetric, metric=['bbox'], classwise=True),
dict(type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
]
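# Assumed completion (hedged, mirroring the other configs in this commit,
# not part of the lines above):
# test_evaluator = val_evaluator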
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .._base_.models.retinanet_r50_fpn import *
from .._base_.datasets.coco_detection import *
from .._base_.schedules.schedule_1x import *
from .._base_.default_runtime import *
from .retinanet_tta import *
from torch.optim.sgd import SGD
# optimizer
optim_wrapper.update(
dict(optimizer=dict(type=SGD, lr=0.01, momentum=0.9, weight_decay=0.0001)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import TestTimeAug
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import RandomFlip, Resize
from mmdet.models.test_time_augs.det_tta import DetTTAModel
tta_model = dict(
type=DetTTAModel,
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))
img_scales = [(1333, 800), (666, 400), (2000, 1200)]
tta_pipeline = [
dict(type=LoadImageFromFile, backend_args=None),
dict(
type=TestTimeAug,
transforms=[
[dict(type=Resize, scale=s, keep_ratio=True) for s in img_scales],
[dict(type=RandomFlip, prob=1.),
dict(type=RandomFlip, prob=0.)],
[dict(type=LoadAnnotations, with_bbox=True)],
[
dict(
type=PackDetInputs,
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]
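# Hedged usage note: this file only defines tta_model/tta_pipeline; they take
# effect when test-time augmentation is switched on, e.g. via the --tta flag
# of mmdetection 3.x's tools/test.py (verify the flag against your checkout):
#   python tools/test.py <config> <checkpoint> --tta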
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_l_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from torch.nn.modules.activation import SiLU
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.dense_heads.rtmdet_ins_head import RTMDetInsSepBNHead
from mmdet.models.layers.ema import ExpMomentumEMA
from mmdet.models.losses.dice_loss import DiceLoss
from mmdet.models.losses.gfocal_loss import QualityFocalLoss
from mmdet.models.losses.iou_loss import GIoULoss
from mmdet.models.task_modules.coders.distance_point_bbox_coder import \
DistancePointBBoxCoder
from mmdet.models.task_modules.prior_generators.point_generator import \
MlvlPointGenerator
model.merge(
dict(
bbox_head=dict(
_delete_=True,
type=RTMDetInsSepBNHead,
num_classes=80,
in_channels=256,
stacked_convs=2,
share_conv=True,
pred_kernel_size=1,
feat_channels=256,
act_cfg=dict(type=SiLU, inplace=True),
norm_cfg=dict(type='SyncBN', requires_grad=True),
anchor_generator=dict(
type=MlvlPointGenerator, offset=0, strides=[8, 16, 32]),
bbox_coder=dict(type=DistancePointBBoxCoder),
loss_cls=dict(
type=QualityFocalLoss,
use_sigmoid=True,
beta=2.0,
loss_weight=1.0),
loss_bbox=dict(type=GIoULoss, loss_weight=2.0),
loss_mask=dict(
type=DiceLoss, loss_weight=2.0, eps=5e-6, reduction='mean')),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100,
mask_thr_binary=0.5),
))
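# Note on merge semantics: _delete_=True above tells the config system to
# replace the inherited bbox_head wholesale instead of recursively merging
# into it, which is required here because the head type changes to
# RTMDetInsSepBNHead.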
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]
train_dataloader.update(
dict(pin_memory=True, dataset=dict(pipeline=train_pipeline)))
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]
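# Hook recap: EMAHook maintains an exponential-momentum average of the model
# weights (priority 49 runs it just ahead of the default NORMAL=50 hooks),
# and PipelineSwitchHook swaps in the lighter stage-2 pipeline (no
# Mosaic/MixUp) at epoch 280 of the 300-epoch schedule.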
val_evaluator.update(dict(metric=['bbox', 'segm']))
test_evaluator = val_evaluator
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *
model.update(
dict(
backbone=dict(deepen_factor=0.67, widen_factor=0.75),
neck=dict(
in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),
bbox_head=dict(in_channels=192, feat_channels=192)))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.layers.ema import ExpMomentumEMA
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
model.update(
dict(
backbone=dict(
deepen_factor=0.33,
widen_factor=0.5,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1),
bbox_head=dict(in_channels=128, feat_channels=128)))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_s_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
model.update(
dict(
backbone=dict(
deepen_factor=0.167,
widen_factor=0.375,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1),
bbox_head=dict(in_channels=96, feat_channels=96)))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=CachedMosaic,
img_scale=(640, 640),
pad_val=114.0,
max_cached_images=20,
random_pop=False),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=10,
random_pop=False,
pad_val=(114, 114, 114),
prob=0.5),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
model.update(
dict(
backbone=dict(deepen_factor=1.33, widen_factor=1.25),
neck=dict(
in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4),
bbox_head=dict(in_channels=320, feat_channels=320)))
base_lr = 0.002
# optimizer
optim_wrapper.update(dict(optimizer=dict(lr=base_lr)))
# learning rate
param_scheduler = [
dict(
type=LinearLR, start_factor=1.0e-5, by_epoch=False, begin=0, end=1000),
dict(
# use cosine lr from 150 to 300 epoch
type=CosineAnnealingLR,
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .._base_.default_runtime import *
from .._base_.schedules.schedule_1x import *
from .._base_.datasets.coco_detection import *
from .rtmdet_tta import *
from mmcv.ops import nms
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from torch.nn import SyncBatchNorm
from torch.nn.modules.activation import SiLU
from torch.optim.adamw import AdamW
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.backbones.cspnext import CSPNeXt
from mmdet.models.data_preprocessors.data_preprocessor import \
DetDataPreprocessor
from mmdet.models.dense_heads.rtmdet_head import RTMDetSepBNHead
from mmdet.models.detectors.rtmdet import RTMDet
from mmdet.models.layers.ema import ExpMomentumEMA
from mmdet.models.losses.gfocal_loss import QualityFocalLoss
from mmdet.models.losses.iou_loss import GIoULoss
from mmdet.models.necks.cspnext_pafpn import CSPNeXtPAFPN
from mmdet.models.task_modules.assigners.dynamic_soft_label_assigner import \
DynamicSoftLabelAssigner
from mmdet.models.task_modules.coders.distance_point_bbox_coder import \
DistancePointBBoxCoder
from mmdet.models.task_modules.prior_generators.point_generator import \
MlvlPointGenerator
model = dict(
type=RTMDet,
data_preprocessor=dict(
type=DetDataPreprocessor,
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
bgr_to_rgb=False,
batch_augments=None),
backbone=dict(
type=CSPNeXt,
arch='P5',
expand_ratio=0.5,
deepen_factor=1,
widen_factor=1,
channel_attention=True,
norm_cfg=dict(type=SyncBatchNorm),
act_cfg=dict(type=SiLU, inplace=True)),
neck=dict(
type=CSPNeXtPAFPN,
in_channels=[256, 512, 1024],
out_channels=256,
num_csp_blocks=3,
expand_ratio=0.5,
norm_cfg=dict(type=SyncBatchNorm),
act_cfg=dict(type=SiLU, inplace=True)),
bbox_head=dict(
type=RTMDetSepBNHead,
num_classes=80,
in_channels=256,
stacked_convs=2,
feat_channels=256,
anchor_generator=dict(
type=MlvlPointGenerator, offset=0, strides=[8, 16, 32]),
bbox_coder=dict(type=DistancePointBBoxCoder),
loss_cls=dict(
type=QualityFocalLoss, use_sigmoid=True, beta=2.0,
loss_weight=1.0),
loss_bbox=dict(type=GIoULoss, loss_weight=2.0),
with_objectness=False,
exp_on_reg=True,
share_conv=True,
pred_kernel_size=1,
norm_cfg=dict(type=SyncBatchNorm),
act_cfg=dict(type=SiLU, inplace=True)),
train_cfg=dict(
assigner=dict(type=DynamicSoftLabelAssigner, topk=13),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=30000,
min_bbox_size=0,
score_thr=0.001,
nms=dict(type=nms, iou_threshold=0.65),
max_per_img=300),
)
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=PackDetInputs)
]
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
test_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=Resize, scale=(640, 640), keep_ratio=True),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=LoadAnnotations, with_bbox=True),
dict(
type=PackDetInputs,
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader.update(
dict(
batch_size=32,
num_workers=10,
batch_sampler=None,
pin_memory=True,
dataset=dict(pipeline=train_pipeline)))
val_dataloader.update(
dict(batch_size=5, num_workers=10, dataset=dict(pipeline=test_pipeline)))
test_dataloader = val_dataloader
max_epochs = 300
stage2_num_epochs = 20
base_lr = 0.004
interval = 10
train_cfg.update(
dict(
max_epochs=max_epochs,
val_interval=interval,
dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)]))
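# dynamic_intervals above means: validate every `interval` (10) epochs until
# epoch max_epochs - stage2_num_epochs (280), then every epoch for the final
# stage-2 phase.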
val_evaluator.update(dict(proposal_nums=(100, 1, 10)))
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=AdamW, lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type=LinearLR, start_factor=1.0e-5, by_epoch=False, begin=0, end=1000),
dict(
# use cosine lr from 150 to 300 epoch
type=CosineAnnealingLR,
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
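# CosineAnnealingLR follows the standard schedule
#   lr(t) = eta_min + 0.5 * (base_lr - eta_min) * (1 + cos(pi * t / T_max)),
# so the lr decays from base_lr at epoch 150 to eta_min = 0.05 * base_lr at
# epoch 300, stepped per iteration because convert_to_iter_based=True.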
# hooks
default_hooks.update(
dict(
checkpoint=dict(
interval=interval,
max_keep_ckpts=3 # only keep latest 3 checkpoints
)))
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_l_8xb32_300e_coco import *
model.update(
dict(
backbone=dict(deepen_factor=0.67, widen_factor=0.75),
neck=dict(
in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),
bbox_head=dict(in_channels=192, feat_channels=192)))
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_l_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.layers.ema import ExpMomentumEMA
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
model.update(
dict(
backbone=dict(
deepen_factor=0.33,
widen_factor=0.5,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1),
bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=PackDetInputs)
]
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_s_8xb32_300e_coco import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
model.update(
dict(
backbone=dict(
deepen_factor=0.167,
widen_factor=0.375,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1),
bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)))
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations, with_bbox=True),
dict(
type=CachedMosaic,
img_scale=(640, 640),
pad_val=114.0,
max_cached_images=20,
random_pop=False),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=10,
random_pop=False,
pad_val=(114, 114, 114),
prob=0.5),
dict(type=PackDetInputs)
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import TestTimeAug
from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import LoadAnnotations
from mmdet.datasets.transforms.transforms import Pad, RandomFlip, Resize
from mmdet.models.test_time_augs.det_tta import DetTTAModel
tta_model = dict(
type=DetTTAModel,
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100))
img_scales = [(640, 640), (320, 320), (960, 960)]
tta_pipeline = [
dict(type=LoadImageFromFile, backend_args=None),
dict(
type=TestTimeAug,
transforms=[
[dict(type=Resize, scale=s, keep_ratio=True) for s in img_scales],
[
# ``RandomFlip`` must be placed before ``Pad``, otherwise
# bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type=RandomFlip, prob=1.),
dict(type=RandomFlip, prob=0.)
],
[
dict(
type=Pad,
size=(960, 960),
pad_val=dict(img=(114, 114, 114))),
],
[dict(type=LoadAnnotations, with_bbox=True)],
[
dict(
type=PackDetInputs,
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]
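# Note: the fixed Pad size (960, 960) matches the largest entry in
# img_scales, so every resize branch is padded to one common shape before
# the TTA results are merged.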
# Copyright (c) OpenMMLab. All rights reserved.
# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0
from mmengine.config import read_base
with read_base():
from .rtmdet_l_8xb32_300e_coco import *
model.update(
dict(
backbone=dict(deepen_factor=1.33, widen_factor=1.25),
neck=dict(
in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4),
bbox_head=dict(in_channels=320, feat_channels=320)))
# Copyright (c) OpenMMLab. All rights reserved.
from .ade20k import (ADE20KInstanceDataset, ADE20KPanopticDataset,
ADE20KSegDataset)
from .base_det_dataset import BaseDetDataset
from .base_semseg_dataset import BaseSegDataset
from .base_video_dataset import BaseVideoDataset
from .cityscapes import CityscapesDataset
from .coco import CocoDataset
from .coco_caption import CocoCaptionDataset
from .coco_panoptic import CocoPanopticDataset
from .coco_semantic import CocoSegDataset
from .crowdhuman import CrowdHumanDataset
from .dataset_wrappers import ConcatDataset, MultiImageMixDataset
from .deepfashion import DeepFashionDataset
from .dod import DODDataset
from .dsdl import DSDLDetDataset
from .flickr30k import Flickr30kDataset
from .isaid import iSAIDDataset
from .lvis import LVISDataset, LVISV1Dataset, LVISV05Dataset
from .mdetr_style_refcoco import MDETRStyleRefCocoDataset
from .mot_challenge_dataset import MOTChallengeDataset
from .objects365 import Objects365V1Dataset, Objects365V2Dataset
from .odvg import ODVGDataset
from .openimages import OpenImagesChallengeDataset, OpenImagesDataset
from .refcoco import RefCocoDataset
from .reid_dataset import ReIDDataset
from .samplers import (AspectRatioBatchSampler, ClassAwareSampler,
CustomSampleSizeSampler, GroupMultiSourceSampler,
MultiSourceSampler, TrackAspectRatioBatchSampler,
TrackImgSampler)
from .utils import get_loading_pipeline
from .v3det import V3DetDataset
from .voc import VOCDataset
from .wider_face import WIDERFaceDataset
from .xml_style import XMLDataset
from .youtube_vis_dataset import YouTubeVISDataset
__all__ = [
'XMLDataset', 'CocoDataset', 'DeepFashionDataset', 'VOCDataset',
'CityscapesDataset', 'LVISDataset', 'LVISV05Dataset', 'LVISV1Dataset',
'WIDERFaceDataset', 'get_loading_pipeline', 'CocoPanopticDataset',
'MultiImageMixDataset', 'OpenImagesDataset', 'OpenImagesChallengeDataset',
'AspectRatioBatchSampler', 'ClassAwareSampler', 'MultiSourceSampler',
'GroupMultiSourceSampler', 'BaseDetDataset', 'CrowdHumanDataset',
'Objects365V1Dataset', 'Objects365V2Dataset', 'DSDLDetDataset',
'BaseVideoDataset', 'MOTChallengeDataset', 'TrackImgSampler',
'ReIDDataset', 'YouTubeVISDataset', 'TrackAspectRatioBatchSampler',
'ADE20KPanopticDataset', 'CocoCaptionDataset', 'RefCocoDataset',
'BaseSegDataset', 'ADE20KSegDataset', 'CocoSegDataset',
'ADE20KInstanceDataset', 'iSAIDDataset', 'V3DetDataset', 'ConcatDataset',
'ODVGDataset', 'MDETRStyleRefCocoDataset', 'DODDataset',
'CustomSampleSizeSampler', 'Flickr30kDataset'
]
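# Hedged usage sketch: everything in __all__ is registered with mmdet's
# DATASETS registry, so datasets are normally built from config dicts
# (the paths below are illustrative only):
# from mmdet.registry import DATASETS
# dataset = DATASETS.build(
#     dict(type='CocoDataset',
#          data_root='data/coco/',
#          ann_file='annotations/instances_val2017.json',
#          data_prefix=dict(img='val2017/')))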
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List
from mmengine import fileio
from mmdet.registry import DATASETS
from .base_semseg_dataset import BaseSegDataset
from .coco import CocoDataset
from .coco_panoptic import CocoPanopticDataset
ADE_PALETTE = [(120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50),
(4, 200, 3), (120, 120, 80), (140, 140, 140), (204, 5, 255),
(230, 230, 230), (4, 250, 7), (224, 5, 255), (235, 255, 7),
(150, 5, 61), (120, 120, 70), (8, 255, 51), (255, 6, 82),
(143, 255, 140), (204, 255, 4), (255, 51, 7), (204, 70, 3),
(0, 102, 200), (61, 230, 250), (255, 6, 51), (11, 102, 255),
(255, 7, 71), (255, 9, 224), (9, 7, 230), (220, 220, 220),
(255, 9, 92), (112, 9, 255), (8, 255, 214), (7, 255, 224),
(255, 184, 6), (10, 255, 71), (255, 41, 10), (7, 255, 255),
(224, 255, 8), (102, 8, 255), (255, 61, 6), (255, 194, 7),
(255, 122, 8), (0, 255, 20), (255, 8, 41), (255, 5, 153),
(6, 51, 255), (235, 12, 255), (160, 150, 20), (0, 163, 255),
(140, 140, 140), (250, 10, 15), (20, 255, 0), (31, 255, 0),
(255, 31, 0), (255, 224, 0), (153, 255, 0), (0, 0, 255),
(255, 71, 0), (0, 235, 255), (0, 173, 255), (31, 0, 255),
(11, 200, 200), (255, 82, 0), (0, 255, 245), (0, 61, 255),
(0, 255, 112), (0, 255, 133), (255, 0, 0), (255, 163, 0),
(255, 102, 0), (194, 255, 0), (0, 143, 255), (51, 255, 0),
(0, 82, 255), (0, 255, 41), (0, 255, 173), (10, 0, 255),
(173, 255, 0), (0, 255, 153), (255, 92, 0), (255, 0, 255),
(255, 0, 245), (255, 0, 102), (255, 173, 0), (255, 0, 20),
(255, 184, 184), (0, 31, 255), (0, 255, 61), (0, 71, 255),
(255, 0, 204), (0, 255, 194), (0, 255, 82), (0, 10, 255),
(0, 112, 255), (51, 0, 255), (0, 194, 255), (0, 122, 255),
(0, 255, 163), (255, 153, 0), (0, 255, 10), (255, 112, 0),
(143, 255, 0), (82, 0, 255), (163, 255, 0), (255, 235, 0),
(8, 184, 170), (133, 0, 255), (0, 255, 92), (184, 0, 255),
(255, 0, 31), (0, 184, 255), (0, 214, 255), (255, 0, 112),
(92, 255, 0), (0, 224, 255), (112, 224, 255), (70, 184, 160),
(163, 0, 255), (153, 0, 255), (71, 255, 0), (255, 0, 163),
(255, 204, 0), (255, 0, 143), (0, 255, 235), (133, 255, 0),
(255, 0, 235), (245, 0, 255), (255, 0, 122), (255, 245, 0),
(10, 190, 212), (214, 255, 0), (0, 204, 255), (20, 0, 255),
(255, 255, 0), (0, 153, 255), (0, 41, 255), (0, 255, 204),
(41, 0, 255), (41, 255, 0), (173, 0, 255), (0, 245, 255),
(71, 0, 255), (122, 0, 255), (0, 255, 184), (0, 92, 255),
(184, 255, 0), (0, 133, 255), (255, 214, 0), (25, 194, 194),
(102, 255, 0), (92, 0, 255)]
@DATASETS.register_module()
class ADE20KPanopticDataset(CocoPanopticDataset):
METAINFO = {
'classes':
('bed', 'window', 'cabinet', 'person', 'door', 'table', 'curtain',
'chair', 'car', 'painting, picture', 'sofa', 'shelf', 'mirror',
'armchair', 'seat', 'fence', 'desk', 'wardrobe, closet, press',
'lamp', 'tub', 'rail', 'cushion', 'box', 'column, pillar',
'signboard, sign', 'chest of drawers, chest, bureau, dresser',
'counter', 'sink', 'fireplace', 'refrigerator, icebox', 'stairs',
'case, display case, showcase, vitrine',
'pool table, billiard table, snooker table', 'pillow',
'screen door, screen', 'bookcase', 'coffee table',
'toilet, can, commode, crapper, pot, potty, stool, throne', 'flower',
'book', 'bench', 'countertop', 'stove', 'palm, palm tree',
'kitchen island', 'computer', 'swivel chair', 'boat',
'arcade machine', 'bus', 'towel', 'light', 'truck', 'chandelier',
'awning, sunshade, sunblind', 'street lamp', 'booth', 'tv',
'airplane', 'clothes', 'pole',
'bannister, banister, balustrade, balusters, handrail',
'ottoman, pouf, pouffe, puff, hassock', 'bottle', 'van', 'ship',
'fountain', 'washer, automatic washer, washing machine',
'plaything, toy', 'stool', 'barrel, cask', 'basket, handbasket',
'bag', 'minibike, motorbike', 'oven', 'ball', 'food, solid food',
'step, stair', 'trade name', 'microwave', 'pot', 'animal', 'bicycle',
'dishwasher', 'screen', 'sculpture', 'hood, exhaust hood', 'sconce',
'vase', 'traffic light', 'tray', 'trash can', 'fan', 'plate',
'monitor', 'bulletin board', 'radiator', 'glass, drinking glass',
'clock', 'flag', 'wall', 'building', 'sky', 'floor', 'tree',
'ceiling', 'road, route', 'grass', 'sidewalk, pavement',
'earth, ground', 'mountain, mount', 'plant', 'water', 'house', 'sea',
'rug', 'field', 'rock, stone', 'base, pedestal, stand', 'sand',
'skyscraper', 'grandstand, covered stand', 'path', 'runway',
'stairway, staircase', 'river', 'bridge, span', 'blind, screen',
'hill', 'bar', 'hovel, hut, hutch, shack, shanty', 'tower',
'dirt track', 'land, ground, soil',
'escalator, moving staircase, moving stairway',
'buffet, counter, sideboard',
'poster, posting, placard, notice, bill, card', 'stage',
'conveyer belt, conveyor belt, conveyer, conveyor, transporter',
'canopy', 'pool', 'falls', 'tent', 'cradle', 'tank, storage tank',
'lake', 'blanket, cover', 'pier', 'crt screen', 'shower'),
'thing_classes':
('bed', 'window', 'cabinet', 'person', 'door', 'table', 'curtain',
'chair', 'car', 'painting, picture', 'sofa', 'shelf', 'mirror',
'armchair', 'seat', 'fence', 'desk', 'wardrobe, closet, press',
'lamp', 'tub', 'rail', 'cushion', 'box', 'column, pillar',
'signboard, sign', 'chest of drawers, chest, bureau, dresser',
'counter', 'sink', 'fireplace', 'refrigerator, icebox', 'stairs',
'case, display case, showcase, vitrine',
'pool table, billiard table, snooker table', 'pillow',
'screen door, screen', 'bookcase', 'coffee table',
'toilet, can, commode, crapper, pot, potty, stool, throne', 'flower',
'book', 'bench', 'countertop', 'stove', 'palm, palm tree',
'kitchen island', 'computer', 'swivel chair', 'boat',
'arcade machine', 'bus', 'towel', 'light', 'truck', 'chandelier',
'awning, sunshade, sunblind', 'street lamp', 'booth', 'tv',
'airplane', 'clothes', 'pole',
'bannister, banister, balustrade, balusters, handrail',
'ottoman, pouf, pouffe, puff, hassock', 'bottle', 'van', 'ship',
'fountain', 'washer, automatic washer, washing machine',
'plaything, toy', 'stool', 'barrel, cask', 'basket, handbasket',
'bag', 'minibike, motorbike', 'oven', 'ball', 'food, solid food',
'step, stair', 'trade name', 'microwave', 'pot', 'animal', 'bicycle',
'dishwasher', 'screen', 'sculpture', 'hood, exhaust hood', 'sconce',
'vase', 'traffic light', 'tray', 'trash can', 'fan', 'plate',
'monitor', 'bulletin board', 'radiator', 'glass, drinking glass',
'clock', 'flag'),
'stuff_classes':
('wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road, route',
'grass', 'sidewalk, pavement', 'earth, ground', 'mountain, mount',
'plant', 'water', 'house', 'sea', 'rug', 'field', 'rock, stone',
'base, pedestal, stand', 'sand', 'skyscraper',
'grandstand, covered stand', 'path', 'runway', 'stairway, staircase',
'river', 'bridge, span', 'blind, screen', 'hill', 'bar',
'hovel, hut, hutch, shack, shanty', 'tower', 'dirt track',
'land, ground, soil', 'escalator, moving staircase, moving stairway',
'buffet, counter, sideboard',
'poster, posting, placard, notice, bill, card', 'stage',
'conveyer belt, conveyor belt, conveyer, conveyor, transporter',
'canopy', 'pool', 'falls', 'tent', 'cradle', 'tank, storage tank',
'lake', 'blanket, cover', 'pier', 'crt screen', 'shower'),
'palette':
ADE_PALETTE
}
@DATASETS.register_module()
class ADE20KInstanceDataset(CocoDataset):
METAINFO = {
'classes':
('bed', 'windowpane', 'cabinet', 'person', 'door', 'table', 'curtain',
'chair', 'car', 'painting', 'sofa', 'shelf', 'mirror', 'armchair',
'seat', 'fence', 'desk', 'wardrobe', 'lamp', 'bathtub', 'railing',
'cushion', 'box', 'column', 'signboard', 'chest of drawers',
'counter', 'sink', 'fireplace', 'refrigerator', 'stairs', 'case',
'pool table', 'pillow', 'screen door', 'bookcase', 'coffee table',
'toilet', 'flower', 'book', 'bench', 'countertop', 'stove', 'palm',
'kitchen island', 'computer', 'swivel chair', 'boat',
'arcade machine', 'bus', 'towel', 'light', 'truck', 'chandelier',
'awning', 'streetlight', 'booth', 'television receiver', 'airplane',
'apparel', 'pole', 'bannister', 'ottoman', 'bottle', 'van', 'ship',
'fountain', 'washer', 'plaything', 'stool', 'barrel', 'basket', 'bag',
'minibike', 'oven', 'ball', 'food', 'step', 'trade name', 'microwave',
'pot', 'animal', 'bicycle', 'dishwasher', 'screen', 'sculpture',
'hood', 'sconce', 'vase', 'traffic light', 'tray', 'ashcan', 'fan',
'plate', 'monitor', 'bulletin board', 'radiator', 'glass', 'clock',
'flag'),
'palette': [(204, 5, 255), (230, 230, 230), (224, 5, 255),
(150, 5, 61), (8, 255, 51), (255, 6, 82), (255, 51, 7),
(204, 70, 3), (0, 102, 200), (255, 6, 51), (11, 102, 255),
(255, 7, 71), (220, 220, 220), (8, 255, 214),
(7, 255, 224), (255, 184, 6), (10, 255, 71), (7, 255, 255),
(224, 255, 8), (102, 8, 255), (255, 61, 6), (255, 194, 7),
(0, 255, 20), (255, 8, 41), (255, 5, 153), (6, 51, 255),
(235, 12, 255), (0, 163, 255), (250, 10, 15), (20, 255, 0),
(255, 224, 0), (0, 0, 255), (255, 71, 0), (0, 235, 255),
(0, 173, 255), (0, 255, 245), (0, 255, 112), (0, 255, 133),
(255, 0, 0), (255, 163, 0), (194, 255, 0), (0, 143, 255),
(51, 255, 0), (0, 82, 255), (0, 255, 41), (0, 255, 173),
(10, 0, 255), (173, 255, 0), (255, 92, 0), (255, 0, 245),
(255, 0, 102), (255, 173, 0), (255, 0, 20), (0, 31, 255),
(0, 255, 61), (0, 71, 255), (255, 0, 204), (0, 255, 194),
(0, 255, 82), (0, 112, 255), (51, 0, 255), (0, 122, 255),
(255, 153, 0), (0, 255, 10), (163, 255, 0), (255, 235, 0),
(8, 184, 170), (184, 0, 255), (255, 0, 31), (0, 214, 255),
(255, 0, 112), (92, 255, 0), (70, 184, 160), (163, 0, 255),
(71, 255, 0), (255, 0, 163), (255, 204, 0), (255, 0, 143),
(133, 255, 0), (255, 0, 235), (245, 0, 255), (255, 0, 122),
(255, 245, 0), (214, 255, 0), (0, 204, 255), (255, 255, 0),
(0, 153, 255), (0, 41, 255), (0, 255, 204), (41, 0, 255),
(41, 255, 0), (173, 0, 255), (0, 245, 255), (0, 255, 184),
(0, 92, 255), (184, 255, 0), (255, 214, 0), (25, 194, 194),
(102, 255, 0), (92, 0, 255)],
}
@DATASETS.register_module()
class ADE20KSegDataset(BaseSegDataset):
"""ADE20K dataset.
In segmentation map annotation for ADE20K, 0 stands for background, which
is not included in 150 categories. The ``img_suffix`` is fixed to '.jpg',
and ``seg_map_suffix`` is fixed to '.png'.
"""
METAINFO = dict(
classes=('wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road',
'bed ', 'windowpane', 'grass', 'cabinet', 'sidewalk',
'person', 'earth', 'door', 'table', 'mountain', 'plant',
'curtain', 'chair', 'car', 'water', 'painting', 'sofa',
'shelf', 'house', 'sea', 'mirror', 'rug', 'field', 'armchair',
'seat', 'fence', 'desk', 'rock', 'wardrobe', 'lamp',
'bathtub', 'railing', 'cushion', 'base', 'box', 'column',
'signboard', 'chest of drawers', 'counter', 'sand', 'sink',
'skyscraper', 'fireplace', 'refrigerator', 'grandstand',
'path', 'stairs', 'runway', 'case', 'pool table', 'pillow',
'screen door', 'stairway', 'river', 'bridge', 'bookcase',
'blind', 'coffee table', 'toilet', 'flower', 'book', 'hill',
'bench', 'countertop', 'stove', 'palm', 'kitchen island',
'computer', 'swivel chair', 'boat', 'bar', 'arcade machine',
'hovel', 'bus', 'towel', 'light', 'truck', 'tower',
'chandelier', 'awning', 'streetlight', 'booth',
'television receiver', 'airplane', 'dirt track', 'apparel',
'pole', 'land', 'bannister', 'escalator', 'ottoman', 'bottle',
'buffet', 'poster', 'stage', 'van', 'ship', 'fountain',
'conveyer belt', 'canopy', 'washer', 'plaything',
'swimming pool', 'stool', 'barrel', 'basket', 'waterfall',
'tent', 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food',
'step', 'tank', 'trade name', 'microwave', 'pot', 'animal',
'bicycle', 'lake', 'dishwasher', 'screen', 'blanket',
'sculpture', 'hood', 'sconce', 'vase', 'traffic light',
'tray', 'ashcan', 'fan', 'pier', 'crt screen', 'plate',
'monitor', 'bulletin board', 'shower', 'radiator', 'glass',
'clock', 'flag'),
palette=ADE_PALETTE)
def __init__(self,
img_suffix='.jpg',
seg_map_suffix='.png',
return_classes=False,
**kwargs) -> None:
self.return_classes = return_classes
super().__init__(
img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs)
def load_data_list(self) -> List[dict]:
"""Load annotation from directory or annotation file.
Returns:
List[dict]: All data info of dataset.
"""
data_list = []
img_dir = self.data_prefix.get('img_path', None)
ann_dir = self.data_prefix.get('seg_map_path', None)
for img in fileio.list_dir_or_file(
dir_path=img_dir,
list_dir=False,
suffix=self.img_suffix,
recursive=True,
backend_args=self.backend_args):
data_info = dict(img_path=osp.join(img_dir, img))
if ann_dir is not None:
seg_map = img.replace(self.img_suffix, self.seg_map_suffix)
data_info['seg_map_path'] = osp.join(ann_dir, seg_map)
data_info['label_map'] = self.label_map
if self.return_classes:
data_info['text'] = list(self._metainfo['classes'])
data_list.append(data_info)
return data_list
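# Hedged usage sketch (directory layout is illustrative, not from this
# commit):
# dataset = ADE20KSegDataset(
#     data_root='data/ADEChallengeData2016',
#     data_prefix=dict(img_path='images/validation',
#                      seg_map_path='annotations/validation'),
#     return_classes=True)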
# Copyright (c) OpenMMLab. All rights reserved.
from .coco_api import COCO, COCOeval, COCOPanoptic
from .cocoeval_mp import COCOevalMP
__all__ = ['COCO', 'COCOeval', 'COCOPanoptic', 'COCOevalMP']
# Copyright (c) OpenMMLab. All rights reserved.
# This file adds snake case aliases for the coco api
import warnings
from collections import defaultdict
from typing import List, Optional, Union
import pycocotools
from pycocotools.coco import COCO as _COCO
from pycocotools.cocoeval import COCOeval as _COCOeval
class COCO(_COCO):
"""This class is almost the same as official pycocotools package.
It implements some snake case function aliases. So that the COCO class has
the same interface as LVIS class.
"""
def __init__(self, annotation_file=None):
if getattr(pycocotools, '__version__', '0') >= '12.0.2':
warnings.warn(
'mmpycocotools is deprecated. Please install official pycocotools by "pip install pycocotools"', # noqa: E501
UserWarning)
super().__init__(annotation_file=annotation_file)
self.img_ann_map = self.imgToAnns
self.cat_img_map = self.catToImgs
def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None):
return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd)
def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]):
return self.getCatIds(cat_names, sup_names, cat_ids)
def get_img_ids(self, img_ids=[], cat_ids=[]):
return self.getImgIds(img_ids, cat_ids)
def load_anns(self, ids):
return self.loadAnns(ids)
def load_cats(self, ids):
return self.loadCats(ids)
def load_imgs(self, ids):
return self.loadImgs(ids)
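# Hedged usage sketch of the snake case aliases (annotation path
# illustrative):
# coco = COCO('annotations/instances_val2017.json')
# img_ids = coco.get_img_ids()
# ann_ids = coco.get_ann_ids(img_ids=img_ids[:1])
# anns = coco.load_anns(ann_ids)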
# just for the ease of import
COCOeval = _COCOeval
class COCOPanoptic(COCO):
"""This wrapper is for loading the panoptic style annotation file.
The format is shown in the CocoPanopticDataset class.
Args:
annotation_file (str, optional): Path of annotation file.
Defaults to None.
"""
def __init__(self, annotation_file: Optional[str] = None) -> None:
super(COCOPanoptic, self).__init__(annotation_file)
def createIndex(self) -> None:
"""Create index."""
# create index
print('creating index...')
# anns stores 'segment_id -> annotation'
anns, cats, imgs = {}, {}, {}
img_to_anns, cat_to_imgs = defaultdict(list), defaultdict(list)
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
for seg_ann in ann['segments_info']:
# to match with instance.json
seg_ann['image_id'] = ann['image_id']
img_to_anns[ann['image_id']].append(seg_ann)
                    # segment_id is not unique in the coco dataset:
                    # annotations from different images may share the
                    # same segment_id
if seg_ann['id'] in anns.keys():
anns[seg_ann['id']].append(seg_ann)
else:
anns[seg_ann['id']] = [seg_ann]
# filter out annotations from other images
img_to_anns_ = defaultdict(list)
for k, v in img_to_anns.items():
img_to_anns_[k] = [x for x in v if x['image_id'] == k]
img_to_anns = img_to_anns_
if 'images' in self.dataset:
for img_info in self.dataset['images']:
img_info['segm_file'] = img_info['file_name'].replace(
'.jpg', '.png')
imgs[img_info['id']] = img_info
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
for seg_ann in ann['segments_info']:
cat_to_imgs[seg_ann['category_id']].append(ann['image_id'])
print('index created!')
self.anns = anns
self.imgToAnns = img_to_anns
self.catToImgs = cat_to_imgs
self.imgs = imgs
self.cats = cats
def load_anns(self,
ids: Union[List[int], int] = []) -> Optional[List[dict]]:
"""Load anns with the specified ids.
        ``self.anns`` maps each segment id to a list of annotations
        rather than to a single annotation.
Args:
ids (Union[List[int], int]): Integer ids specifying anns.
Returns:
anns (List[dict], optional): Loaded ann objects.
"""
anns = []
if hasattr(ids, '__iter__') and hasattr(ids, '__len__'):
            # self.anns maps each id to a list of annotations, so the
            # lists for all requested ids are concatenated
for id in ids:
anns += self.anns[id]
return anns
        elif isinstance(ids, int):
            return self.anns[ids]
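# Hedged usage sketch (path illustrative): COCOPanoptic keys annotations by
# segment_id, and load_anns flattens the per-segment lists:
# coco_pan = COCOPanoptic('annotations/panoptic_val2017.json')
# seg_ids = list(coco_pan.anns)[:2]
# segs = coco_pan.load_anns(seg_ids)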