"vscode:/vscode.git/clone" did not exist on "6b3e52c9e820cbbdf04a113663e54d1e1ba9fbcd"
Commit 0f94318a authored by Zhe Chen's avatar Zhe Chen Committed by zhe chen
Browse files

[release] Add configs and models for cityscapes (#31)

* add _base_ files

* support mapillary dataset

* reorganize ade20k configs

* move figs to docs

* release configs and models of cityscapes

* update README.md

* update load_from urls
parent 0f982e7a
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_80k_mapillary.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=160,
depths=[5, 5, 22, 5],
groups=[10, 20, 40, 80],
mlp_ratio=4.,
drop_path_rate=0.4,
norm_layer='LN',
layer_scale=1.0,
offset_scale=2.0,
post_norm=True,
with_cp=False,
out_indices=(0, 1, 2, 3),
init_cfg=None),
decode_head=dict(num_classes=150, in_channels=[160, 320, 640, 1280]),
auxiliary_head=dict(num_classes=150, in_channels=640),
test_cfg=dict(mode='whole'))
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=37, layer_decay_rate=0.94,
depths=[5, 5, 22, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=4000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=160,
depths=[5, 5, 22, 5],
groups=[10, 20, 40, 80],
mlp_ratio=4.,
drop_path_rate=0.4,
norm_layer='LN',
layer_scale=1.0,
offset_scale=2.0,
post_norm=True,
with_cp=False,
out_indices=(0, 1, 2, 3),
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
decode_head=dict(num_classes=150, in_channels=[160, 320, 640, 1280]),
auxiliary_head=dict(num_classes=150, in_channels=640),
test_cfg=dict(mode='whole'))
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=37, layer_decay_rate=0.94,
depths=[5, 5, 22, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=8000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_s_1k_224.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=80,
depths=[4, 4, 21, 4],
groups=[5, 10, 20, 40],
mlp_ratio=4.,
drop_path_rate=0.3,
norm_layer='LN',
layer_scale=1.0,
offset_scale=1.0,
post_norm=True,
with_cp=False,
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
decode_head=dict(num_classes=150, in_channels=[80, 160, 320, 640]),
auxiliary_head=dict(num_classes=150, in_channels=320),
test_cfg=dict(mode='whole')
)
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=33, layer_decay_rate=1.0,
depths=[4, 4, 21, 4]))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data=dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=16000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_t_1k_224.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=64,
depths=[4, 4, 18, 4],
groups=[4, 8, 16, 32],
mlp_ratio=4.,
drop_path_rate=0.2,
norm_layer='LN',
layer_scale=1.0,
offset_scale=1.0,
post_norm=False,
with_cp=False,
out_indices=(0, 1, 2, 3),
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
decode_head=dict(num_classes=150, in_channels=[64, 128, 256, 512]),
auxiliary_head=dict(num_classes=150, in_channels=256),
test_cfg=dict(mode='whole')
)
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=30, layer_decay_rate=1.0,
depths=[4, 4, 18, 4]))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data=dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=16000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=192,
depths=[5, 5, 24, 5],
groups=[12, 24, 48, 96],
mlp_ratio=4.,
drop_path_rate=0.4,
norm_layer='LN',
layer_scale=1.0,
offset_scale=2.0,
post_norm=True,
with_cp=False,
out_indices=(0, 1, 2, 3),
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
decode_head=dict(num_classes=150, in_channels=[192, 384, 768, 1536]),
auxiliary_head=dict(num_classes=150, in_channels=768),
test_cfg=dict(mode='whole'))
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=39, layer_decay_rate=0.94,
depths=[5, 5, 24, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=16000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_80k_mapillary.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=192,
depths=[5, 5, 24, 5],
groups=[12, 24, 48, 96],
mlp_ratio=4.,
drop_path_rate=0.4,
norm_layer='LN',
layer_scale=1.0,
offset_scale=2.0,
post_norm=True,
with_cp=False,
out_indices=(0, 1, 2, 3),
init_cfg=None),
decode_head=dict(num_classes=150, in_channels=[192, 384, 768, 1536]),
auxiliary_head=dict(num_classes=150, in_channels=768),
test_cfg=dict(mode='whole'))
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=39, layer_decay_rate=0.94,
depths=[5, 5, 24, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=4000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth'
model = dict(
backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=192,
depths=[5, 5, 24, 5],
groups=[12, 24, 48, 96],
mlp_ratio=4.,
drop_path_rate=0.4,
norm_layer='LN',
layer_scale=1.0,
offset_scale=2.0,
post_norm=True,
with_cp=False,
out_indices=(0, 1, 2, 3),
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
decode_head=dict(num_classes=150, in_channels=[192, 384, 768, 1536]),
auxiliary_head=dict(num_classes=150, in_channels=768),
test_cfg=dict(mode='whole'))
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=39, layer_decay_rate=0.94,
depths=[5, 5, 24, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=8000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# UPerNet
[Unified Perceptual Parsing for Scene Understanding](https://arxiv.org/pdf/1807.10221.pdf)
## Introduction
<!-- [ABSTRACT] -->
Humans recognize the visual world at multiple levels: we effortlessly categorize scenes and detect objects inside, while also identifying the textures and surfaces of the objects along with their different compositional parts. In this paper, we study a new task called Unified Perceptual Parsing, which requires the machine vision systems to recognize as many visual concepts as possible from a given image. A multi-task framework called UPerNet and a training strategy are developed to learn from heterogeneous image annotations. We benchmark our framework on Unified Perceptual Parsing and show that it is able to effectively segment a wide range of concepts from images. The trained networks are further applied to discover visual knowledge in natural scenes. Models are available at [this https URL](https://github.com/CSAILVision/unifiedparsing).
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/24582831/142903077-44e8e0da-7276-4bda-bd2b-0df1680ca845.png" width="70%"/>
</div>
## Results and models
### ADE20K
**ADE20K Semantic Segmentation**
| backbone | resolution | single scale | multi scale | #params | FLOPs | Download |
| :------------: | :--------: | :----------: | :---------: | :-----: | :---: | :---: |
| InternImage-T | 512x512 | 47.9 | 48.1 | 59M | 944G | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512_160k_ade20k.pth) \| [cfg](./upernet_internimage_t_512_160k_ade20k.py) |
| InternImage-S | 512x512 | 50.1 | 50.9 | 80M | 1017G | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512_160k_ade20k.pth) \| [cfg](./upernet_internimage_s_512_160k_ade20k.py) |
| InternImage-B | 512x512 | 50.8 | 51.3 | 128M | 1185G | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512_160k_ade20k.pth) \| [cfg](./upernet_internimage_b_512_160k_ade20k.py) |
| InternImage-L | 640x640 | 53.9 | 54.1 | 256M | 2526G | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_640_160k_ade20k.pth) \| [cfg](./upernet_internimage_l_640_160k_ade20k.py) |
| InternImage-XL | 640x640 | 55.0 | 55.3 | 368M | 3142G | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_640_160k_ade20k.pth) \| [cfg](./upernet_internimage_xl_640_160k_ade20k.py) |
...@@ -5,3 +5,4 @@ ...@@ -5,3 +5,4 @@
# -------------------------------------------------------- # --------------------------------------------------------
from .models import * # noqa: F401,F403 from .models import * # noqa: F401,F403
from .datasets import * # noqa: F401,F403
\ No newline at end of file
# Copyright (c) OpenMMLab. All rights reserved.
from .mapillary import MapillaryDataset # noqa: F401,F403
from .nyu_depth_v2 import NYUDepthV2Dataset # noqa: F401,F403
from .pipelines import * # noqa: F401,F403
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from mmseg.datasets.builder import DATASETS
from mmseg.datasets.custom import CustomDataset
@DATASETS.register_module()
class MapillaryDataset(CustomDataset):
"""Mapillary dataset.
"""
CLASSES = ('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier',
'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', 'Pedestrian Area',
'Rail Track', 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', 'Tunnel',
'Person', 'Bicyclist', 'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk',
'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation',
'Water', 'Banner', 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera',
'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole',
'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light',
'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', 'Bicycle', 'Boat',
'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer',
'Truck', 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled')
PALETTE = [[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153],
[180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255],
[140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96],
[230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232],
[150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60],
[255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128],
[255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180],
[190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30],
[255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220],
[220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40],
[33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150],
[210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80],
[250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20],
[119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90],
[0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], [0, 0, 70],
[0, 0, 192], [32, 32, 32], [120, 10, 10], [0, 0, 0]]
def __init__(self, **kwargs):
super(MapillaryDataset, self).__init__(
img_suffix='.jpg',
seg_map_suffix='.png',
reduce_zero_label=False,
**kwargs)
\ No newline at end of file
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from mmseg.datasets.builder import DATASETS
from mmseg.datasets.custom import CustomDataset
@DATASETS.register_module()
class NYUDepthV2Dataset(CustomDataset):
"""NYU Depth V2 dataset.
"""
CLASSES = ('wall', 'floor', 'cabinet', 'bed', 'chair',
'sofa', 'table', 'door', 'window', 'bookshelf',
'picture', 'counter', 'blinds', 'desk', 'shelves',
'curtain', 'dresser', 'pillow', 'mirror', 'floor mat',
'clothes', 'ceiling', 'books', 'refridgerator', 'television',
'paper', 'towel', 'shower curtain', 'box', 'whiteboard',
'person', 'night stand', 'toilet', 'sink', 'lamp',
'bathtub', 'bag', 'otherstructure', 'otherfurniture', 'otherprop')
PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
[4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
[230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
[150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
[143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
[0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
[255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
[255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
[255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
[224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],]
def __init__(self, split, **kwargs):
super(NYUDepthV2Dataset, self).__init__(
img_suffix='.png',
seg_map_suffix='.png',
split=split,
reduce_zero_label=True,
**kwargs)
\ No newline at end of file
# Copyright (c) OpenMMLab. All rights reserved.
from .formatting import DefaultFormatBundle, ToMask
from .transform import MapillaryHack, PadShortSide, SETR_Resize
__all__ = [
'DefaultFormatBundle', 'ToMask', 'SETR_Resize',
'PadShortSide', 'MapillaryHack'
]
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmseg.datasets.builder import PIPELINES
from mmseg.datasets.pipelines.formatting import to_tensor
@PIPELINES.register_module(force=True)
class DefaultFormatBundle(object):
"""Default formatting bundle.
It simplifies the pipeline of formatting common fields, including "img"
and "gt_semantic_seg". These fields are formatted as follows.
- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
(3)to DataContainer (stack=True)
"""
def __call__(self, results):
"""Call function to transform and format common fields in results.
Args:
results (dict): Result dict contains the data to convert.
Returns:
dict: The result dict contains the data that is formatted with
default bundle.
"""
if 'img' in results:
img = results['img']
if len(img.shape) < 3:
img = np.expand_dims(img, -1)
img = np.ascontiguousarray(img.transpose(2, 0, 1))
results['img'] = DC(to_tensor(img), stack=True)
if 'gt_semantic_seg' in results:
# convert to long
results['gt_semantic_seg'] = DC(to_tensor(
results['gt_semantic_seg'][None, ...].astype(np.int64)),
stack=True)
if 'gt_masks' in results:
results['gt_masks'] = DC(to_tensor(results['gt_masks']))
if 'gt_labels' in results:
results['gt_labels'] = DC(to_tensor(results['gt_labels']))
return results
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module()
class ToMask(object):
"""Transfer gt_semantic_seg to binary mask and generate gt_labels."""
def __init__(self, ignore_index=255):
self.ignore_index = ignore_index
def __call__(self, results):
gt_semantic_seg = results['gt_semantic_seg']
gt_labels = np.unique(gt_semantic_seg)
# remove ignored region
gt_labels = gt_labels[gt_labels != self.ignore_index]
gt_masks = []
for class_id in gt_labels:
gt_masks.append(gt_semantic_seg == class_id)
if len(gt_masks) == 0:
# Some image does not have annotation (all ignored)
gt_masks = np.empty((0, ) + results['pad_shape'][:-1], dtype=np.int64)
gt_labels = np.empty((0, ), dtype=np.int64)
else:
gt_masks = np.asarray(gt_masks, dtype=np.int64)
gt_labels = np.asarray(gt_labels, dtype=np.int64)
results['gt_labels'] = gt_labels
results['gt_masks'] = gt_masks
return results
def __repr__(self):
return self.__class__.__name__ + \
f'(ignore_index={self.ignore_index})'
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
from mmseg.datasets.builder import PIPELINES
@PIPELINES.register_module()
class SETR_Resize(object):
"""Resize images & seg.
This transform resizes the input image to some scale. If the input dict
contains the key "scale", then the scale in the input dict is used,
otherwise the specified scale in the init method is used.
``img_scale`` can either be a tuple (single-scale) or a list of tuple
(multi-scale). There are 3 multiscale modes:
- ``ratio_range is not None``: randomly sample a ratio from the ratio range
and multiply it with the image scale.
- ``ratio_range is None and multiscale_mode == "range"``: randomly sample a
scale from the a range.
- ``ratio_range is None and multiscale_mode == "value"``: randomly sample a
scale from multiple scales.
Args:
img_scale (tuple or list[tuple]): Images scales for resizing.
multiscale_mode (str): Either "range" or "value".
ratio_range (tuple[float]): (min_ratio, max_ratio)
keep_ratio (bool): Whether to keep the aspect ratio when resizing the
image.
"""
def __init__(self,
img_scale=None,
multiscale_mode='range',
ratio_range=None,
keep_ratio=True,
crop_size=None,
setr_multi_scale=False):
if img_scale is None:
self.img_scale = None
else:
if isinstance(img_scale, list):
self.img_scale = img_scale
else:
self.img_scale = [img_scale]
# assert mmcv.is_list_of(self.img_scale, tuple)
if ratio_range is not None:
# mode 1: given a scale and a range of image ratio
assert len(self.img_scale) == 1
else:
# mode 2: given multiple scales or a range of scales
assert multiscale_mode in ['value', 'range']
self.multiscale_mode = multiscale_mode
self.ratio_range = ratio_range
self.keep_ratio = keep_ratio
self.crop_size = crop_size
self.setr_multi_scale = setr_multi_scale
@staticmethod
def random_select(img_scales):
"""Randomly select an img_scale from given candidates.
Args:
img_scales (list[tuple]): Images scales for selection.
Returns:
(tuple, int): Returns a tuple ``(img_scale, scale_dix)``,
where ``img_scale`` is the selected image scale and
``scale_idx`` is the selected index in the given candidates.
"""
assert mmcv.is_list_of(img_scales, tuple)
scale_idx = np.random.randint(len(img_scales))
img_scale = img_scales[scale_idx]
return img_scale, scale_idx
@staticmethod
def random_sample(img_scales):
"""Randomly sample an img_scale when ``multiscale_mode=='range'``.
Args:
img_scales (list[tuple]): Images scale range for sampling.
There must be two tuples in img_scales, which specify the lower
and uper bound of image scales.
Returns:
(tuple, None): Returns a tuple ``(img_scale, None)``, where
``img_scale`` is sampled scale and None is just a placeholder
to be consistent with :func:`random_select`.
"""
assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
img_scale_long = [max(s) for s in img_scales]
img_scale_short = [min(s) for s in img_scales]
long_edge = np.random.randint(
min(img_scale_long),
max(img_scale_long) + 1)
short_edge = np.random.randint(
min(img_scale_short),
max(img_scale_short) + 1)
img_scale = (long_edge, short_edge)
return img_scale, None
@staticmethod
def random_sample_ratio(img_scale, ratio_range):
"""Randomly sample an img_scale when ``ratio_range`` is specified.
A ratio will be randomly sampled from the range specified by
``ratio_range``. Then it would be multiplied with ``img_scale`` to
generate sampled scale.
Args:
img_scale (tuple): Images scale base to multiply with ratio.
ratio_range (tuple[float]): The minimum and maximum ratio to scale
the ``img_scale``.
Returns:
(tuple, None): Returns a tuple ``(scale, None)``, where
``scale`` is sampled ratio multiplied with ``img_scale`` and
None is just a placeholder to be consistent with
:func:`random_select`.
"""
assert isinstance(img_scale, tuple) and len(img_scale) == 2
min_ratio, max_ratio = ratio_range
assert min_ratio <= max_ratio
ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
return scale, None
def _random_scale(self, results):
"""Randomly sample an img_scale according to ``ratio_range`` and
``multiscale_mode``.
If ``ratio_range`` is specified, a ratio will be sampled and be
multiplied with ``img_scale``.
If multiple scales are specified by ``img_scale``, a scale will be
sampled according to ``multiscale_mode``.
Otherwise, single scale will be used.
Args:
results (dict): Result dict from :obj:`dataset`.
Returns:
dict: Two new keys 'scale` and 'scale_idx` are added into
``results``, which would be used by subsequent pipelines.
"""
if self.ratio_range is not None:
scale, scale_idx = self.random_sample_ratio(
self.img_scale[0], self.ratio_range)
elif len(self.img_scale) == 1:
scale, scale_idx = self.img_scale[0], 0
elif self.multiscale_mode == 'range':
scale, scale_idx = self.random_sample(self.img_scale)
elif self.multiscale_mode == 'value':
scale, scale_idx = self.random_select(self.img_scale)
else:
raise NotImplementedError
results['scale'] = scale
results['scale_idx'] = scale_idx
def _resize_img(self, results):
"""Resize images with ``results['scale']``."""
if self.keep_ratio:
if self.setr_multi_scale:
if min(results['scale']) < self.crop_size[0]:
new_short = self.crop_size[0]
else:
new_short = min(results['scale'])
h, w = results['img'].shape[:2]
if h > w:
new_h, new_w = new_short * h / w, new_short
else:
new_h, new_w = new_short, new_short * w / h
results['scale'] = (new_h, new_w)
img, scale_factor = mmcv.imrescale(results['img'],
results['scale'],
return_scale=True)
# the w_scale and h_scale has minor difference
# a real fix should be done in the mmcv.imrescale in the future
new_h, new_w = img.shape[:2]
h, w = results['img'].shape[:2]
w_scale = new_w / w
h_scale = new_h / h
else:
img, w_scale, h_scale = mmcv.imresize(results['img'],
results['scale'],
return_scale=True)
scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
dtype=np.float32)
results['img'] = img
results['img_shape'] = img.shape
results['pad_shape'] = img.shape # in case that there is no padding
results['scale_factor'] = scale_factor
results['keep_ratio'] = self.keep_ratio
def _resize_seg(self, results):
"""Resize semantic segmentation map with ``results['scale']``."""
for key in results.get('seg_fields', []):
if self.keep_ratio:
gt_seg = mmcv.imrescale(results[key],
results['scale'],
interpolation='nearest')
else:
gt_seg = mmcv.imresize(results[key],
results['scale'],
interpolation='nearest')
results['gt_semantic_seg'] = gt_seg
def __call__(self, results):
"""Call function to resize images, bounding boxes, masks, semantic
segmentation map.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
'keep_ratio' keys are added into result dict.
"""
if 'scale' not in results:
self._random_scale(results)
self._resize_img(results)
self._resize_seg(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += (f'(img_scale={self.img_scale}, '
f'multiscale_mode={self.multiscale_mode}, '
f'ratio_range={self.ratio_range}, '
f'keep_ratio={self.keep_ratio})')
return repr_str
@PIPELINES.register_module()
class PadShortSide(object):
"""Pad the image & mask.
Pad to the minimum size that is equal or larger than a number.
Added keys are "pad_shape", "pad_fixed_size",
Args:
size (int, optional): Fixed padding size.
pad_val (float, optional): Padding value. Default: 0.
seg_pad_val (float, optional): Padding value of segmentation map.
Default: 255.
"""
def __init__(self, size=None, pad_val=0, seg_pad_val=255):
self.size = size
self.pad_val = pad_val
self.seg_pad_val = seg_pad_val
# only one of size and size_divisor should be valid
assert size is not None
def _pad_img(self, results):
"""Pad images according to ``self.size``."""
h, w = results['img'].shape[:2]
new_h = max(h, self.size)
new_w = max(w, self.size)
padded_img = mmcv.impad(results['img'],
shape=(new_h, new_w),
pad_val=self.pad_val)
results['img'] = padded_img
results['pad_shape'] = padded_img.shape
# results['unpad_shape'] = (h, w)
def _pad_seg(self, results):
"""Pad masks according to ``results['pad_shape']``."""
for key in results.get('seg_fields', []):
results[key] = mmcv.impad(results[key],
shape=results['pad_shape'][:2],
pad_val=self.seg_pad_val)
def __call__(self, results):
"""Call function to pad images, masks, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Updated result dict.
"""
h, w = results['img'].shape[:2]
if h >= self.size and w >= self.size: # 短边比窗口大,跳过
pass
else:
self._pad_img(results)
self._pad_seg(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(size={self.size}, pad_val={self.pad_val})'
return repr_str
@PIPELINES.register_module()
class MapillaryHack(object):
"""map MV 65 class to 19 class like Cityscapes."""
def __init__(self):
self.map = [[13, 24, 41], [2, 15], [17], [6], [3],
[45, 47], [48], [50], [30], [29], [27], [19], [20, 21, 22],
[55], [61], [54], [58], [57], [52]]
self.others = [i for i in range(66)]
for i in self.map:
for j in i:
if j in self.others:
self.others.remove(j)
def __call__(self, results):
"""Call function to process the image with gamma correction.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Processed results.
"""
gt_map = results['gt_semantic_seg']
# others -> 255
new_gt_map = np.zeros_like(gt_map)
for value in self.others:
new_gt_map[gt_map == value] = 255
for index, map in enumerate(self.map):
for value in map:
new_gt_map[gt_map == value] = index
results['gt_semantic_seg'] = new_gt_map
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment