Commit 0f94318a authored by Zhe Chen's avatar Zhe Chen Committed by zhe chen
Browse files

[release] Add configs and models for cityscapes (#31)

* add _base_ files

* support mapillary dataset

* reorganize ade20k configs

* move figs to docs

* release configs and models of cityscapes

* update README.md

* update load_from urls
parent 0f982e7a
# dataset settings
# Base dataset config for STARE retinal-vessel segmentation
# (mmsegmentation exec-style config: top-level variables form the config dict).
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
# ImageNet mean/std; to_rgb converts the loaded BGR image to RGB first.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# STARE images are 605x700; training uses small 128x128 crops.
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    # Random rescale between 0.5x and 2.0x of img_scale before cropping.
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    # cat_max_ratio caps the fraction of a crop one category may occupy.
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # seg_pad_val=255 marks padded label pixels as ignore-index for the loss.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    # RepeatDataset repeats the tiny STARE train split 40000x so one pass
    # supplies enough samples for iteration-based training.
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
# model settings
# SegFormer with a MiT-B0 (MixVisionTransformer) backbone; base model config
# that dataset-specific experiment configs override.
norm_cfg = dict(type='SyncBN', requires_grad=True)  # SyncBN for multi-GPU training
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='MixVisionTransformer',
        in_channels=3,
        embed_dims=32,  # MiT-B0: stage widths 32/64/160/256 (embed_dims * num_heads)
        num_stages=4,
        num_layers=[2, 2, 2, 2],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[7, 3, 3, 3],
        sr_ratios=[8, 4, 2, 1],  # spatial-reduction ratios of the efficient attention
        out_indices=(0, 1, 2, 3),
        mlp_ratio=4,
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.1),
    decode_head=dict(
        type='SegformerHead',
        in_channels=[32, 64, 160, 256],  # one input per backbone stage
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=19,  # Cityscapes default; overridden by downstream configs
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
\ No newline at end of file
# optimizer
# SGD with polynomial learning-rate decay; shared 20k-iteration schedule.
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
# Iteration-based training: checkpoint and evaluate every 2k of 20k iters.
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
# optimizer
# SGD with polynomial learning-rate decay; shared 320k-iteration schedule.
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
# Iteration-based training: checkpoint and evaluate every 32k of 320k iters.
runner = dict(type='IterBasedRunner', max_iters=320000)
checkpoint_config = dict(by_epoch=False, interval=32000)
# pre_eval=True added for consistency with the 20k/40k/80k schedules; it
# enables mmseg's memory-efficient progressive evaluation (same metrics).
evaluation = dict(interval=32000, metric='mIoU', pre_eval=True)
# optimizer
# SGD with polynomial learning-rate decay; shared 40k-iteration schedule.
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
# Iteration-based training: checkpoint and evaluate every 4k of 40k iters.
runner = dict(type='IterBasedRunner', max_iters=40000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
# optimizer
# SGD with polynomial learning-rate decay; shared 80k-iteration schedule.
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
# Iteration-based training: checkpoint and evaluate every 8k of 80k iters.
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=8000)
evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
# ADE20K
Introduced by Zhou et al. in [Scene Parsing Through ADE20K Dataset](https://paperswithcode.com/paper/scene-parsing-through-ade20k-dataset).
The ADE20K semantic segmentation dataset contains more than 20K scene-centric images exhaustively annotated with pixel-level object and object-part labels. There are 150 semantic categories in total, covering stuff classes such as sky, road, and grass, as well as discrete objects such as person, car, and bed.
## Model Zoo
### UperNet + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:|
| InternImage-T | 512x512 | 47.9 / 48.1 | 0.23s / iter | 10.5h | 59M | 944G | [config](./upernet_internimage_t_512_160k_ade20k.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512_160k_ade20k.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512_160k_ade20k.log.json) |
| InternImage-S | 512x512 | 50.1 / 50.9 | 0.25s / iter | 11.5h | 80M | 1017G | [config](./upernet_internimage_s_512_160k_ade20k.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512_160k_ade20k.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512_160k_ade20k.log.json) |
| InternImage-B | 512x512 | 50.8 / 51.3 | 0.26s / iter | 12h | 128M | 1185G | [config](./upernet_internimage_b_512_160k_ade20k.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512_160k_ade20k.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512_160k_ade20k.log.json) |
| InternImage-L | 640x640 | 53.9 / 54.1 | 0.42s / iter | 19h | 256M | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py)| [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_640_160k_ade20k.log.json) |
| InternImage-XL | 640x640 | 55.0 / 55.3 | 0.47s / iter | 22h | 368M | 3142G | [config](./upernet_internimage_xl_640_160k_ade20k.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_640_160k_ade20k.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_640_160k_ade20k.log.json) |
- Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
- The logs are our recent newly trained ones. There are slight differences between the results in logs and our paper.
......@@ -50,8 +50,8 @@ test_pipeline = [
optimizer = dict(
_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.05,
constructor='CustomLayerDecayOptimizerConstructor',
paramwise_cfg=dict(num_layers=30, layer_decay_rate=1.0,
depths=[4, 4, 18, 4]))
paramwise_cfg=dict(num_layers=33, layer_decay_rate=1.0,
depths=[4, 4, 21, 4]))
lr_config = dict(_delete_=True, policy='poly',
warmup='linear',
warmup_iters=1500,
......
# Cityscapes
Introduced by Cordts et al. in [The Cityscapes Dataset for Semantic Urban Scene Understanding](https://paperswithcode.com/paper/the-cityscapes-dataset-for-semantic-urban).
Cityscapes is a large-scale database which focuses on semantic understanding of urban street scenes. It provides semantic, instance-wise, and dense pixel annotations for 30 classes grouped into 8 categories (flat surfaces, humans, vehicles, constructions, objects, nature, sky, and void). The dataset consists of around 5000 finely annotated images and 20000 coarsely annotated ones. Data was captured in 50 cities over several months, at different times of day, and in good weather conditions. It was originally recorded as video, so the frames were manually selected to have the following features: a large number of dynamic objects, varying scene layout, and varying background.
## Model Zoo
### UperNet + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:|
| InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512x1024_160k_cityscapes.log.json) |
| InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512x1024_160k_cityscapes.log.json) |
| InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512x1024_160k_cityscapes.log.json) |
| InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_cityscapes.log.json) |
| InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
- Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
### UperNet + InternImage (with additional data)
Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
| InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
### SegFormerHead + InternImage (with additional data)
Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# SegFormer head + InternImage-L backbone, fine-tuned for 160k iterations on
# Cityscapes with extra data, starting from a Mapillary-pretrained checkpoint.
_base_ = [
    '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
# Whole-model initialization from the 80k Mapillary run (not backbone-only).
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_80k_mapillary.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the MiT-B0 backbone from the base config
        type='InternImage',
        core_op='DCNv3',
        channels=160,
        depths=[5, 5, 22, 5],  # InternImage-L stage depths
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.4,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=2.0,
        post_norm=True,
        with_cp=False,  # set True to save memory via activation checkpointing
        out_indices=(0, 1, 2, 3),
        init_cfg=None),  # weights come from load_from instead
    # NOTE(review): num_classes=150 while Cityscapes has 19 classes — it does
    # match the Mapillary checkpoint's head (also 150 in that config), so the
    # head weights load, but confirm this is intentional.
    decode_head=dict(num_classes=150, in_channels=[160, 320, 640, 1280]),
    test_cfg=dict(mode='whole'))
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    # num_layers = sum(depths); layer-wise lr decay of 0.94 toward the stem.
    paramwise_cfg=dict(num_layers=37, layer_decay_rate=0.94,
                       depths=[5, 5, 22, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=4000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# SegFormer head + InternImage-L backbone, trained on Mapillary for 80k
# iterations; backbone initialized from ImageNet-22k classification weights.
_base_ = [
    '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the MiT-B0 backbone from the base config
        type='InternImage',
        core_op='DCNv3',
        channels=160,
        depths=[5, 5, 22, 5],  # InternImage-L stage depths
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.4,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=2.0,
        post_norm=True,
        with_cp=False,  # set True to save memory via activation checkpointing
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    # NOTE(review): confirm num_classes=150 matches the label set produced by
    # '../_base_/datasets/mapillary.py' (not visible from here).
    decode_head=dict(num_classes=150, in_channels=[160, 320, 640, 1280]),
    test_cfg=dict(mode='whole'))
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    # num_layers = sum(depths); layer-wise lr decay of 0.94 toward the stem.
    paramwise_cfg=dict(num_layers=37, layer_decay_rate=0.94,
                       depths=[5, 5, 22, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=8000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# SegFormer head + InternImage-XL backbone, fine-tuned for 160k iterations on
# Cityscapes with extra data, starting from a Mapillary-pretrained checkpoint.
_base_ = [
    '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
# Whole-model initialization from the 80k Mapillary run (not backbone-only).
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_80k_mapillary.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the MiT-B0 backbone from the base config
        type='InternImage',
        core_op='DCNv3',
        channels=192,
        depths=[5, 5, 24, 5],  # InternImage-XL stage depths
        groups=[12, 24, 48, 96],
        mlp_ratio=4.,
        drop_path_rate=0.4,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=2.0,
        post_norm=True,
        with_cp=False,  # set True to save memory via activation checkpointing
        out_indices=(0, 1, 2, 3),
        init_cfg=None),  # weights come from load_from instead
    # NOTE(review): num_classes=150 while Cityscapes has 19 classes — it does
    # match the Mapillary checkpoint's head (also 150 in that config), so the
    # head weights load, but confirm this is intentional.
    decode_head=dict(num_classes=150, in_channels=[192, 384, 768, 1536]),
    test_cfg=dict(mode='whole'))
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    # num_layers = sum(depths); layer-wise lr decay of 0.94 toward the stem.
    paramwise_cfg=dict(num_layers=39, layer_decay_rate=0.94,
                       depths=[5, 5, 24, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=4000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# SegFormer head + InternImage-XL backbone, trained on Mapillary for 80k
# iterations; backbone initialized from ImageNet-22k classification weights.
_base_ = [
    '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the MiT-B0 backbone from the base config
        type='InternImage',
        core_op='DCNv3',
        channels=192,
        depths=[5, 5, 24, 5],  # InternImage-XL stage depths
        groups=[12, 24, 48, 96],
        mlp_ratio=4.,
        drop_path_rate=0.4,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=2.0,
        post_norm=True,
        with_cp=False,  # set True to save memory via activation checkpointing
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    # NOTE(review): confirm num_classes=150 matches the label set produced by
    # '../_base_/datasets/mapillary.py' (not visible from here).
    decode_head=dict(num_classes=150, in_channels=[192, 384, 768, 1536]),
    test_cfg=dict(mode='whole'))
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    # num_layers = sum(depths); layer-wise lr decay of 0.94 toward the stem.
    paramwise_cfg=dict(num_layers=39, layer_decay_rate=0.94,
                       depths=[5, 5, 24, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=8000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# UperNet + InternImage-B backbone, trained on Cityscapes for 160k iterations;
# backbone initialized from ImageNet-1k classification weights.
_base_ = [
    '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_b_1k_224.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the ResNet-50 backbone from the base config
        type='InternImage',
        core_op='DCNv3',
        channels=112,
        depths=[4, 4, 21, 4],  # InternImage-B stage depths
        groups=[7, 14, 28, 56],
        mlp_ratio=4.,
        drop_path_rate=0.4,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=1.0,
        post_norm=True,
        with_cp=False,  # set True to save memory via activation checkpointing
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    # NOTE(review): num_classes=150 is the ADE20K class count; Cityscapes has
    # 19 classes — confirm this is intentional.
    decode_head=dict(num_classes=150, in_channels=[112, 224, 448, 896]),
    auxiliary_head=dict(num_classes=150, in_channels=448),  # taps stage 3 (448 ch)
    test_cfg=dict(mode='whole')
)
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    # num_layers = sum(depths); layer_decay_rate=1.0 disables layer-wise decay.
    paramwise_cfg=dict(num_layers=33, layer_decay_rate=1.0,
                       depths=[4, 4, 21, 4]))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data=dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
# NOTE(review): unlike the sibling configs, no optimizer_config with
# grad_clip is set here — confirm gradient clipping is intentionally omitted.
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=16000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# UperNet + InternImage-L backbone, trained on Cityscapes for 160k iterations;
# backbone initialized from ImageNet-22k classification weights.
_base_ = [
    '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the ResNet-50 backbone from the base config
        type='InternImage',
        core_op='DCNv3',
        channels=160,
        depths=[5, 5, 22, 5],  # InternImage-L stage depths
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.4,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=2.0,
        post_norm=True,
        with_cp=False,  # set True to save memory via activation checkpointing
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    # NOTE(review): num_classes=150 is the ADE20K class count; Cityscapes has
    # 19 classes — confirm this is intentional.
    decode_head=dict(num_classes=150, in_channels=[160, 320, 640, 1280]),
    auxiliary_head=dict(num_classes=150, in_channels=640),  # taps stage 3 (640 ch)
    test_cfg=dict(mode='whole'))
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    # num_layers = sum(depths); layer-wise lr decay of 0.94 toward the stem.
    paramwise_cfg=dict(num_layers=37, layer_decay_rate=0.94,
                       depths=[5, 5, 22, 5], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=16000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment