Commit 0d97cc8c authored by Sugon_ldc
Browse files

add new model

parents
Pipeline #316 failed with stages
in 0 seconds
# ESPNetV1 training config; inherits ../_base_/cityscapes.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes.yml'

batch_size: 4
iters: 120000

optimizer:
  # Do not merge the optimizer settings of the base config; use only these.
  _inherited_: False
  type: adam
  weight_decay: 0.0002

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.001
  end_lr: 0.0
  power: 0.9

loss:
  types:
    - type: CrossEntropyLoss
      # 19 entries, the same count as model.num_classes below.
      weight: [2.79834108, 6.92945723, 3.84068512, 9.94349362, 9.77098823,
               9.51484, 10.30981624, 9.94307377, 4.64933892, 9.55759938,
               7.86692178, 9.53126629, 10.3496365, 6.67234062, 10.26054204,
               10.28785275, 10.28988296, 10.40546021, 10.13848367]
  coef: [1]

model:
  type: ESPNetV1
  in_channels: 3
  num_classes: 19
  level2_depth: 2
  level3_depth: 8
# FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation
## Reference
> Wu, Huikai, Junge Zhang, Kaiqi Huang, Kongming Liang, and Yizhou Yu. "FastFCN: Rethinking dilated convolution in the backbone for semantic segmentation." arXiv preprint arXiv:1903.11816 (2019).
## Performance
### ADE20K
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|FastFCN|ResNet50_vd|480x480|120000|43.76%|44.11%|44.48%|[model](https://bj.bcebos.com/paddleseg/dygraph/ade20k/fastfcn_resnet50_os8_ade20k_480x480_120k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/ade20k/fastfcn_resnet50_os8_ade20k_480x480_120k/train.log) \| [vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/scalar?id=e159d5be3860b8d08762c0416ab54acc)|
# FastFCN (ResNet50_vd backbone) training config; inherits
# ../_base_/ade20k.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/ade20k.yml'

batch_size: 4
iters: 120000

train_dataset:
  transforms:
    - type: ResizeStepScaling
      min_scale_factor: 0.5
      max_scale_factor: 2.0
      scale_step_size: 0.25
    - type: RandomPaddingCrop
      crop_size: [480, 480]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.4
      contrast_range: 0.4
      saturation_range: 0.4
    - type: Normalize

model:
  type: FastFCN
  backbone:
    type: ResNet50_vd
    output_stride: 8
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
  num_codes: 32
  mid_channels: 512
  use_jpu: True
  aux_loss: True
  use_se_loss: True
  add_lateral: True

loss:
  # Three loss terms, weighted by the three entries of coef in the same order.
  types:
    - type: CrossEntropyLoss
    - type: CrossEntropyLoss
    - type: SECrossEntropyLoss
  coef: [1, 0.4, 0.2]

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.01
  end_lr: 0
  power: 0.9
# Fast-SCNN: Fast Semantic Segmentation Network
## Reference
> Poudel, Rudra PK, Stephan Liwicki, and Roberto Cipolla. "Fast-SCNN: Fast semantic segmentation network." arXiv preprint arXiv:1902.04502 (2019).
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|Fast SCNN|-|1024x1024|160000|69.31%|-|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/fastscnn_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/fastscnn_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app/scalar?id=3b4c3f01c9213cac14e53c69d262a337)|
# FastSCNN training config (160k iterations); inherits
# ../_base_/cityscapes_1024x1024.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes_1024x1024.yml'

batch_size: 4
iters: 160000

loss:
  types:
    - type: CrossEntropyLoss
  # Two coefficients for one listed loss type; presumably the main and
  # auxiliary outputs (enable_auxiliary_loss: True) - TODO confirm.
  coef: [1.0, 0.4]

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.05
  end_lr: 1.0e-4
  power: 0.9

model:
  type: FastSCNN
  num_classes: 19
  enable_auxiliary_loss: True
  pretrained: null
# FastSCNN training config (40k iterations, learning rate 0.025); inherits
# ../_base_/cityscapes_1024x1024.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes_1024x1024.yml'

batch_size: 4
iters: 40000

loss:
  types:
    - type: CrossEntropyLoss
  # Two coefficients for one listed loss type; presumably the main and
  # auxiliary outputs (enable_auxiliary_loss: True) - TODO confirm.
  coef: [1.0, 0.4]

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.025
  end_lr: 1.0e-4
  power: 0.9

model:
  type: FastSCNN
  num_classes: 19
  enable_auxiliary_loss: True
  pretrained: null
# FastSCNN training config (40k iterations) whose first loss term is a
# MixedLoss of CrossEntropyLoss and SemanticConnectivityLoss; inherits
# ../_base_/cityscapes_1024x1024.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes_1024x1024.yml'

batch_size: 4
iters: 40000

loss:
  types:
    - type: MixedLoss
      losses:
        - type: CrossEntropyLoss
        - type: SemanticConnectivityLoss
      # Weights of the two inner losses of this MixedLoss.
      coef: [1, 0.01]
    - type: CrossEntropyLoss
  # Weights of the two top-level loss terms listed under types.
  coef: [1.0, 0.4]

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.025
  end_lr: 1.0e-4
  power: 0.9

model:
  type: FastSCNN
  num_classes: 19
  enable_auxiliary_loss: True
  pretrained: null
# Deep High-Resolution Representation Learning for Visual Recognition
## Reference
> Wang, Jingdong, Ke Sun, Tianheng Cheng, Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu et al. "Deep high-resolution representation learning for visual recognition." IEEE transactions on pattern analysis and machine intelligence (2020).
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|FCN|HRNet_W18|1024x512|80000|78.97%|79.49%|79.74%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/fcn_hrnetw18_cityscapes_1024x512_80k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/fcn_hrnetw18_cityscapes_1024x512_80k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=bebec8e1a3802c4babd3c69e1bf50d51)|
|FCN|HRNet_W48|1024x512|80000|80.70%|81.24%|81.56%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/fcn_hrnetw48_cityscapes_1024x512_80k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/fcn_hrnetw48_cityscapes_1024x512_80k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=ae1cb76014cdc54406c36f1e3dc2a530)|
### Pascal VOC 2012 + Aug
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|FCN|HRNet_W18|512x512|40000|75.39%|76.04%|77.09%|[model](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/fcn_hrnetw18_voc12aug_512x512_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/fcn_hrnetw18_voc12aug_512x512_40k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=fbe6caaca0f7d7ea1dba1c60b8db2a7e)|
|FCN|HRNet_W48|512x512|40000|78.72%|79.52%|80.10%|[model](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/fcn_hrnetw48_voc12aug_512x512_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/fcn_hrnetw48_voc12aug_512x512_40k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=20b404212fcbb5b7b329ab0c16124553)|
# FCN with an HRNet_W18 backbone, 80k iterations; inherits
# ../_base_/cityscapes.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes.yml'

model:
  type: FCN
  backbone:
    type: HRNet_W18
    align_corners: False
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w18_ssld.tar.gz
  num_classes: 19
  # No pretrained weights for the full model.
  pretrained: null
  backbone_indices: [-1]

optimizer:
  weight_decay: 0.0005

iters: 80000
# FCN with an HRNet_W18 backbone, 80k iterations, batch size 4; inherits
# ../_base_/cityscapes.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes.yml'

model:
  type: FCN
  backbone:
    type: HRNet_W18
    align_corners: False
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w18_ssld.tar.gz
  num_classes: 19
  # No pretrained weights for the full model.
  pretrained: null
  backbone_indices: [-1]

optimizer:
  weight_decay: 0.0005

iters: 80000
batch_size: 4
# FCN with an HRNet_W18 backbone, 80k iterations, batch size 4, trained with
# a MixedLoss of CrossEntropyLoss and SemanticConnectivityLoss; inherits
# ../_base_/cityscapes.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes.yml'

model:
  type: FCN
  backbone:
    type: HRNet_W18
    align_corners: False
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w18_ssld.tar.gz
  num_classes: 19
  # No pretrained weights for the full model.
  pretrained: null
  backbone_indices: [-1]

optimizer:
  weight_decay: 0.0005

iters: 80000
batch_size: 4

loss:
  types:
    - type: MixedLoss
      losses:
        - type: CrossEntropyLoss
        - type: SemanticConnectivityLoss
      # Weights of the two inner losses of this MixedLoss.
      coef: [1, 0.05]
  # Weight of the single top-level loss term.
  coef: [1]
# FCN with an HRNet_W18 backbone on the PPHumanSeg14K dataset (2 classes).
# Stand-alone config: it declares no _base_, so the datasets, optimizer,
# scheduler and loss are all defined here.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
train_dataset:
  type: PPHumanSeg14K
  dataset_root: data/PP-HumanSeg14K
  transforms:
    - type: ResizeStepScaling
      min_scale_factor: 0.5
      max_scale_factor: 2.0
      scale_step_size: 0.25
    - type: RandomPaddingCrop
      crop_size: [398, 224]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.4
      contrast_range: 0.4
      saturation_range: 0.4
    - type: Normalize
  mode: train

val_dataset:
  type: PPHumanSeg14K
  dataset_root: data/PP-HumanSeg14K
  transforms:
    - type: Normalize
  mode: val

model:
  type: FCN
  backbone:
    type: HRNet_W18
    align_corners: False
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w18_ssld.tar.gz
  num_classes: 2
  # No pretrained weights for the full model.
  pretrained: null
  backbone_indices: [-1]

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 0.0005

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.05
  end_lr: 0
  power: 0.9

loss:
  types:
    - type: CrossEntropyLoss
  coef: [1]

iters: 10000
batch_size: 64
# FCN with an HRNet_W18 backbone, 21 classes; inherits
# ../_base_/pascal_voc12aug.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/pascal_voc12aug.yml'

model:
  type: FCN
  backbone:
    type: HRNet_W18
    align_corners: False
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w18_ssld.tar.gz
  num_classes: 21
  # No pretrained weights for the full model.
  pretrained: null
  backbone_indices: [-1]

optimizer:
  weight_decay: 0.0005
# Inherits ./fcn_hrnetw18_cityscapes_1024x512_80k.yml and overrides only the
# backbone type and its pretrained weights (HRNet_W48).
_base_: './fcn_hrnetw18_cityscapes_1024x512_80k.yml'

model:
  backbone:
    type: HRNet_W48
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w48_ssld.tar.gz
# Inherits ./fcn_hrnetw18_voc12aug_512x512_40k.yml and overrides only the
# backbone type and its pretrained weights (HRNet_W48).
_base_: './fcn_hrnetw18_voc12aug_512x512_40k.yml'

model:
  backbone:
    type: HRNet_W48
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w48_ssld.tar.gz
# GCNet: Non-local networks meet squeeze-excitation networks and beyond
## Reference
> Cao, Yue, Jiarui Xu, Stephen Lin, Fangyun Wei, and Han Hu. "GCNet: Non-local networks meet squeeze-excitation networks and beyond." In Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 0-0. 2019.
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|GCNet|ResNet50_OS8|1024x512|80000|79.50%|79.77%|79.69%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/gcnet_resnet50_os8_cityscapes_1024x512_80k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/gcnet_resnet50_os8_cityscapes_1024x512_80k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=e3801edb9a6f5b33eb890f5a1ae6ed7b)|
|GCNet|ResNet101_OS8|1024x512|80000|81.01%|81.30%|81.64%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/gcnet_resnet101_os8_cityscapes_1024x512_80k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/gcnet_resnet101_os8_cityscapes_1024x512_80k/train.log) \| [vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/scalar?id=aa88e7980f4d6839537662a3a3d18851)|
### Pascal VOC 2012 + Aug
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|GCNet|ResNet50_OS8|512x512|40000|80.32%|80.39%|80.54%|[model](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/gcnet_resnet50_os8_voc12aug_512x512_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/gcnet_resnet50_os8_voc12aug_512x512_40k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=86cbaac3fe98fdbb635e246c2c02e87b)|
|GCNet|ResNet101_OS8|512x512|40000|79.64%|79.59%|79.94%|[model](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/gcnet_resnet101_os8_voc12aug_512x512_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/gcnet_resnet101_os8_voc12aug_512x512_40k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=73f0484b034f6c27bf481c7a3b05e9ae)|
# Inherits gcnet_resnet50_os8_cityscapes_1024x512_80k.yml and overrides only
# the backbone type and its pretrained weights (ResNet101_vd).
_base_: 'gcnet_resnet50_os8_cityscapes_1024x512_80k.yml'

model:
  backbone:
    type: ResNet101_vd
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet101_vd_ssld.tar.gz
# Inherits ./gcnet_resnet50_os8_voc12aug_512x512_40k.yml and overrides only
# the backbone type and its pretrained weights (ResNet101_vd).
_base_: './gcnet_resnet50_os8_voc12aug_512x512_40k.yml'

model:
  backbone:
    type: ResNet101_vd
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet101_vd_ssld.tar.gz
# GCNet with a ResNet50_vd backbone (output stride 8), 80k iterations;
# inherits ../_base_/cityscapes.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/cityscapes.yml'

batch_size: 2
iters: 80000

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.01
  power: 0.9
  end_lr: 1.0e-5

loss:
  types:
    - type: CrossEntropyLoss
  # Two coefficients for one listed loss type; presumably the main and
  # auxiliary outputs (enable_auxiliary_loss: True) - TODO confirm.
  coef: [1, 0.4]

model:
  type: GCNet
  backbone:
    type: ResNet50_vd
    output_stride: 8
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
  gc_channels: 512
  ratio: 0.25
  enable_auxiliary_loss: True
  align_corners: False
  # No pretrained weights for the full model.
  pretrained: null
# GCNet with a ResNet50_vd backbone (output stride 8); inherits
# ../_base_/pascal_voc12aug.yml.
# NOTE(review): nesting was reconstructed from flattened text - confirm
# against the upstream config before relying on it.
_base_: '../_base_/pascal_voc12aug.yml'

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.01
  power: 0.9
  end_lr: 1.0e-5

loss:
  types:
    - type: CrossEntropyLoss
  # Two coefficients for one listed loss type; presumably the main and
  # auxiliary outputs (enable_auxiliary_loss: True) - TODO confirm.
  coef: [1, 0.4]

model:
  type: GCNet
  backbone:
    type: ResNet50_vd
    output_stride: 8
    # Pretrained weights for the backbone only.
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
  gc_channels: 512
  ratio: 0.25
  enable_auxiliary_loss: True
  align_corners: False
  # No pretrained weights for the full model.
  pretrained: null
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment