Commit 0d97cc8c authored by Sugon_ldc's avatar Sugon_ldc
Browse files

add new model

parents
Pipeline #316 failed with stages
in 0 seconds
batch_size: 4
iters: 1000
train_dataset:
type: OpticDiscSeg
dataset_root: data/optic_disc_seg
transforms:
- type: Resize
target_size: [512, 512]
- type: RandomHorizontalFlip
- type: Normalize
mode: train
val_dataset:
type: OpticDiscSeg
dataset_root: data/optic_disc_seg
transforms:
- type: Normalize
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: CrossEntropyLoss
coef: [1, 1, 1, 1, 1]
model:
type: BiSeNetV2
pretrained: null
batch_size: 4
iters: 1000
train_dataset:
type: OpticDiscSeg
dataset_root: data/optic_disc_seg
transforms:
- type: Resize
target_size: [512, 512]
- type: RandomHorizontalFlip
- type: Normalize
mode: train
val_dataset:
type: OpticDiscSeg
dataset_root: data/optic_disc_seg
transforms:
- type: Resize
target_size: [512, 512]
- type: Normalize
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: CrossEntropyLoss
coef: [1]
# distill_loss is used for distillation
distill_loss:
types:
- type: KLLoss
coef: [3]
model:
type: DeepLabV3P
backbone:
type: ResNet18_vd
output_stride: 8
multi_grid: [1, 2, 4]
pretrained: null
num_classes: 2
backbone_indices: [0, 3]
aspp_ratios: [1, 12, 24, 36]
aspp_out_channels: 256
align_corners: false
pretrained: null
batch_size: 4
iters: 1000
train_dataset:
type: OpticDiscSeg
dataset_root: data/optic_disc_seg
transforms:
- type: Resize
target_size: [512, 512]
- type: RandomHorizontalFlip
- type: Normalize
mode: train
val_dataset:
type: OpticDiscSeg
dataset_root: data/optic_disc_seg
transforms:
- type: Resize
target_size: [512, 512]
- type: Normalize
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: CrossEntropyLoss
coef: [1]
model:
type: DeepLabV3P
backbone:
type: ResNet50_vd
output_stride: 8
multi_grid: [1, 2, 4]
pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
num_classes: 2
backbone_indices: [0, 3]
aspp_ratios: [1, 12, 24, 36]
aspp_out_channels: 256
align_corners: false
pretrained: null
batch_size: 4
iters: 1000
train_dataset:
type: Dataset
dataset_root: data/optic_disc_seg
train_path: data/optic_disc_seg/train_list.txt
num_classes: 2
mode: train
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [512, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.5
contrast_range: 0.5
saturation_range: 0.5
- type: Normalize
val_dataset:
type: Dataset
dataset_root: data/optic_disc_seg
val_path: data/optic_disc_seg/val_list.txt
num_classes: 2
mode: val
transforms:
- type: Normalize
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: CrossEntropyLoss
coef: [1, 1, 1]
model:
type: PPLiteSeg
backbone:
type: STDC2
pretrained: https://bj.bcebos.com/paddleseg/dygraph/PP_STDCNet2.tar.gz
# RTFormer: Efficient Design for Real-Time Semantic Segmentation with Transformer
## Reference
> Wang, Jian, Chenhui Gou, Qiman Wu, Haocheng Feng, Junyu Han, Errui Ding, and Jingdong Wang. "RTFormer: Efficient Design for Real-Time Semantic Segmentation with Transformer." arXiv preprint arXiv:2210.07124 (2022).
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|-|-|-|-|-|-|-|-|
|RTFormer-Base|-|1024x512|120000|79.24%|79.80%|80.19%|[model](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/rtformer_base_cityscapes_1024x512_120k/model.pdparams) \| [log]() \| [vdl]()|
|RTFormer-Slim|-|1024x512|120000|76.31%|77.05%|77.58%|[model](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/rtformer_slim_cityscapes_1024x512_120k/model.pdparams) \| [log]() \| [vdl]()|
### ADE20k
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|-|-|-|-|-|-|-|-|
|RTFormer-Base|-|512x512|160000|42.02%|42.43%|42.72%|[model](https://paddleseg.bj.bcebos.com/dygraph/ade20k/rtformer_base_ade20k_512x512_160k/model.pdparams) \| [log]() \| [vdl]()|
|RTFormer-Slim|-|512x512|160000|36.67%|37.32%|37.20%|[model](https://paddleseg.bj.bcebos.com/dygraph/ade20k/rtformer_slim_ade20k_512x512_160k/model.pdparams) \| [log]() \| [vdl]()|
_base_: '../_base_/ade20k.yml'
batch_size: 4 # total batch size: 4 * 4
iters: 160000
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [512, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
val_dataset:
transforms:
- type: Resize
target_size: [2048, 512]
keep_ratio: true
size_divisor: 32
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
export:
transforms:
- type: Resize
target_size: [2048, 512]
keep_ratio: true
size_divisor: 32
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.05
lr_scheduler:
_inherited_: False
type: PolynomialDecay
learning_rate: 1.0e-4
power: 1.
end_lr: 1.0e-7
warmup_iters: 1500
warmup_start_lr: 1.0e-6
loss:
types:
- type: CrossEntropyLoss
coef: [1, 0.4]
model:
type: RTFormer
base_channels: 64
head_channels: 128
drop_path_rate: 0.1
use_injection: [true, false]
pretrained: https://paddleseg.bj.bcebos.com/dygraph/backbone/rtformer_base_backbone_imagenet_pretrained.zip
_base_: '../_base_/cityscapes.yml'
batch_size: 3 # total batch size: 4 * 3
iters: 120000
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [1024, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
val_dataset:
transforms:
- type: Resize
target_size: [2048, 1024]
keep_ratio: true
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
export:
transforms:
- type: Resize
target_size: [2048, 512]
keep_ratio: true
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.0125
lr_scheduler:
_inherited_: False
type: PolynomialDecay
learning_rate: 4.0e-4
power: 1.
end_lr: 1.0e-6
warmup_iters: 1500
warmup_start_lr: 1.0e-6
loss:
types:
- type: CrossEntropyLoss
coef: [1, 0.4]
model:
type: RTFormer
base_channels: 64
head_channels: 128
use_injection: [true, false]
pretrained: https://paddleseg.bj.bcebos.com/dygraph/backbone/rtformer_base_backbone_imagenet_pretrained.zip
_base_: '../_base_/ade20k.yml'
batch_size: 4 # total batch size: 4 * 4
iters: 160000
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [512, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
val_dataset:
transforms:
- type: Resize
target_size: [2048, 512]
keep_ratio: true
size_divisor: 32
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
export:
transforms:
- type: Resize
target_size: [2048, 512]
keep_ratio: true
size_divisor: 32
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.05
lr_scheduler:
_inherited_: False
type: PolynomialDecay
learning_rate: 1.0e-4
power: 1.
end_lr: 1.0e-7
warmup_iters: 1500
warmup_start_lr: 1.0e-6
loss:
types:
- type: CrossEntropyLoss
coef: [1, 0.4]
model:
type: RTFormer
base_channels: 32
head_channels: 64
drop_path_rate: 0.1
cross_size: 8
use_injection: [true, false]
pretrained: https://paddleseg.bj.bcebos.com/dygraph/backbone/rtformer_slim_backbone_imagenet_pretrained.zip
_base_: '../_base_/cityscapes.yml'
batch_size: 3 # total batch size: 4 * 3
iters: 120000
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [1024, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
val_dataset:
transforms:
- type: Resize
target_size: [2048, 1024]
keep_ratio: true
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
export:
transforms:
- type: Resize
target_size: [2048, 512]
keep_ratio: true
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.0125
lr_scheduler:
_inherited_: False
type: PolynomialDecay
learning_rate: 4.0e-4
power: 1.
end_lr: 1.0e-6
warmup_iters: 1500
warmup_start_lr: 1.0e-6
loss:
types:
- type: CrossEntropyLoss
coef: [1, 0.4]
model:
type: RTFormer
base_channels: 32
head_channels: 64
use_injection: [true, true]
pretrained: https://paddleseg.bj.bcebos.com/dygraph/backbone/rtformer_slim_backbone_imagenet_pretrained.zip
\ No newline at end of file
# SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers
## Reference
> Xie, Enze, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, and Ping Luo. "SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers." arXiv preprint arXiv:2105.15203 (2021).
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | mIoU(slice) | mIoU (flip) | mIoU (ms+flip) | Links |
|-|-|-|-|-|-|-|-|
|SegFormer_B0|-|1024x1024|160000|76.73%|77.16%|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b0_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b0_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=227e067add44d44383c402ec5aead11b)|
|SegFormer_B1|-|1024x1024|160000|78.35%|78.64%|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b1_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b1_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=a0f4e8eacf346826e3150989b6a9f849)|
|SegFormer_B2|-|1024x1024|160000|81.60%|81.82%|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b2_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b2_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=734c0d99d858d0db7ff58f03d18289fe)|
|SegFormer_B3|-|1024x1024|160000|82.47%|82.60%|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b3_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b3_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=406282a64c45d008bf4445c5669d6579)|
|SegFormer_B4|-|1024x1024|160000|82.38%|82.59%|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b4_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b4_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=dc51a262eb8be9273970354ed445e760)|
|SegFormer_B5|-|1024x1024|160000|82.58%|82.82%|-|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b5_cityscapes_1024x1024_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/segformer_b5_cityscapes_1024x1024_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=306d042a8e4d82ccceabd988a478a2f8)|
_base_: '../_base_/cityscapes_1024x1024.yml'
batch_size: 2
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B0
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b0.tar.gz
embedding_dim: 256
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
test_config:
is_slide: true
crop_size: [1024, 1024]
stride: [768, 768]
_base_: '../_base_/cityscapes.yml'
batch_size: 1
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B0
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b0.tar.gz
embedding_dim: 256
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
_base_: '../_base_/cityscapes_1024x1024.yml'
batch_size: 2
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B1
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b1.tar.gz
embedding_dim: 256
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
test_config:
is_slide: true
crop_size: [1024, 1024]
stride: [768, 768]
_base_: '../_base_/cityscapes.yml'
batch_size: 1
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B1
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b1.tar.gz
embedding_dim: 256
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
_base_: '../_base_/cityscapes_1024x1024.yml'
batch_size: 2
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B2
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b2.tar.gz
embedding_dim: 768
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
test_config:
is_slide: true
crop_size: [1024, 1024]
stride: [768, 768]
_base_: '../_base_/cityscapes.yml'
batch_size: 1
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B2
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b2.tar.gz
embedding_dim: 768
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
_base_: '../_base_/cityscapes_1024x1024.yml'
batch_size: 2
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B3
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b3.tar.gz
embedding_dim: 768
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
test_config:
is_slide: true
crop_size: [1024, 1024]
stride: [768, 768]
_base_: '../_base_/cityscapes.yml'
batch_size: 1
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B3
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b3.tar.gz
embedding_dim: 768
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
_base_: '../_base_/cityscapes_1024x1024.yml'
batch_size: 2
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B4
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b4.tar.gz
embedding_dim: 768
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
test_config:
is_slide: true
crop_size: [1024, 1024]
stride: [768, 768]
_base_: '../_base_/cityscapes.yml'
batch_size: 1
iters: 160000
model:
type: SegFormer
backbone:
type: MixVisionTransformer_B4
pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/mix_vision_transformer_b4.tar.gz
embedding_dim: 768
num_classes: 19
optimizer:
_inherited_: False
type: AdamW
beta1: 0.9
beta2: 0.999
weight_decay: 0.01
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.00006
power: 1
loss:
types:
- type: CrossEntropyLoss
coef: [1]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment