"...gpu/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "5f37917fb39dc01505e3eeedd641832906183f07"
Commit 0d97cc8c authored by Sugon_ldc's avatar Sugon_ldc
Browse files

add new model

parents
Pipeline #316 failed with stages
in 0 seconds
_base_: './fcn_uhrnetw18_small_cityscapes_1024x512_80k.yml'

model:
  backbone:
    type: UHRNet_W48
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/backbone/uhrnetw48_imagenet.tar.gz
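With PaddleSeg installed, a config like this is normally passed straight to the stock trainer. A minimal sketch, assuming PaddleSeg's standard `train.py` entry point and a hypothetical config path:

```shell
# Single-GPU training run for the UHRNet_W48 variant defined above
# (config path is hypothetical; adjust to where the file lives).
export CUDA_VISIBLE_DEVICES=0
python train.py \
    --config configs/fcn/fcn_uhrnetw48_cityscapes_1024x512_80k.yml \
    --save_dir output/fcn_uhrnetw48 \
    --save_interval 1000 \
    --do_eval --use_vdl
```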
# U-Net: Convolutional Networks for Biomedical Image Segmentation
## Reference
> Ronneberger O, Fischer P, Brox T. U-net: Convolutional networks for biomedical image segmentation[C]//International Conference on Medical image computing and computer-assisted intervention. Springer, Cham, 2015: 234-241.
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | Batch Size | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|UNet|-|1024x512|160000|4|65.00%|66.02%|66.89%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/unet_cityscapes_1024x512_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/unet_cityscapes_1024x512_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=67b3338de34ad09f0cb5e7c6856305cc)|
### STARE
| Model | Backbone | Resolution | Training Iters | Batch Size | AUC ROC | DICE | mIoU | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|UNet|-|128x128|40000|16|95.93%|90.43%|83.54%|[model](https://bj.bcebos.com/paddleseg/dygraph/stare/unet_stare_128x128_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/stare/unet_stare_128x128_40k/train.log) \| [vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/scalar?id=a478e562e7a4336dc3d5389809f2db6d)|
### DRIVE
| Model | Backbone | Resolution | Training Iters | Batch Size | AUC ROC | DICE | mIoU | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|UNet|-|128x128|40000|16|95.58%|89.50%|82.12%|[model](https://bj.bcebos.com/paddleseg/dygraph/drive/unet_drive_128x128_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/drive/unet_drive_128x128_40k/train.log) \| [vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/index?id=1c87bb2e703d7a61da0f2e7e0fc553e1)|
### CHASE DB1
| Model | Backbone | Resolution | Training Iters | Batch Size | AUC ROC | DICE | mIoU | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|UNet|-|128x128|40000|16|95.69%|88.54%|80.87%|[model](https://bj.bcebos.com/paddleseg/dygraph/chasedb1/unet_chasedb1_128x128_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/chasedb1/unet_chasedb1_128x128_40k/train.log) \| [vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/scalar?id=44a5c6f1792baafc23de9287baca6fa0)|
### HRF
| Model | Backbone | Resolution | Training Iters | Batch Size | AUC ROC | DICE | mIoU | Links |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|UNet|-|256x256|40000|16|93.39%|86.83%|78.45%|[model](https://bj.bcebos.com/paddleseg/dygraph/hrf/unet_hrf_256x256_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/hrf/unet_hrf_256x256_40k/train.log) \| [vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/index?id=00dc2a5b286f77f2776e902ea066d5ff)|
_base_: '../_base_/chase_db1.yml'

batch_size: 4
iters: 40000

model:
  type: UNet
  num_classes: 2
  use_deconv: False
  pretrained: Null
_base_: '../_base_/cityscapes.yml'

batch_size: 4
iters: 160000

model:
  type: UNet
  num_classes: 19
  use_deconv: False
  pretrained: /data/unet/train/model.pdparams
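Note that `pretrained` here points to a locally trained checkpoint rather than a released URL. Such a checkpoint can be evaluated with PaddleSeg's standard `val.py`; a sketch with a hypothetical config path:

```shell
# Evaluate the checkpoint referenced by the config above.
python val.py \
    --config configs/unet/unet_cityscapes_1024x512_160k.yml \
    --model_path /data/unet/train/model.pdparams
```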
_base_: '../_base_/drive.yml'

batch_size: 4
iters: 40000

model:
  type: UNet
  num_classes: 2
  use_deconv: False
  pretrained: Null
_base_: '../_base_/hrf.yml'

batch_size: 4
iters: 40000

model:
  type: UNet
  num_classes: 2
  use_deconv: False
  pretrained: Null
_base_: '../_base_/stare.yml'

batch_size: 4
iters: 40000

model:
  type: UNet
  num_classes: 2
  use_deconv: False
  pretrained: Null
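Any of these UNet configs can also drive visualization through PaddleSeg's `predict.py`, using the same flags that appear later in this repo; the checkpoint and image paths below are placeholders:

```shell
# Visualize predictions for the STARE UNet (paths are placeholders).
python predict.py \
    --config configs/unet/unet_stare_128x128_40k.yml \
    --model_path output/unet_stare/best_model/model.pdparams \
    --image_path data/STARE/images \
    --save_dir output/unet_stare/results
```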
# UNet 3+: A Full-Scale Connected UNet for Medical Image Segmentation
## Reference
> Huang H, Lin L, Tong R, et al. UNet 3+: A Full-Scale Connected UNet for Medical Image Segmentation[J]. ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020.
_base_: '../_base_/cityscapes.yml'

batch_size: 4
iters: 160000

model:
  type: UNet3Plus
  in_channels: 3
  num_classes: 19
  is_batchnorm: True
  is_deepsup: False
  is_CGM: False
# A Nested U-Net Architecture for Medical Image Segmentation
## Reference
> Zhou, Zongwei, Md Mahfuzur Rahman Siddiquee, Nima Tajbakhsh, and Jianming Liang. "Unet++: A nested u-net architecture for medical image segmentation." In Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support, pp. 3-11. Springer, Cham, 2018.
_base_: '../_base_/cityscapes.yml'

batch_size: 4
iters: 160000

model:
  type: UNetPlusPlus
  in_channels: 3
  num_classes: 19
  use_deconv: False
  align_corners: False
  pretrained: Null
  is_ds: True
# Unified Perceptual Parsing for Scene Understanding
## Reference
> Tete Xiao, Yingcheng Liu, Bolei Zhou, Yuning Jiang, Jian Sun. "Unified Perceptual Parsing for Scene Understanding." Proceedings of the European Conference on Computer Vision (ECCV). 2018.
## Performance
### Cityscapes
| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
|-|-|-|-|-|-|-|-|
|UPerNet|ResNet101_OS8|512x1024|40000|79.58%|80.11%|80.41%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/upernet_resnet101_os8_cityscapes_512x1024_40k/model.pdparams)\|[log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/upernet_resnet101_os8_cityscapes_512x1024_40k/train.log)\|[vdl](https://www.paddlepaddle.org.cn/paddle/visualdl/service/app/index?id=c635ae2e70e148796cd58fae5273c3d6)|
_base_: '../_base_/cityscapes.yml'

batch_size: 2
iters: 40000

model:
  type: UPerNet
  backbone:
    type: ResNet101_vd
    output_stride: 8
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet101_vd_ssld.tar.gz
  backbone_indices: [0, 1, 2, 3]
  channels: 512
  dropout_prob: 0.1
  enable_auxiliary_loss: True

optimizer:
  type: sgd
  weight_decay: 0.0005

loss:
  types:
    - type: CrossEntropyLoss
    - type: CrossEntropyLoss
  coef: [1, 0.4]

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.01
  end_lr: 0.0
  power: 0.9
# AutoNUE@CVPR 2021 Challenge
Implementation of the 1st-place solution for the AutoNUE@CVPR 2021 Challenge Semantic Segmentation Track, based on PaddlePaddle.
## Installation
#### step 1. Install PaddlePaddle
System Requirements:
* PaddlePaddle == 2.0.2
* Python >= 3.6

We highly recommend installing the GPU version of PaddlePaddle: segmentation models carry a large memory overhead, and runs can go out of memory otherwise. For more detailed installation tutorials, please refer to the official website of [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/2.0/install/).
#### step 2. Install PaddleSeg
You should install PaddleSeg via the *API Calling* method for flexible development:
```shell
pip install paddleseg==2.5.0
```
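You can confirm that both packages import correctly (note the PaddlePaddle version pinned above):

```shell
# Prints the installed PaddlePaddle version and fails if either import is broken.
python -c "import paddleseg, paddle; print(paddle.__version__)"
```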
## Data Preparation
Firstly, you need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5) following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data into the structure below.
```
IDD_Segmentation
|
|--leftImg8bit
|  |--train
|  |--val
|  |--test
|
|--gtFine
|  |--train
|  |--val
|  |--test
```
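To sanity-check the layout before training, a quick listing works (assuming the dataset sits under `data/`):

```shell
# Each of these should list exactly the train/, val/ and test/ splits.
find data/IDD_Segmentation/leftImg8bit data/IDD_Segmentation/gtFine \
    -mindepth 1 -maxdepth 1 -type d | sort
```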
We make three contributions, which together ranked 1st:
- Progressive segmentation
- Leveraging the IDD_Detection dataset to generate extra training samples by pseudo-labeling
- Decoder-enhanced Swin Transformer
## Training
### Baseline
1. Download the weights pretrained on Mapillary.
```shell
mkdir -p pretrain && cd pretrain
wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/ocrnet_hrnetw48_mapillary/pretrained.pdparams
cd ..
```
2. Modify `scripts/train.py` line 27 to read `from core.val import evaluate`.
3. Run the training script.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
--config configs/sscale_auto_nue_map+city@1920.yml --use_vdl \
--save_dir saved_model/sscale_auto_nue_map+city@1920 --save_interval 2000 --num_workers 2 --do_eval
```
### Regional progressive segmentation
1. In `scripts/train.py` line 27, replace `from core.val import evaluate` with `from core.val_crop import evaluate`.
2. Run
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
--config configs/auto_nue_map+city_crop.yml --use_vdl \
--save_dir saved_model/auto_nue_map+city_crop --save_interval 2000 --num_workers 2 --do_eval
```
### Pseudo-labeling
First you need to organize the IDD_Detection dataset as follows:
```
IDD_Detection
|
|--JPEGImages
|--Annotations
```
where `JPEGImages` and `Annotations` hold the images and XML files collected from the two folders `IDD_Detection/FrontFar` and `IDD_Detection/FrontNear`.
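A possible way to flatten the two source folders into that layout (the raw download location and file extensions are assumptions; adjust to the actual archive):

```shell
# Collect images and XML annotations from FrontFar and FrontNear
# into flat JPEGImages/ and Annotations/ folders.
mkdir -p data/IDD_Detection/JPEGImages data/IDD_Detection/Annotations
for sub in FrontFar FrontNear; do
    find raw/IDD_Detection/$sub -name '*.jpg' -exec cp {} data/IDD_Detection/JPEGImages/ \;
    find raw/IDD_Detection/$sub -name '*.xml' -exec cp {} data/IDD_Detection/Annotations/ \;
done
```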
Then:
1. Replace `AutoNUE21/predict.py` line 22 `from paddleseg.core import predict` with `from core.predict_generate_autolabel import predictAutolabel`
2. Modify `AutoNUE21/predict.py` line 156, replacing `predict(` with `predictAutolabel(`
3. Run
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m paddle.distributed.launch predict.py --config configs/sscale_auto_nue_map+city@1920.yml --model_path saved_model/sscale_auto_nue_map+city@1920/best_model/model.pdparams --image_path data/IDD_Detection/JPEGImages --save_dir detection_out --aug_pred --scales 1.0 1.5 2.0 --flip_horizontal
```
4. Auto-label the two classes `traffic light` and `traffic sign` from the bounding-box annotations by running `tools/IDD_labeling.py`
5. Put the generated `pred_refine` folder under `data/IDD_Detection` (the resulting layout is sketched after the training command below)
6. Modify `scripts/train.py` line 27 back to `from core.val import evaluate`
7. Train on these pseudo-labels together with the fine-annotated samples:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
--config configs/auto_nue_auto_label.yml --use_vdl \
--save_dir saved_model/auto_nue_auto_label --save_interval 2000 --num_workers 2 --do_eval
```
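After steps 4 and 5, the detection folder should contain the generated pseudo-labels alongside the raw data, roughly like this (a sketch based on the steps above):

```
IDD_Detection
|
|--JPEGImages
|--Annotations
|--pred_refine
```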
### Decoder-enhanced Swin Transformer
1. Download the weights pretrained on Mapillary.
```shell
cd pretrain
wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/swin_mla_p4w7_mapillary/pretrained_swin.pdparams
cd ..
```
2. Run the training script.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
--config configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml --use_vdl \
--save_dir saved_model/swin_transformer_mla_autonue --save_interval 2000 --num_workers 2 --do_eval
```
3. Run the testing script.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m paddle.distributed.launch predict.py --config configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml --model_path saved_model/swin_transformer_mla_autonue/best_model/model.pdparams --image_path data/IDD_Segmentation/leftImg8bit/test/ --save_dir test_out_swin --aug_pred --scales 1.0 1.5 2.0 --flip_horizontal
```
## Ensemble Testing
We provide a prediction script that ensembles the `baseline`, `pseudo-labeling` and `rps` (regional progressive segmentation) models.
Just run:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m paddle.distributed.launch predict_ensemble_three.py --config configs/sscale_auto_nue_map+city@1920.yml --config_1 configs/auto_nue_auto_label.yml --config_crop configs/auto_nue_map+city_crop.yml --model_path saved_model/sscale_auto_nue_map+city@1920/best_model/model.pdparams --model_path_1 saved_model/auto_nue_auto_label/best_model/model.pdparams --model_path_crop saved_model/auto_nue_map+city_crop/best_model/model.pdparams --image_path data/IDD_Segmentation/leftImg8bit/test/ --save_dir test_out --aug_pred --scales 1.0 1.5 2.0 --flip_horizontal
```
batch_size: 1
iters: 80000

model:
  type: MscaleOCRNet
  pretrained: pretrain/pretrained.pdparams
  n_scales: [1.0]
  backbone:
    type: HRNet_W48_NV
  num_classes: 26
  backbone_indices: [0]

train_dataset:
  type: AutoNueAutolabel
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: ResizeStepScaling
      min_scale_factor: 0.5
      max_scale_factor: 2.0
      scale_step_size: 0
    - type: RandomPaddingCrop
      crop_size: [1920, 1080]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.25
      brightness_prob: 1
      contrast_range: 0.25
      contrast_prob: 1
      saturation_range: 0.25
      saturation_prob: 1
      hue_range: 63
      hue_prob: 1
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: train

val_dataset:
  type: AutoNueAutolabel
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: val

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 0.0001

learning_rate:
  value: 0.02
  decay:
    type: poly
    power: 2
    end_lr: 0.0

loss:
  types:
    - type: DiceLoss
    - type: DiceLoss
    - type: BootstrappedCrossEntropyLoss
      min_K: 50000
      loss_th: 0.05
    - type: BootstrappedCrossEntropyLoss
      min_K: 50000
      loss_th: 0.05
  coef: [0.4, 0.16, 1.0, 0.4]
batch_size: 1
iters: 85000

model:
  type: MscaleOCRNet
  pretrained: pretrain/pretrained.pdparams
  n_scales: [1.0]
  backbone:
    type: HRNet_W48_NV
  num_classes: 26
  backbone_indices: [0]

train_dataset:
  type: AutoNueCrop
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [3200, 1800]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.25
      brightness_prob: 1
      contrast_range: 0.25
      contrast_prob: 1
      saturation_range: 0.25
      saturation_prob: 1
      hue_range: 63
      hue_prob: 1
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: train

val_dataset:
  type: AutoNueCrop
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [3200, 1800]
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: val

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 0.0001

learning_rate:
  value: 0.005
  decay:
    type: poly
    power: 2
    end_lr: 0.0

loss:
  types:
    - type: DiceLoss
    - type: DiceLoss
    - type: BootstrappedCrossEntropyLoss
      min_K: 50000
      loss_th: 0.05
    - type: BootstrappedCrossEntropyLoss
      min_K: 50000
      loss_th: 0.05
  coef: [0.4, 0.16, 1.0, 0.4]
batch_size: 1
iters: 80000

model:
  type: MscaleOCRNet
  pretrained: saved_model/sscale_ocr_auto_nue_map+city_ce+dice@1920/best_model/model.pdparams
  n_scales: [1.0, 1.5, 2.0]
  backbone:
    type: HRNet_W48_NV
  num_classes: 26
  backbone_indices: [0]

train_dataset:
  type: AutoNue
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: ResizeStepScaling
      min_scale_factor: 0.5
      max_scale_factor: 2.0
      scale_step_size: 0
    - type: RandomPaddingCrop
      crop_size: [1920, 1080]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.25
      brightness_prob: 1
      contrast_range: 0.25
      contrast_prob: 1
      saturation_range: 0.25
      saturation_prob: 1
      hue_range: 63
      hue_prob: 1
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: train

val_dataset:
  type: AutoNue
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: val

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 0.0001

learning_rate:
  value: 0.005
  decay:
    type: poly
    power: 2
    end_lr: 0.0

loss:
  types:
    - type: DiceLoss
    - type: DiceLoss
    - type: BootstrappedCrossEntropyLoss
      min_K: 100000
      loss_th: 0.05
    - type: BootstrappedCrossEntropyLoss
      min_K: 100000
      loss_th: 0.05
  coef: [1, 0.4, 1, 0.4]
batch_size: 1
iters: 80000

model:
  type: MscaleOCRNet
  pretrained: pretrain/pretrained.pdparams
  n_scales: [1.0]
  backbone:
    type: HRNet_W48_NV
  num_classes: 26
  backbone_indices: [0]

train_dataset:
  type: AutoNue
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: ResizeStepScaling
      min_scale_factor: 0.5
      max_scale_factor: 2.0
      scale_step_size: 0
    - type: RandomPaddingCrop
      crop_size: [1920, 1080]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.25
      brightness_prob: 1
      contrast_range: 0.25
      contrast_prob: 1
      saturation_range: 0.25
      saturation_prob: 1
      hue_range: 63
      hue_prob: 1
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: train

val_dataset:
  type: AutoNue
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: val

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 0.0001

learning_rate:
  value: 0.01
  decay:
    type: poly
    power: 2
    end_lr: 0.0

loss:
  types:
    - type: DiceLoss
    - type: DiceLoss
    - type: BootstrappedCrossEntropyLoss
      min_K: 100000
      loss_th: 0.05
    - type: BootstrappedCrossEntropyLoss
      min_K: 100000
      loss_th: 0.05
  coef: [1.0, 0.4, 1.0, 0.4]
batch_size: 1
iters: 160000

model:
  type: MLATransformer
  pretrained: pretrain/pretrained_swin.pdparams
  backbone:
    type: SwinTransformer_base_patch4_window7_224
    ape: False
    drop_path_rate: 0.3
    patch_norm: True
  num_classes: 26
  in_channels: [128, 256, 512, 1024]
  mlahead_channels: 128

train_dataset:
  type: AutoNue
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [1920, 1080]
    - type: ResizeStepScaling
      min_scale_factor: 0.5
      max_scale_factor: 2.0
      scale_step_size: 0.25
    - type: RandomPaddingCrop
      crop_size: [1024, 512]
    - type: RandomHorizontalFlip
    - type: RandomDistort
      brightness_range: 0.25
      brightness_prob: 1
      contrast_range: 0.25
      contrast_prob: 1
      saturation_range: 0.25
      saturation_prob: 1
      hue_range: 63
      hue_prob: 1
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: train

val_dataset:
  type: AutoNue
  dataset_root: data/IDD_Segmentation
  transforms:
    - type: Resize
      target_size: [256, 256] #[1920, 1080]
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: val

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 0.0001

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.005
  end_lr: 0
  power: 2
  iters: 160000

loss:
  types:
    - type: CrossEntropyLoss
    - type: CrossEntropyLoss
  coef: [1, 0.4]
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .predict_ensemble import predictEnsemble
__all__ = ['predictEnsemble']