Commit 522a602f authored by wangkx1

siton bug

parent abb99c90
worker_num: 4
TrainReader:
sample_transforms:
- Decode: {}
- RandomDistort: {prob: 0.8}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {prob: 0.8}
- RandomFlip: {}
batch_transforms:
- BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- NormalizeBox: {}
- BboxXYXY2XYWH: {}
- Permute: {}
batch_size: 4
shuffle: true
drop_last: true
collate_batch: false
use_shared_memory: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- Permute: {}
batch_size: 4
TestReader:
inputs_def:
image_shape: [3, 640, 640]
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- Permute: {}
batch_size: 1
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_focalnet_L_384_3x_coco/model_final
find_unused_parameters: True
log_iter: 100
snapshot_epoch: 2
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_fl4_pretrained_on_o365.pdparams
DETR:
backbone: FocalNet
neck: HybridEncoder
transformer: RTDETRTransformer
detr_head: DINOHead
post_process: DETRPostProcess
FocalNet:
arch: 'focalnet_L_384_22k_fl4'
out_indices: [1, 2, 3]
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 6 #
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 2048
dropout: 0.
activation: 'gelu'
expansion: 1.0
RTDETRTransformer:
num_queries: 300
position_embed_type: sine
feat_strides: [8, 16, 32]
num_levels: 3
nhead: 8
num_decoder_layers: 6
dim_feedforward: 2048 #
dropout: 0.0
activation: relu
num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0
learnt_init_query: False
query_pos_head_inv_sig: True #
DINOHead:
loss:
name: DINOLoss
loss_coeff: {class: 1, bbox: 5, giou: 2}
aux_loss: True
use_vfl: True
matcher:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
DETRPostProcess:
num_top_queries: 300
epoch: 36
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [36]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001
param_groups:
- params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
weight_decay: 0.0
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
find_unused_parameters: True
log_iter: 200
DETR:
backbone: PPHGNetV2
PPHGNetV2:
arch: 'L'
return_idx: [1, 2, 3]
freeze_stem_only: True
freeze_at: 0
freeze_norm: True
lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams
find_unused_parameters: True
log_iter: 200
DETR:
backbone: PPHGNetV2
PPHGNetV2:
arch: 'X'
return_idx: [1, 2, 3]
freeze_stem_only: True
freeze_at: 0
freeze_norm: True
lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]
HybridEncoder:
hidden_dim: 384
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 384
nhead: 8
dim_feedforward: 2048
dropout: 0.
activation: 'gelu'
expansion: 1.0
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r101vd_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [1, 2, 3]
lr_mult_list: [0.01, 0.01, 0.01, 0.01]
num_stages: 4
freeze_stem_only: True
HybridEncoder:
hidden_dim: 384
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 384
nhead: 8
dim_feedforward: 2048
dropout: 0.
activation: 'gelu'
expansion: 1.0
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r18_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
ResNet:
depth: 18
variant: d
return_idx: [1, 2, 3]
freeze_at: -1
freeze_norm: false
norm_decay: 0.
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 0.5
depth_mult: 1.0
RTDETRTransformer:
eval_idx: -1
num_decoder_layers: 3
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r34vd_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams
ResNet:
depth: 34
variant: d
return_idx: [1, 2, 3]
freeze_at: -1
freeze_norm: false
norm_decay: 0.
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 0.5
depth_mult: 1.0
RTDETRTransformer:
eval_idx: -1
num_decoder_layers: 4
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r50vd_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_r50vd_m_6x_coco/model_final
find_unused_parameters: True
log_iter: 200
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 1024
dropout: 0.
activation: 'gelu'
expansion: 0.5
depth_mult: 1.0
RTDETRTransformer:
eval_idx: 2 # use the 3rd decoder layer for eval
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_6x.yml',
'_base_/rtdetr_r50vd.yml',
'_base_/rtdetr_reader.yml',
]
weights: output/rtdetr_swin_L_384_3x_coco/model_final
find_unused_parameters: True
log_iter: 100
snapshot_epoch: 2
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/dino_swin_large_384_4scale_3x_coco.pdparams
DETR:
backbone: SwinTransformer
neck: HybridEncoder
transformer: RTDETRTransformer
detr_head: DINOHead
post_process: DETRPostProcess
SwinTransformer:
arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
ape: false
drop_path_rate: 0.2
patch_norm: true
out_indices: [1, 2, 3]
HybridEncoder:
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 6 #
encoder_layer:
name: TransformerLayer
d_model: 256
nhead: 8
dim_feedforward: 2048 #
dropout: 0.
activation: 'gelu'
expansion: 1.0
RTDETRTransformer:
num_queries: 300
position_embed_type: sine
feat_strides: [8, 16, 32]
num_levels: 3
nhead: 8
num_decoder_layers: 6
dim_feedforward: 2048 #
dropout: 0.0
activation: relu
num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0
learnt_init_query: False
DINOHead:
loss:
name: DINOLoss
loss_coeff: {class: 1, bbox: 5, giou: 2}
aux_loss: True
use_vfl: True
matcher:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
DETRPostProcess:
num_top_queries: 300
epoch: 36
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [36]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001
param_groups:
- params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
weight_decay: 0.0
# RTMDet
## Contents
- [Model Zoo](#model-zoo)
- [Usage](#usage)
- [Speed Benchmark](#speed-benchmark)
- [Citation](#citation)
## Model Zoo
### Base Detection Models
| Model | Input Size | Images/GPU | LR Schedule | Inference Time (ms) | mAP | AP50 | Params (M) | FLOPs (G) | Download | Config |
| :------------- | :------- | :-------: | :------: | :---------: | :-----: |:-----: | :-----: |:-----: | :-------------: | :-----: |
| *RTMDet-t | 640 | 32 | 300e | 2.8 | 40.9 | 57.9 | 4.90 | 16.21 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_t_300e_coco.pdparams) | [config](./rtmdet_t_300e_coco.yml) |
| *RTMDet-s | 640 | 32 | 300e | 3.3 | 44.5 | 62.0 | 8.89 | 29.71 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams) | [config](./rtmdet_s_300e_coco.yml) |
| *RTMDet-m | 640 | 32 | 300e | 6.4 | 49.1 | 66.8 | 24.71 | 78.47 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_m_300e_coco.pdparams) | [config](./rtmdet_m_300e_coco.yml) |
| *RTMDet-l | 640 | 32 | 300e | 10.2 | 51.2 | 68.8 | 52.31 | 160.32 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_l_300e_coco.pdparams) | [config](./rtmdet_l_300e_coco.yml) |
| *RTMDet-x | 640 | 32 | 300e | 18.0 | 52.6 | 70.4 | 94.86 | 283.12 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_x_300e_coco.pdparams) | [config](./rtmdet_x_300e_coco.yml) |
### Instance Segmentation Models
| Model | Input Size | Images/GPU | LR Schedule | Inference Time (ms) | box AP | mask AP | Params (M) | FLOPs (G) | Download | Config |
| :------------- | :------- | :-------: | :------: | :---------: | :-----: |:-----: | :-----: |:-----: | :-------------: | :-----: |
| *RTMDet-t | 640 | 32 | 300e | - | 40.5 | - | 5.6 | 11.8 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_ins_t_300e_coco.pdparams) | [config](./rtmdet_ins_t_300e_coco.yml) |
| *RTMDet-s | 640 | 32 | 300e | - | 44.0 | - | 10.18 | 21.5 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_ins_s_300e_coco.pdparams) | [config](./rtmdet_ins_s_300e_coco.yml) |
| *RTMDet-m | 640 | 32 | 300e | - | 48.8 | - | 27.58 | 54.13 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_ins_m_300e_coco.pdparams) | [config](./rtmdet_ins_m_300e_coco.yml) |
| *RTMDet-l | 640 | 32 | 300e | - | 51.2 | - | 57.37 | 106.56 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_ins_l_300e_coco.pdparams) | [config](./rtmdet_ins_l_300e_coco.yml) |
| *RTMDet-x | 640 | 32 | 300e | - | 52.4 | - | 102.7 | 182.7 |[download](https://paddledet.bj.bcebos.com/models/rtmdet_ins_x_300e_coco.pdparams) | [config](./rtmdet_ins_x_300e_coco.yml) |
**Notes:**
- Full training of the RTMDet models is not yet supported; the reported mAP is the `mAP(IoU=0.5:0.95)` of the deployed weights on COCO val2017, evaluated without tricks such as `multi_label`;
- The RTMDet-t and RTMDet-s models are trained with ImageNet pre-trained weights; the m, l and x models are not;
- Params (M) and FLOPs (G) of the RTMDet models are measured in training mode;
- RTMDet models are trained by default on 8 GPUs with mixed precision and batch_size=32 per GPU;
- The original RTMDet paper reports speed on an `NVIDIA 3090 GPU`; T4 and V100 speed numbers are not yet provided;
- Inference time (ms) is measured with TensorRT-FP16 and excludes data pre-processing and model post-processing (NMS). Tests use a single Tesla T4 GPU with batch size = 1, under **paddlepaddle-2.3.2**, **CUDA 11.2**, **CUDNN 8.2**, **GCC-8.2**, **TensorRT 8.0.3.4**; see [Speed Benchmark](#speed-benchmark) for details;
- If you set `--run_benchmark=True`, first install the dependencies: `pip install pynvml psutil GPUtil`.
### Deployment Models
| Model | Input Size | Exported Weights (w/ NMS) | Exported Weights (w/o NMS) | ONNX (w/ NMS) | ONNX (w/o NMS) |
| :-------- | :--------: | :---------------------: | :----------------: | :---------------------: | :----------------: |
| RTMDet-t | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_t_300e_coco_w_nms.zip) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_t_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_t_300e_coco_w_nms.onnx) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_t_300e_coco_wo_nms.onnx) |
| RTMDet-s | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_s_300e_coco_w_nms.zip) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_s_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_s_300e_coco_w_nms.onnx) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_s_300e_coco_wo_nms.onnx) |
| RTMDet-m | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_m_300e_coco_w_nms.zip) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_m_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_m_300e_coco_w_nms.onnx) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_m_300e_coco_wo_nms.onnx) |
| RTMDet-l | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_l_300e_coco_w_nms.zip) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_l_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_l_300e_coco_w_nms.onnx) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_l_300e_coco_wo_nms.onnx) |
| RTMDet-x | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_x_300e_coco_w_nms.zip) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_x_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_x_300e_coco_w_nms.onnx) | [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/rtmdet/rtmdet_x_300e_coco_wo_nms.onnx) |
## Usage
### 0. One-command end-to-end pipeline
Put the commands below into a script such as ```run.sh``` and run everything at once with ```sh run.sh```, or run them one by one on the command line.
```bash
model_name=rtmdet # can be changed, e.g. ppyoloe
job_name=rtmdet_s_300e_coco # can be changed, e.g. ppyoloe_plus_crn_s_80e_coco
config=configs/${model_name}/${job_name}.yml
log_dir=log_dir/${job_name}
# weights=https://bj.bcebos.com/v1/paddledet/models/${job_name}.pdparams
weights=output/${job_name}/model_final.pdparams
# 1. Training (single-GPU / multi-GPU); add --eval to evaluate during training, add --amp for mixed-precision training
# CUDA_VISIBLE_DEVICES=0 python tools/train.py -c ${config} --eval --amp
python -m paddle.distributed.launch --log_dir=${log_dir} --gpus 0,1,2,3,4,5,6,7 tools/train.py -c ${config} --eval --amp
# 2. Evaluation; add --classwise to report per-class mAP
CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c ${config} -o weights=${weights} --classwise
# 3. Inference (single image / image directory)
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ${config} -o weights=${weights} --infer_img=demo/000000014439_640x640.jpg --draw_threshold=0.5
# CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ${config} -o weights=${weights} --infer_dir=demo/ --draw_threshold=0.5
# 4. Model export; choose one of the following 3 modes
## standard export; add trt=True for TensorRT deployment, which notably speeds up NMS and the silu activation
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} # trt=True
## export with exclude_post_process=True to strip post-processing; returns one concatenated Tensor in the same format as YOLOv5's ONNX export: boxes not yet rescaled to the original image plus class confidences
# CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} exclude_post_process=True # trt=True
## export with exclude_nms=True to strip NMS; returns 2 Tensors: boxes rescaled to the original image and class confidences
# CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} exclude_nms=True # trt=True
# 5. Deployment inference; note that models exported with post-processing or NMS removed cannot be used here
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/${job_name} --image_file=demo/000000014439_640x640.jpg --device=GPU
# 6. Deployment speed test; add "--run_mode=trt_fp16" to benchmark in TensorRT FP16 mode (requires a model exported with trt=True)
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/${job_name} --image_file=demo/000000014439_640x640.jpg --device=GPU --run_benchmark=True # --run_mode=trt_fp16
# 7. ONNX export; usually from a model exported with exclude_post_process=True
paddle2onnx --model_dir output_inference/${job_name} --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file ${job_name}.onnx
# 8. ONNX TensorRT speed test
/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=${job_name}.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x640x640 --fp16
/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=${job_name}.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x640x640  # FP32 (default precision; trtexec has no --fp32 flag)
```
### 1. Training
Run the following command to train RTMDet with mixed precision:
```bash
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/rtmdet/rtmdet_s_300e_coco.yml --amp --eval
```
**Notes:**
- `--amp` enables mixed-precision training to avoid running out of GPU memory, and `--eval` enables evaluation during training.
### 2. Evaluation
Run the following command to evaluate on the COCO val2017 dataset with a single GPU:
```bash
CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams
```
### 3. Inference
Use the following commands to run inference on a single GPU; use `--infer_img` for a single image and `--infer_dir` for all images in a directory.
```bash
# inference on a single image
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams --infer_img=demo/000000014439_640x640.jpg
# inference on all images in a directory
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams --infer_dir=demo
```
### 4. Model Export
Export the model with `tools/export_model.py` for GPU inference deployment or benchmarking.
When you **use Paddle Inference without TensorRT**, run the following command to export the model:
```bash
python tools/export_model.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams
```
When you **use Paddle Inference with TensorRT**, specify `-o trt=True` when exporting the model:
```bash
python tools/export_model.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams trt=True
```
To export the RTMDet model to the **ONNX format**, refer to the
[PaddleDetection ONNX export tutorial](../../deploy/EXPORT_ONNX_MODEL.md) and run the following commands:
```bash
# export the inference model
python tools/export_model.py -c configs/rtmdet/rtmdet_s_300e_coco.yml --output_dir=output_inference -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams
# install paddle2onnx
pip install paddle2onnx
# convert to the onnx format
paddle2onnx --model_dir output_inference/rtmdet_s_300e_coco --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 11 --save_file rtmdet_s_300e_coco.onnx
```
**Note:** ONNX models currently only support batch_size=1.
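If you want a quick sanity check of the exported ONNX file, the hedged sketch below runs it with ONNX Runtime (assuming `pip install onnxruntime opencv-python`). The preprocessing mirrors the RTMDet TestReader above, and the input names `image`, `scale_factor` and `im_shape` are the usual PaddleDetection export names; adjust everything to whatever your exported model actually declares.
```python
# Minimal, illustrative ONNX Runtime check for the exported rtmdet model.
# Assumptions: the ONNX file comes from the paddle2onnx step above, and
# preprocessing follows the TestReader (Resize keep_ratio=True, Pad to
# 640x640 with 114, ImageNet mean/std, HWC -> CHW).
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("rtmdet_s_300e_coco.onnx", providers=["CPUExecutionProvider"])

img = cv2.cvtColor(cv2.imread("demo/000000014439_640x640.jpg"), cv2.COLOR_BGR2RGB)
scale = min(640.0 / img.shape[0], 640.0 / img.shape[1])
resized = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
padded = np.full((640, 640, 3), 114.0, dtype=np.float32)
padded[:resized.shape[0], :resized.shape[1], :] = resized

mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
x = ((padded / 255.0 - mean) / std).transpose(2, 0, 1)[None].astype(np.float32)  # batch_size=1 only

# PaddleDetection exports usually declare "image" and, when post-processing is
# kept, "scale_factor" / "im_shape"; feed only the inputs the model asks for.
feeds = {}
for inp in sess.get_inputs():
    if inp.name == "image":
        feeds[inp.name] = x
    elif inp.name == "scale_factor":
        feeds[inp.name] = np.array([[scale, scale]], dtype=np.float32)
    elif inp.name == "im_shape":
        feeds[inp.name] = np.array([[640.0, 640.0]], dtype=np.float32)

outputs = sess.run(None, feeds)
print([o.shape for o in outputs])
```
This only verifies that the graph runs end to end; turning the raw outputs into boxes (for example with the `exclude_post_process` layout described in the script above) is model-specific.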
### 5. Deployment
RTMDet can be deployed in the following ways:
- Paddle Inference [Python](../../deploy/python) & [C++](../../deploy/cpp)
- [Paddle-TensorRT](../../deploy/TENSOR_RT.md)
- [PaddleServing](https://github.com/PaddlePaddle/Serving)
- [PaddleSlim model quantization](../slim)
Run the following command to export the model:
```bash
python tools/export_model.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams trt=True
```
**Notes:**
- `trt=True` means the speed test uses **Paddle Inference with TensorRT**, which is faster; omitting it (the default, False) means **Paddle Inference without TensorRT**.
- To deploy with Paddle Inference in TensorRT FP16 mode, refer to the [Paddle Inference docs](https://www.paddlepaddle.org.cn/inference/master/user_guides/download_lib.html#python) and download and install the wheel matching your CUDA, CUDNN and TensorRT versions; a quick version check is sketched below.
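Before picking a wheel, the short hedged snippet below (plain `paddle` API, nothing PaddleDetection-specific) prints the CUDA and cuDNN versions your installed PaddlePaddle build was compiled against, which you can compare with the CUDA/CUDNN/TensorRT combination of the wheel you plan to install.
```python
# Illustrative check of the installed PaddlePaddle build; compare the printed
# CUDA/cuDNN versions with the wheel you plan to install for TensorRT FP16.
import paddle

print("paddle version :", paddle.__version__)
print("built with CUDA:", paddle.device.is_compiled_with_cuda())
print("CUDA version   :", paddle.version.cuda())
print("cuDNN version  :", paddle.version.cudnn())
```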
#### 5.1 Python Deployment
`deploy/python/infer.py` runs inference and benchmarking with the exported Paddle Inference model; if you set `--run_benchmark=True`, first install the dependencies: `pip install pynvml psutil GPUtil`.
```bash
# Python deployment: inference on a single image
python deploy/python/infer.py --model_dir=output_inference/rtmdet_s_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu
# inference on all images in a directory
python deploy/python/infer.py --model_dir=output_inference/rtmdet_s_300e_coco --image_dir=demo/ --device=gpu
```
#### 5.2 C++ Deployment
`deploy/cpp/build/main` runs C++ inference with the exported Paddle Inference model; first build the environment following the [docs](../../deploy/cpp/docs).
```bash
# C++ deployment: inference on a single image
./deploy/cpp/build/main --model_dir=output_inference/rtmdet_s_300e_coco/ --image_file=demo/000000014439_640x640.jpg --run_mode=paddle --device=GPU --threshold=0.5 --output_dir=cpp_infer_output/rtmdet_s_300e_coco
```
## Speed Benchmark
For a fair comparison, the speed results in the [Model Zoo](#model-zoo) exclude data pre-processing and model post-processing (NMS), consistent with the [YOLOv4 (AlexeyAB)](https://github.com/AlexeyAB/darknet) test protocol; this requires exporting the model with `-o exclude_nms=True`. Benchmarking requires `--run_benchmark=True`, so first install the dependencies: `pip install pynvml psutil GPUtil`.
To benchmark **with Paddle Inference but without TensorRT**, run the following commands:
```bash
# export the model
python tools/export_model.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams exclude_nms=True
# speed test with run_benchmark=True
python deploy/python/infer.py --model_dir=output_inference/rtmdet_s_300e_coco --image_file=demo/000000014439_640x640.jpg --run_mode=paddle --device=gpu --run_benchmark=True
```
To benchmark **with Paddle Inference and TensorRT**, run the following commands:
```bash
# export the model with trt=True
python tools/export_model.py -c configs/rtmdet/rtmdet_s_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/rtmdet_s_300e_coco.pdparams exclude_nms=True trt=True
# speed test with run_benchmark=True
python deploy/python/infer.py --model_dir=output_inference/rtmdet_s_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu --run_benchmark=True
# TensorRT-FP32 speed test
python deploy/python/infer.py --model_dir=output_inference/rtmdet_s_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp32
# TensorRT-FP16 speed test
python deploy/python/infer.py --model_dir=output_inference/rtmdet_s_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp16
```
**Notes:**
- Exporting with `-o exclude_nms=True` is only for speed testing; predictions from a model exported this way are not the final detection boxes.
- The speed results in the [Model Zoo](#model-zoo) are the fastest TensorRT-FP16 numbers, excluding data pre-processing and model post-processing (NMS).
## Citation
```
@misc{lyu2022rtmdet,
title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
year={2022},
eprint={2212.07784},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
# temporarily set the same as the YOLOX scheduler
epoch: 300
LearningRate:
base_lr: 0.0004
schedulers:
- !CosineDecay
max_epochs: 300
- !LinearWarmup
start_factor: 0.00001
steps: 1000
OptimizerBuilder:
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.05
architecture: RTMDet
norm_type: sync_bn
use_ema: True
ema_decay: 0.9998
ema_decay_type: "exponential"
act: silu
find_unused_parameters: True
depth_mult: 1.0
width_mult: 1.0
RTMDet:
backbone: CSPNeXt
neck: CSPNeXtPAFPN
head: RTMDetHead
post_process: ~
CSPNeXt:
arch: "P5"
return_idx: [2, 3, 4]
# use default config
# CSPNeXtPAFPN:
RTMDetHead:
exp_on_reg: False
fpn_strides: [8, 16, 32]
grid_cell_offset: 0
nms:
name: MultiClassNMS
nms_top_k: 1000
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.6
architecture: RTMDet
norm_type: sync_bn
use_ema: True
ema_decay: 0.9998
ema_decay_type: "exponential"
act: silu
find_unused_parameters: True
with_mask: True
depth_mult: 1.0
width_mult: 1.0
RTMDet:
backbone: CSPNeXt
neck: CSPNeXtPAFPN
head: RTMDetInsHead
with_mask: True
post_process: ~
CSPNeXt:
arch: "P5"
return_idx: [2, 3, 4]
# use default config
# CSPNeXtPAFPN:
RTMDetInsHead:
exp_on_reg: False
fpn_strides: [8, 16, 32]
grid_cell_offset: 0
nms:
name: MultiClassNMS
nms_top_k: 1000
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.6
return_index: True
# TrainReader is temporarily set the same as YOLOX's TrainReader
# EvalReader and TestReader are the final RTMDet readers
worker_num: 4
TrainReader:
sample_transforms:
- Decode: {}
- Mosaic:
prob: 1.0
input_dim: [640, 640]
degrees: [-10, 10]
scale: [0.1, 2.0]
shear: [-2, 2]
translate: [-0.1, 0.1]
enable_mixup: True
mixup_prob: 1.0
mixup_scale: [0.5, 1.5]
- AugmentHSV: {is_bgr: False, hgain: 5, sgain: 30, vgain: 30}
- PadResize: {target_size: 640}
- RandomFlip: {}
batch_transforms:
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- PadGT: {}
batch_size: 32
shuffle: True
drop_last: True
use_shared_memory: True
collate_batch: True
mosaic_epoch: 280
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: True, interp: 1}
- Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
batch_size: 1
TestReader:
inputs_def:
image_shape: [3, 640, 640]
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: True, interp: 1}
- Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
batch_size: 1
fuse_normalize: False
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/rtmdet_ins_cspnext.yml',
'./_base_/rtmdet_reader.yml',
]
depth_mult: 1.0
width_mult: 1.0
log_iter: 100
snapshot_epoch: 10
weights: output/rtmdet_ins_l_300e_coco/model_final
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/rtmdet_ins_cspnext.yml',
'./_base_/rtmdet_reader.yml',
]
depth_mult: 0.67
width_mult: 0.75
log_iter: 100
snapshot_epoch: 10
weights: output/rtmdet_ins_m_300e_coco/model_final
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/rtmdet_ins_cspnext.yml',
'./_base_/rtmdet_reader.yml',
]
depth_mult: 0.33
width_mult: 0.50
log_iter: 100
snapshot_epoch: 10
weights: output/rtmdet_ins_s_300e_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/cspnext_s_pretrained.pdparams
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/rtmdet_ins_cspnext.yml',
'./_base_/rtmdet_reader.yml',
]
depth_mult: 0.167 # 0.33 in yolox-tiny
width_mult: 0.375
log_iter: 100
snapshot_epoch: 10
weights: output/rtmdet_ins_t_300e_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/cspnext_t_pretrained.pdparams
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/rtmdet_ins_cspnext.yml',
'./_base_/rtmdet_reader.yml',
]
depth_mult: 1.33
width_mult: 1.25
log_iter: 100
snapshot_epoch: 10
weights: output/rtmdet_ins_x_300e_coco/model_final