Commit 522a602f authored by wangkx1's avatar wangkx1
Browse files

siton bug

parent abb99c90
# YOLOv6-seg reader config, 640x640 input (base augmentation: mosaic/mixup off).
# Anchors below are referenced by all three readers.
input_height: &input_height 640
input_width: &input_width 640
input_size: &input_size [*input_height, *input_width]
mosaic_epoch: &mosaic_epoch 300

worker_num: 4

TrainReader:
  sample_transforms:
    - DecodeNormResizeMask: {target_size: *input_size, mosaic: True}
    - MosaicPerspective:
        mosaic_prob: 0.0
        target_size: *input_size
        degrees: 0.0
        translate: 0.1
        scale: 0.5
        shear: 0.0
        mixup_prob: 0.0
        with_mask: True
    - Poly2Mask: {del_poly: True}
    - RandomHSV: {hgain: 0.015, sgain: 0.7, vgain: 0.4}
    - RandomFlip: {}
  batch_transforms:
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
    - PadGT: {}
  batch_size: 8
  shuffle: True
  drop_last: True
  use_shared_memory: True
  collate_batch: False
  mosaic_epoch: *mosaic_epoch

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, *input_height, *input_width]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  fuse_normalize: False
# YOLOv6-seg reader config, 640x640 input (high-augmentation variant:
# stronger scale jitter 0.9 and mixup_prob 0.1 vs. the base reader).
input_height: &input_height 640
input_width: &input_width 640
input_size: &input_size [*input_height, *input_width]
mosaic_epoch: &mosaic_epoch 300

worker_num: 4

TrainReader:
  sample_transforms:
    - DecodeNormResizeMask: {target_size: *input_size, mosaic: True}
    - MosaicPerspective:
        mosaic_prob: 0.0
        target_size: *input_size
        degrees: 0.0
        translate: 0.1
        scale: 0.9 #
        shear: 0.0
        mixup_prob: 0.1 #
        with_mask: True
    - Poly2Mask: {del_poly: True}
    - RandomHSV: {hgain: 0.015, sgain: 0.7, vgain: 0.4}
    - RandomFlip: {}
  batch_transforms:
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
    - PadGT: {}
  batch_size: 8
  shuffle: True
  drop_last: True
  use_shared_memory: True
  collate_batch: False
  mosaic_epoch: *mosaic_epoch

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, *input_height, *input_width]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  fuse_normalize: False
# YOLOv6-seg L model config (depth/width mult 1.0/1.0).
_BASE_: [
  '../datasets/coco_instance.yml',
  '../runtime.yml',
  '_base_/optimizer_300e.yml',
  '_base_/yolov6_seg_cspbep.yml',
  '_base_/yolov6_seg_reader_high_aug.yml',
]
depth_mult: 1.0
width_mult: 1.0

log_iter: 20
snapshot_epoch: 10
weights: output/yolov6_seg_l_300e_coco/model_final

### reader config
TrainReader:
  batch_size: 8 # default 8 gpus, total bs = 64

EvalReader:
  batch_size: 1

### model config
act: 'silu'
training_mode: "conv_silu_nobias" # Note: L use silu, seg use conv_silu_nobias

YOLOv6:
  backbone: CSPBepBackbone
  neck: CSPRepBiFPAN
  yolo_head: EffiDeInsHead
  post_process: ~

CSPBepBackbone:
  csp_e: 0.50

CSPRepBiFPAN:
  csp_e: 0.50

EffiDeInsHead:
  reg_max: 16
  use_dfl: True
  iou_type: 'giou'
  loss_weight: {cls: 1.0, iou: 2.5, dfl: 0.5}
# YOLOv6-seg M model config (depth/width mult 0.60/0.75).
_BASE_: [
  '../datasets/coco_instance.yml',
  '../runtime.yml',
  '_base_/optimizer_300e.yml',
  '_base_/yolov6_seg_cspbep.yml',
  '_base_/yolov6_seg_reader_high_aug.yml',
]
depth_mult: 0.60
width_mult: 0.75

log_iter: 20
snapshot_epoch: 10
weights: output/yolov6_seg_m_300e_coco/model_final

### reader config
TrainReader:
  batch_size: 8 # default 8 gpus, total bs = 64

EvalReader:
  batch_size: 1

### model config
act: 'relu'
training_mode: "repvgg"

YOLOv6:
  backbone: CSPBepBackbone
  neck: CSPRepBiFPAN
  yolo_head: EffiDeInsHead
  post_process: ~

CSPBepBackbone:
  csp_e: 0.67

CSPRepBiFPAN:
  csp_e: 0.67

EffiDeInsHead:
  reg_max: 16
  use_dfl: True
  iou_type: 'giou'
  loss_weight: {cls: 1.0, iou: 2.5, dfl: 0.5}
# YOLOv6-seg N model config (depth/width mult 0.33/0.25, base reader).
_BASE_: [
  '../datasets/coco_instance.yml',
  '../runtime.yml',
  '_base_/optimizer_300e.yml',
  '_base_/yolov6_seg_efficientrep.yml',
  '_base_/yolov6_seg_reader.yml',
]
depth_mult: 0.33
width_mult: 0.25

log_iter: 20
snapshot_epoch: 10
weights: output/yolov6_seg_n_300e_coco/model_final

### reader config
TrainReader:
  batch_size: 8 # default 8 gpus, total bs = 64

EvalReader:
  batch_size: 1

### model config
act: 'relu'
training_mode: "repvgg"

YOLOv6:
  backbone: EfficientRep
  neck: RepBiFPAN
  yolo_head: EffiDeInsHead
  post_process: ~

EffiDeInsHead:
  reg_max: 0
  use_dfl: False # False in n/s
  loss_weight: {cls: 1.0, iou: 2.5}
  iou_type: 'siou' # only in n version
# YOLOv6-seg S model config (depth/width mult 0.33/0.50). Overrides the
# inherited EvalReader in place, so it redeclares the size anchors locally.
_BASE_: [
  '../datasets/coco_instance.yml',
  '../runtime.yml',
  '_base_/optimizer_300e.yml',
  '_base_/yolov6_seg_efficientrep.yml',
  '_base_/yolov6_seg_reader.yml',
]
depth_mult: 0.33
width_mult: 0.50

log_iter: 20
snapshot_epoch: 10
weights: output/yolov6_seg_s_300e_coco/model_final

### reader config
TrainReader:
  batch_size: 8 # default 8 gpus, total bs = 64

# Local anchors: YAML aliases cannot reference anchors from _BASE_ files.
input_height: &input_height 640
input_width: &input_width 640
input_size: &input_size [*input_height, *input_width]

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Poly2Mask: {del_poly: True}
    - Permute: {}
  batch_size: 1

# Alternative eval pipeline (no keep_ratio, no padding) — kept for reference:
# EvalReader:
#   sample_transforms:
#     - Decode: {}
#     - Resize: {target_size: *input_size, keep_ratio: False, interp: 1}
#     # - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
#     - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
#     - Poly2Mask: {del_poly: True}
#     - Permute: {}
#   batch_size: 1

# Alternative rect-eval pipeline — kept for reference:
# EvalReader:
#   sample_transforms:
#     - Decode: {}
#     - YOLOv5KeepRatioResize: {target_size: *input_size, batch_shapes: True, size_divisor: 32, extra_pad_ratio: 0.5}
#     - LetterResize: {scale: *input_size, pad_val: 144, allow_scale_up: False}
#     - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
#     - Permute: {}
#   batch_size: 1 # only support bs=1

### model config
act: 'relu'
training_mode: "repvgg"

YOLOv6:
  backbone: EfficientRep
  neck: RepBiFPAN
  yolo_head: EffiDeInsHead
  post_process: ~

EffiDeInsHead:
  reg_max: 0
  use_dfl: False # False in n/s
  loss_weight: {cls: 1.0, iou: 2.5}
# YOLOv6-seg X model config (depth/width mult 1.33/1.25).
_BASE_: [
  '../datasets/coco_instance.yml',
  '../runtime.yml',
  '_base_/optimizer_300e.yml',
  '_base_/yolov6_seg_cspbep.yml',
  '_base_/yolov6_seg_reader_high_aug.yml',
]
depth_mult: 1.33
width_mult: 1.25

log_iter: 20
snapshot_epoch: 10
weights: output/yolov6_seg_x_300e_coco/model_final

### reader config
TrainReader:
  batch_size: 8 # default 8 gpus, total bs = 64

EvalReader:
  batch_size: 1

### model config
act: 'silu'
training_mode: "conv_silu_nobias" # Note: X use silu, seg use conv_silu_nobias

YOLOv6:
  backbone: CSPBepBackbone
  neck: CSPRepBiFPAN
  yolo_head: EffiDeInsHead
  post_process: ~

CSPBepBackbone:
  csp_e: 0.50

CSPRepBiFPAN:
  csp_e: 0.50

EffiDeInsHead:
  reg_max: 16
  use_dfl: True
  iou_type: 'giou'
  loss_weight: {cls: 1.0, iou: 2.5, dfl: 0.5}
# YOLOv6Lite
## 模型库
### YOLOv6Lite on COCO
| 网络模型 | 输入尺寸 | 图片数/GPU | 学习率策略 | TRT-FP16-Latency(ms) | mAP | AP50 | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
| :------------- | :------- | :-------: | :--------------: | :---------: | :-----: |:-----: | :-----: |:-----: | :-------------: | :-----: |
| *YOLOv6Lite-s | 320 | 32 | 400e | - | 22.3 | 34.2 | 0.55 | 0.56 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov6lite_s_400e_coco.pdparams) | [配置文件](./yolov6lite_s_400e_coco.yml) |
| *YOLOv6Lite-m | 320 | 32 | 400e | - | 24.8 | 37.7 | 0.79 | 0.67 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov6lite_m_400e_coco.pdparams) | [配置文件](./yolov6lite_m_400e_coco.yml) |
| *YOLOv6Lite-l | 320 | 32 | 400e | - | 27.6 | 41.6 | 1.09 | 0.87 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov6lite_l_400e_coco.pdparams) | [配置文件](./yolov6lite_l_400e_coco.yml) |
# 400-epoch training schedule: cosine-style YOLOv5 LR decay with 3-epoch
# exponential warmup; SGD momentum with Nesterov and L2 weight decay.
epoch: 400

LearningRate:
  base_lr: 0.01
  schedulers:
  - !YOLOv5LRDecay
    max_epochs: 400
    min_lr_ratio: 0.01
  - !ExpWarmup
    epochs: 3

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.937
    use_nesterov: True
  regularizer:
    factor: 0.0005
    type: L2
# 400-epoch schedule variant with lower base LR / momentum / weight decay
# (values match YOLOv5-style fine-tune hyperparameters).
epoch: 400

LearningRate:
  base_lr: 0.0032
  schedulers:
  - !YOLOv5LRDecay
    max_epochs: 400
    min_lr_ratio: 0.12
  - !ExpWarmup
    epochs: 2

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.843
    use_nesterov: True
  regularizer:
    factor: 0.00036
    type: L2
# YOLOv6Lite architecture config: lightweight backbone/neck/head with
# anchor-free head (reg_max 0, no DFL) and SIoU loss.
architecture: YOLOv6
norm_type: sync_bn
use_ema: True
ema_decay: 0.9999
ema_decay_type: "exponential"
find_unused_parameters: True
act: 'relu'
training_mode: "repvgg"
self_distill: False
width_mult: 1.0

YOLOv6:
  backbone: Lite_EffiBackbone
  neck: Lite_EffiNeck
  yolo_head: Lite_EffideHead
  post_process: ~

Lite_EffiBackbone:
  return_idx: [2, 3, 4]

Lite_EffiNeck:
  unified_channels: 96

Lite_EffideHead:
  fpn_strides: [8, 16, 32, 64]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  reg_max: 0
  use_dfl: False
  static_assigner_epoch: 4 # warmup_epoch
  loss_weight: {cls: 1.0, iou: 2.5}
  iou_type: 'siou' # 'siou' in lite s/m/l
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MultiClassNMS
    nms_top_k: 2000
    keep_top_k: 300
    score_threshold: 0.03
    nms_threshold: 0.65
# YOLOv6Lite reader config, 320x320 input, mosaic enabled for the full
# 400-epoch schedule; detection only (no mask transforms).
input_height: &input_height 320
input_width: &input_width 320
input_size: &input_size [*input_height, *input_width]
mosaic_epoch: &mosaic_epoch 400

worker_num: 4

TrainReader:
  sample_transforms:
    - DecodeNormResize: {target_size: *input_size, mosaic: True}
    - MosaicPerspective:
        mosaic_prob: 1.0
        target_size: *input_size
        degrees: 0.0
        translate: 0.1
        scale: 0.5
        shear: 0.0
        mixup_prob: 0.0
    - RandomHSV: {hgain: 0.015, sgain: 0.7, vgain: 0.4}
    - RandomFlip: {}
  batch_transforms:
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
    - PadGT: {}
  batch_size: 32
  shuffle: True
  drop_last: True
  use_shared_memory: True
  collate_batch: True
  mosaic_epoch: *mosaic_epoch

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, *input_height, *input_width]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *input_size, keep_ratio: True, interp: 1}
    - Pad: {size: *input_size, fill_value: [114., 114., 114.]}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  fuse_normalize: False
# YOLOv6Lite-L model config (width_mult 1.5).
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_400e.yml',
  '_base_/yolov6lite_effibackbone.yml',
  '_base_/yolov6lite_reader_320.yml',
]
width_mult: 1.5

log_iter: 100
snapshot_epoch: 10
weights: output/yolov6lite_l_400e_coco/model_final

### reader config
TrainReader:
  batch_size: 32 # default 8 gpus, total bs = 256

EvalReader:
  batch_size: 8
# YOLOv6Lite-M model config (width_mult 1.1).
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_400e.yml',
  '_base_/yolov6lite_effibackbone.yml',
  '_base_/yolov6lite_reader_320.yml',
]
width_mult: 1.1

log_iter: 100
snapshot_epoch: 10
weights: output/yolov6lite_m_400e_coco/model_final

### reader config
TrainReader:
  batch_size: 32 # default 8 gpus, total bs = 256

EvalReader:
  batch_size: 8
# YOLOv6Lite-S model config (width_mult 0.7).
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_400e.yml',
  '_base_/yolov6lite_effibackbone.yml',
  '_base_/yolov6lite_reader_320.yml',
]
width_mult: 0.7

log_iter: 100
snapshot_epoch: 10
weights: output/yolov6lite_s_400e_coco/model_final

### reader config
TrainReader:
  batch_size: 32 # default 8 gpus, total bs = 256

EvalReader:
  batch_size: 8
# YOLOv7 (YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors)
## 内容
- [模型库](#模型库)
- [使用说明](#使用说明)
- [速度测试](#速度测试)
- [引用](#引用)
## 模型库
### YOLOv7 on COCO
#### 基础模型
| 网络模型 | 输入尺寸 | 图片数/GPU | 学习率策略 | 模型推理耗时(ms) | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |:-----: |
| YOLOv7-L | 640 | 32 | 300e | 7.4 | 51.0 | 70.2 | 37.62 | 106.08 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams) | [配置文件](./yolov7_l_300e_coco.yml) |
| YOLOv7-X | 640 | 32 | 300e | 12.2 | 53.0 | 70.8 | 71.34 | 190.08 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov7_x_300e_coco.pdparams) | [配置文件](./yolov7_x_300e_coco.yml) |
#### [YOLOv7u](../yolov7u)
| 网络模型 | 输入尺寸 | 图片数/GPU | 学习率策略 | 模型推理耗时(ms) | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |:-----: |
| YOLOv7u-L | 640 | 32 | 300e | 9.0 | 52.1 | 68.8 | 43.59 | 130.10 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov7u_l_300e_coco.pdparams) | [配置文件](yolov7u/yolov7u_l_300e_coco.yml) |
#### P6大尺度模型
| 网络模型 | 输入尺寸 | 图片数/GPU | 学习率策略 | 模型推理耗时(ms) | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |:-----: |
| YOLOv7P6-W6 | 1280 | 16 | 300e | 25.5 | 54.4 | 71.8 | 70.43 | 360.26 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov7p6_w6_300e_coco.pdparams) | [配置文件](./yolov7p6_w6_300e_coco.yml) |
| YOLOv7P6-E6 | 1280 | 10 | 300e | 31.1 | 55.7 | 73.0 | 97.25 | 515.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov7p6_e6_300e_coco.pdparams) | [配置文件](./yolov7p6_e6_300e_coco.yml) |
| YOLOv7P6-D6 | 1280 | 8 | 300e | 37.4 | 56.1 | 73.3 | 133.81 | 702.92 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov7p6_d6_300e_coco.pdparams) | [配置文件](./yolov7p6_d6_300e_coco.yml) |
| YOLOv7P6-E6E | 1280 | 6 | 300e | 48.7 | 56.5 | 73.7 | 151.76 | 843.52 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov7p6_e6e_300e_coco.pdparams) | [配置文件](./yolov7p6_e6e_300e_coco.yml) |
#### tiny模型(LeakyReLU)
| 网络模型 | 输入尺寸 | 图片数/GPU | 学习率策略 | 模型推理耗时(ms) | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |:-----: |
| YOLOv7-tiny | 640 | 32 | 300e | 2.4 | 37.3 | 54.5 | 6.23 | 13.80 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov7_tiny_300e_coco.pdparams) | [配置文件](./yolov7_tiny_300e_coco.yml) |
| YOLOv7-tiny | 416 | 32 | 300e | 1.3 | 33.3 | 49.5 | 6.23 | 5.82 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov7_tiny_416_300e_coco.pdparams) | [配置文件](./yolov7_tiny_416_300e_coco.yml) |
| YOLOv7-tiny | 320 | 32 | 300e | - | 29.1 | 43.8 | 6.23 | 3.46 |[下载链接](https://paddledet.bj.bcebos.com/models/yolov7_tiny_320_300e_coco.pdparams) | [配置文件](./yolov7_tiny_320_300e_coco.yml) |
**注意:**
- YOLOv7模型训练使用COCO train2017作为训练集,Box AP为在COCO val2017上的`mAP(IoU=0.5:0.95)`结果;
- YOLOv7u 模型表示YOLOv7结构使用YOLOv8的head和loss,并结合YOLOR的ImplicitA和ImplicitM,是Anchor Free的检测方案,具体可参照[YOLOv7u](../yolov7u)
- YOLOv7模型训练过程中默认使用8 GPUs进行混合精度训练,默认lr为0.01为8卡总batch_size的设置,如果**GPU卡数**或者每卡**batch size**发生改动,也不需要改动学习率,但为了保证高精度最好使用**总batch size大于64**的配置去训练;
- YOLOv7模型训练的图片数/GPU(每卡batch size)默认是采用32G V100并且开启`--amp`混合精度训练的,其中P6模型也可以在配置文件中设置```use_aux: False```以使用更大的batch size。
- YOLOv7P6模型计算Params和FLOPs需设置成```use_aux: False```
- YOLOv7仅tiny模型采用`LeakyReLU`激活函数,基础模型和P6模型均采用`SiLU`激活函数。
- 模型推理耗时(ms)为TensorRT-FP16下测试的耗时,不包含数据预处理和模型输出后处理(NMS)的耗时。测试采用单卡Tesla T4 GPU,batch size=1,测试环境为**paddlepaddle-2.3.2**, **CUDA 11.2**, **CUDNN 8.2**, **GCC-8.2**, **TensorRT 8.0.3.4**,具体请参考[速度测试](#速度测试)
- 如果你设置了`--run_benchmark=True`, 你首先需要安装以下依赖`pip install pynvml psutil GPUtil`
### 部署模型
| 网络模型 | 输入尺寸 | 导出后的权重(w/o NMS) | ONNX(w/o NMS) |
| :-------- | :--------: | :---------------------: | :----------------: |
| YOLOv7-l | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_l_300e_coco_w_nms.zip) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_l_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_l_300e_coco_w_nms.onnx) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_l_300e_coco_wo_nms.onnx) |
| YOLOv7-x | 640 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_x_300e_coco_w_nms.zip) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_x_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_x_300e_coco_w_nms.onnx) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7_x_300e_coco_wo_nms.onnx) |
| YOLOv7P6-W6 | 1280 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_w6_300e_coco_w_nms.zip) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_w6_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_w6_300e_coco_w_nms.onnx) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_w6_300e_coco_wo_nms.onnx) |
| YOLOv7P6-E6 | 1280 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6_300e_coco_w_nms.zip) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6_300e_coco_w_nms.onnx) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6_300e_coco_wo_nms.onnx) |
| YOLOv7P6-D6 | 1280 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_d6_300e_coco_w_nms.zip) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_d6_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_d6_300e_coco_w_nms.onnx) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_d6_300e_coco_wo_nms.onnx) |
| YOLOv7P6-E6E | 1280 | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6e_300e_coco_w_nms.zip) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6e_300e_coco_wo_nms.zip) | [( w/ nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6e_300e_coco_w_nms.onnx) &#124; [( w/o nms)](https://paddledet.bj.bcebos.com/deploy/yoloseries/yolov7/yolov7p6_e6e_300e_coco_wo_nms.onnx) |
## 使用说明
### 0. **一键运行全流程**
将以下命令写在一个脚本文件里如```run.sh```,一键运行命令为:```sh run.sh```,也可命令行一句句去运行。
```bash
model_name=yolov7 # 可修改,如 ppyoloe
job_name=yolov7_tiny_300e_coco # 可修改,如 ppyoloe_plus_crn_s_80e_coco
config=configs/${model_name}/${job_name}.yml
log_dir=log_dir/${job_name}
# weights=https://bj.bcebos.com/v1/paddledet/models/${job_name}.pdparams
weights=output/${job_name}/model_final.pdparams
# 1.训练(单卡/多卡),加 --eval 表示边训边评估,加 --amp 表示混合精度训练
# CUDA_VISIBLE_DEVICES=0 python tools/train.py -c ${config} --eval --amp
python -m paddle.distributed.launch --log_dir=${log_dir} --gpus 0,1,2,3,4,5,6,7 tools/train.py -c ${config} --eval --amp
# 2.评估,加 --classwise 表示输出每一类mAP
CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c ${config} -o weights=${weights} --classwise
# 3.预测 (单张图/图片文件夹)
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ${config} -o weights=${weights} --infer_img=demo/000000014439_640x640.jpg --draw_threshold=0.5
# CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ${config} -o weights=${weights} --infer_dir=demo/ --draw_threshold=0.5
# 4.导出模型,以下3种模式选一种
## 普通导出,加trt表示用于trt加速,对NMS和silu激活函数提速明显
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} # trt=True
## exclude_post_process去除后处理导出,返回和YOLOv5导出ONNX时相同格式的concat后的1个Tensor,是未缩放回原图的坐标+分类置信度
# CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} exclude_post_process=True # trt=True
## exclude_nms去除NMS导出,返回2个Tensor,是缩放回原图后的坐标和分类置信度
# CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} exclude_nms=True # trt=True
# 5.部署预测,注意不能使用 去除后处理 或 去除NMS 导出后的模型去预测
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/${job_name} --image_file=demo/000000014439_640x640.jpg --device=GPU
# 6.部署测速,加 “--run_mode=trt_fp16” 表示在TensorRT FP16模式下测速,注意如需用到 trt_fp16 则必须为加 trt=True 导出的模型
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/${job_name} --image_file=demo/000000014439_640x640.jpg --device=GPU --run_benchmark=True # --run_mode=trt_fp16
# 7.onnx导出,一般结合 exclude_post_process去除后处理导出的模型
paddle2onnx --model_dir output_inference/${job_name} --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file ${job_name}.onnx
# 8.onnx trt测速
/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=${job_name}.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x640x640 --fp16
/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=${job_name}.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x640x640 --fp32
```
### 1. 训练
执行以下指令使用混合精度训练YOLOv7
```bash
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/yolov7/yolov7_l_300e_coco.yml --amp --eval
```
**注意:**
- `--amp`表示开启混合精度训练以避免显存溢出,`--eval`表示边训边验证。
### 2. 评估
执行以下命令在单个GPU上评估COCO val2017数据集
```bash
CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams
```
### 3. 推理
使用以下命令在单张GPU上预测图片,使用`--infer_img`推理单张图片以及使用`--infer_dir`推理文件中的所有图片。
```bash
# 推理单张图片
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams --infer_img=demo/000000014439_640x640.jpg
# 推理文件中的所有图片
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams --infer_dir=demo
```
### 4.导出模型
YOLOv7在GPU上推理部署或benchmark测速等需要通过`tools/export_model.py`导出模型。
当你**使用Paddle Inference但不使用TensorRT**时,运行以下的命令导出模型
```bash
python tools/export_model.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams
```
当你**使用Paddle Inference且使用TensorRT**时,需要指定`-o trt=True`来导出模型。
```bash
python tools/export_model.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams trt=True
```
如果你想将YOLOv7模型导出为**ONNX格式**,参考
[PaddleDetection模型导出为ONNX格式教程](../../deploy/EXPORT_ONNX_MODEL.md),运行以下命令:
```bash
# 导出推理模型
python tools/export_model.py -c configs/yolov7/yolov7_l_300e_coco.yml --output_dir=output_inference -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams
# 安装paddle2onnx
pip install paddle2onnx
# 转换成onnx格式
paddle2onnx --model_dir output_inference/yolov7_l_300e_coco --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 11 --save_file yolov7_l_300e_coco.onnx
```
**注意:** ONNX模型目前只支持batch_size=1
### 5.推理部署
YOLOv7可以使用以下方式进行部署:
- Paddle Inference [Python](../../deploy/python) & [C++](../../deploy/cpp)
- [Paddle-TensorRT](../../deploy/TENSOR_RT.md)
- [PaddleServing](https://github.com/PaddlePaddle/Serving)
- [PaddleSlim模型量化](../slim)
运行以下命令导出模型
```bash
python tools/export_model.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams trt=True
```
**注意:**
- trt=True表示**使用Paddle Inference且使用TensorRT**进行测速,速度会更快,默认不加即为False,表示**使用Paddle Inference但不使用TensorRT**进行测速。
- 如果是使用Paddle Inference在TensorRT FP16模式下部署,需要参考[Paddle Inference文档](https://www.paddlepaddle.org.cn/inference/master/user_guides/download_lib.html#python),下载并安装与你的CUDA, CUDNN和TensorRT相应的wheel包。
#### 5.1.Python部署
`deploy/python/infer.py`使用上述导出后的Paddle Inference模型用于推理和benchmark测速,如果设置了`--run_benchmark=True`, 首先需要安装以下依赖`pip install pynvml psutil GPUtil`
```bash
# Python部署推理单张图片
python deploy/python/infer.py --model_dir=output_inference/yolov7_l_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu
# 推理文件夹下的所有图片
python deploy/python/infer.py --model_dir=output_inference/yolov7_l_300e_coco --image_dir=demo/ --device=gpu
```
#### 5.2. C++部署
`deploy/cpp/build/main`使用上述导出后的Paddle Inference模型用于C++推理部署, 首先按照[docs](../../deploy/cpp/docs)编译安装环境。
```bash
# C++部署推理单张图片
./deploy/cpp/build/main --model_dir=output_inference/yolov7_l_300e_coco/ --image_file=demo/000000014439_640x640.jpg --run_mode=paddle --device=GPU --threshold=0.5 --output_dir=cpp_infer_output/yolov7_l_300e_coco
```
## 速度测试
为了公平起见,在[模型库](#模型库)中的速度测试结果均为不包含数据预处理和模型输出后处理(NMS)的数据(与[YOLOv4(AlexyAB)](https://github.com/AlexeyAB/darknet)测试方法一致),需要在导出模型时指定`-o exclude_nms=True`。测速需设置`--run_benchmark=True`, 首先需要安装以下依赖`pip install pynvml psutil GPUtil`
**使用Paddle Inference但不使用TensorRT**进行测速,执行以下命令:
```bash
# 导出模型
python tools/export_model.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams exclude_nms=True
# 速度测试,使用run_benchmark=True
python deploy/python/infer.py --model_dir=output_inference/yolov7_l_300e_coco --image_file=demo/000000014439_640x640.jpg --run_mode=paddle --device=gpu --run_benchmark=True
```
**使用Paddle Inference且使用TensorRT**进行测速,执行以下命令:
```bash
# 导出模型,使用trt=True
python tools/export_model.py -c configs/yolov7/yolov7_l_300e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov7_l_300e_coco.pdparams exclude_nms=True trt=True
# 速度测试,使用run_benchmark=True
python deploy/python/infer.py --model_dir=output_inference/yolov7_l_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu --run_benchmark=True
# tensorRT-FP32测速
python deploy/python/infer.py --model_dir=output_inference/yolov7_l_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp32
# tensorRT-FP16测速
python deploy/python/infer.py --model_dir=output_inference/yolov7_l_300e_coco --image_file=demo/000000014439_640x640.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp16
```
**注意:**
- 导出模型时指定`-o exclude_nms=True`仅作为测速时用,这样导出的模型其推理部署预测的结果不是最终检出框的结果。
- [模型库](#模型库)中的速度测试结果为tensorRT-FP16测速后的最快速度,为不包含数据预处理和模型输出后处理(NMS)的耗时。
## 引用
```
@article{wang2022yolov7,
title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors},
author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark},
journal={arXiv preprint arXiv:2207.02696},
year={2022}
}
```
# 300-epoch training schedule: YOLOv5-style LR decay with 3-epoch
# exponential warmup; SGD momentum with Nesterov and L2 weight decay.
epoch: 300

LearningRate:
  base_lr: 0.01
  schedulers:
  - !YOLOv5LRDecay
    max_epochs: 300
    min_lr_ratio: 0.1
  - !ExpWarmup
    epochs: 3

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.937
    use_nesterov: True
  regularizer:
    factor: 0.0005
    type: L2
# 300-epoch schedule variant: higher LR floor (min_lr_ratio 0.2).
epoch: 300

LearningRate:
  base_lr: 0.01
  schedulers:
  - !YOLOv5LRDecay
    max_epochs: 300
    min_lr_ratio: 0.2 #
  - !ExpWarmup
    epochs: 3

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.937
    use_nesterov: True
  regularizer:
    factor: 0.0005
    type: L2
# 300-epoch schedule variant: lower LR floor (min_lr_ratio 0.01).
epoch: 300

LearningRate:
  base_lr: 0.01
  schedulers:
  - !YOLOv5LRDecay
    max_epochs: 300
    min_lr_ratio: 0.01 #
  - !ExpWarmup
    epochs: 3

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.937
    use_nesterov: True
  regularizer:
    factor: 0.0005
    type: L2
# YOLOv7 (L arch) architecture config: anchor-based head on three strides.
architecture: YOLOv7
norm_type: sync_bn
use_ema: True
ema_decay: 0.9999
ema_decay_type: "exponential"
act: silu
find_unused_parameters: True

depth_mult: 1.0 # no use in YOLOv7
width_mult: 1.0

arch: 'L'
use_aux: False
use_implicit: False # default False here, True in paper

YOLOv7:
  backbone: ELANNet
  neck: ELANFPN
  yolo_head: YOLOv7Head
  post_process: ~

ELANNet:
  return_idx: [2, 3, 4]
  depthwise: false

ELANFPN:
  depthwise: false

YOLOv7Head:
  anchors: [[12, 16], [19, 36], [40, 28],
            [36, 75], [76, 55], [72, 146],
            [142, 110], [192, 243], [459, 401]]
  anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
  stride: [8, 16, 32]
  loss: YOLOv7Loss
  nms:
    name: MultiClassNMS
    nms_top_k: 3000
    keep_top_k: 300
    score_threshold: 0.001
    nms_threshold: 0.7

YOLOv7Loss:
  downsample_ratios: [8, 16, 32]
  balance: [4.0, 1.0, 0.4]
  box_weight: 0.05
  # NOTE(review): 'cls_weght' looks like a typo for 'cls_weight', but the key
  # must match the YOLOv7Loss constructor parameter name — confirm against the
  # loss implementation before renaming.
  cls_weght: 0.3
  obj_weight: 0.7
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment