# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from .csp_darknet import BaseConv, DWConv, BottleNeck, SPPFLayer
from ..shape_spec import ShapeSpec

__all__ = ['C2fLayer', 'C2Layer', 'YOLOv8CSPDarkNet']


class C2fLayer(nn.Layer):
    """C2f layer with 2 convs, named C2f in YOLOv8.

    A 1x1 conv produces ``2 * c`` channels which are split into two chunks
    of ``c`` channels along axis 1. The bottlenecks are then applied as a
    chain starting from the second chunk, and *every* intermediate output
    is kept. All ``2 + num_blocks`` chunks are concatenated along axis 1
    and fused by a final 1x1 conv.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        num_blocks (int): Number of chained bottlenecks, default as 1.
        shortcut (bool): Forwarded to each BottleNeck, default as False.
        expansion (float): Hidden channels are
            ``int(out_channels * expansion)``, default as 0.5.
        depthwise (bool): Forwarded to each BottleNeck, default as False.
        bias (bool): Forwarded to every conv, default as False.
        act (str): Activation function type, default as 'silu'.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_blocks=1,
                 shortcut=False,
                 expansion=0.5,
                 depthwise=False,
                 bias=False,
                 act="silu"):
        super(C2fLayer, self).__init__()
        self.c = int(out_channels * expansion)  # hidden channels
        self.conv1 = BaseConv(
            in_channels, 2 * self.c, ksize=1, stride=1, bias=bias, act=act)
        # conv2 consumes the two split chunks plus one chunk per bottleneck.
        self.conv2 = BaseConv(
            (2 + num_blocks) * self.c,
            out_channels,
            ksize=1,
            stride=1,
            bias=bias,
            act=act)
        self.bottlenecks = nn.LayerList([
            BottleNeck(
                self.c,
                self.c,
                shortcut=shortcut,
                kernel_sizes=(3, 3),
                expansion=1.0,
                depthwise=depthwise,
                bias=bias,
                act=act) for _ in range(num_blocks)
        ])

    def forward(self, x):
        """Apply the C2f block to ``x`` and return the fused feature map."""
        chunks = list(self.conv1(x).split((self.c, self.c), 1))
        # Each bottleneck consumes the most recently produced chunk, so the
        # bottlenecks form a chain whose every stage is also kept for concat.
        for bottleneck in self.bottlenecks:
            chunks.append(bottleneck(chunks[-1]))
        return self.conv2(paddle.concat(chunks, 1))


class C2Layer(nn.Layer):
    """C2 layer with 2 convs, named C2 in YOLOv8.

    A 1x1 conv produces ``2 * c`` channels which are split into two chunks
    of ``c`` channels along axis 1. The first chunk goes through the
    sequential bottlenecks, the second is passed through untouched; both
    are concatenated along axis 1 and fused by a final 1x1 conv.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        num_blocks (int): Number of sequential bottlenecks, default as 1.
        shortcut (bool): Forwarded to each BottleNeck, default as False.
        expansion (float): Hidden channels are
            ``int(out_channels * expansion)``, default as 0.5.
        depthwise (bool): Forwarded to each BottleNeck, default as False.
        bias (bool): Forwarded to every conv, default as False.
        act (str): Activation function type, default as 'silu'.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_blocks=1,
                 shortcut=False,
                 expansion=0.5,
                 depthwise=False,
                 bias=False,
                 act="silu"):
        super(C2Layer, self).__init__()
        self.c = int(out_channels * expansion)  # hidden channels
        self.conv1 = BaseConv(
            in_channels, 2 * self.c, ksize=1, stride=1, bias=bias, act=act)
        self.conv2 = BaseConv(
            2 * self.c, out_channels, ksize=1, stride=1, bias=bias, act=act)
        self.bottlenecks = nn.Sequential(*(BottleNeck(
            self.c,
            self.c,
            shortcut=shortcut,
            kernel_sizes=(3, 3),
            expansion=1.0,
            depthwise=depthwise,
            bias=bias,
            act=act) for _ in range(num_blocks)))

    def forward(self, x):
        """Apply the C2 block to ``x`` and return the fused feature map."""
        processed, passthrough = self.conv1(x).split((self.c, self.c), 1)
        processed = self.bottlenecks(processed)
        return self.conv2(paddle.concat((processed, passthrough), 1))


@register
@serializable
class YOLOv8CSPDarkNet(nn.Layer):
    """
    YOLOv8 CSPDarkNet backbone.
    diff with YOLOv5 CSPDarkNet:
    1. self.stem ksize 3 in YOLOv8 while 6 in YOLOv5
    2. use C2fLayer in YOLOv8 while CSPLayer in YOLOv5
    3. num_blocks [3,6,6,3] in YOLOv8 while [3,6,9,3] in YOLOv5
    4. channels of last stage in M/L/X

    Args:
        arch (str): Architecture of YOLOv8 CSPDarkNet, from {P5, P6}
        depth_mult (float): Depth multiplier, multiply number of blocks in
            each C2fLayer (rounded, at least 1), default as 1.0.
        width_mult (float): Width multiplier, multiply number of channels in
            each layer, default as 1.0.
        last_stage_ch (int): Output channels of the last stage before
            ``width_mult`` is applied, default as 1024.
        last2_stage_ch (int): Output channels of the second-to-last stage
            before ``width_mult`` is applied. Only used when ``arch`` is
            'P6', default as 512.
        depthwise (bool): Whether to use depth-wise conv layer.
        act (str): Activation function type, default as 'silu'.
        trt (bool): Accepted as a shared config value; it is not read
            anywhere in this class.
        return_idx (list): Index of stages whose feature maps are returned.
            Index 0 refers to the stem, index ``k`` to the k-th stage.
    """

    __shared__ = ['depth_mult', 'width_mult', 'act', 'trt']

    # in_channels, out_channels, num_blocks, add_shortcut, use_sppf
    arch_settings = {
        'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False],
               [256, 512, 6, True, False], [512, 1024, 3, True, True]],
        'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False],
               [256, 512, 6, True, False], [512, 768, 3, True, False],
               [768, 1024, 3, True, True]],
    }

    def __init__(self,
                 arch='P5',
                 depth_mult=1.0,
                 width_mult=1.0,
                 last_stage_ch=1024,
                 last2_stage_ch=512,
                 depthwise=False,
                 act='silu',
                 trt=False,
                 return_idx=[2, 3, 4]):
        super(YOLOv8CSPDarkNet, self).__init__()
        # Copy so the instance never aliases the (shared) default list.
        self.return_idx = list(return_idx)
        Conv = DWConv if depthwise else BaseConv

        # Work on a per-instance copy: `arch_settings` is a class attribute,
        # and the channel overrides below must not leak into other instances
        # built later in the same process.
        arch_setting = [list(stage_cfg) for stage_cfg in self.arch_settings[arch]]

        # channels of last stage in M/L/X will be smaller
        if last_stage_ch != 1024:
            assert last_stage_ch > 0
            arch_setting[-1][1] = last_stage_ch
        if arch == 'P6' and last2_stage_ch != 768:
            assert last2_stage_ch > 0
            # The second-to-last stage's output feeds the last stage's input.
            arch_setting[-2][1] = last2_stage_ch
            arch_setting[-1][0] = last2_stage_ch

        base_channels = int(arch_setting[0][0] * width_mult)
        self.stem = Conv(
            3, base_channels, ksize=3, stride=2, bias=False, act=act)

        _out_channels = [base_channels]
        # Running counter embedded in sublayer names; keep the numbering
        # scheme unchanged so existing checkpoints keep matching.
        layers_num = 1
        self.csp_dark_blocks = []

        for i, (in_channels, out_channels, num_blocks, shortcut,
                use_sppf) in enumerate(arch_setting):
            in_channels = int(in_channels * width_mult)
            out_channels = int(out_channels * width_mult)
            _out_channels.append(out_channels)
            num_blocks = max(round(num_blocks * depth_mult), 1)
            stage = []

            # Stride-2 3x3 conv halves the spatial resolution of each stage.
            conv_layer = self.add_sublayer(
                'layers{}.stage{}.conv_layer'.format(layers_num, i + 1),
                Conv(
                    in_channels, out_channels, 3, 2, bias=False, act=act))
            stage.append(conv_layer)
            layers_num += 1

            c2f_layer = self.add_sublayer(
                'layers{}.stage{}.c2f_layer'.format(layers_num, i + 1),
                C2fLayer(
                    out_channels,
                    out_channels,
                    num_blocks=num_blocks,
                    shortcut=shortcut,
                    depthwise=depthwise,
                    bias=False,
                    act=act))
            stage.append(c2f_layer)
            layers_num += 1

            if use_sppf:
                sppf_layer = self.add_sublayer(
                    'layers{}.stage{}.sppf_layer'.format(layers_num, i + 1),
                    SPPFLayer(
                        out_channels,
                        out_channels,
                        ksize=5,
                        bias=False,
                        act=act))
                stage.append(sppf_layer)
                layers_num += 1

            self.csp_dark_blocks.append(nn.Sequential(*stage))

        self._out_channels = [_out_channels[i] for i in self.return_idx]
        # Index 0 is the stem (stride 2); every stage doubles the stride.
        self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx]

    def forward(self, inputs):
        """Run the backbone on ``inputs['image']``.

        Returns:
            list: Outputs of the stages whose 1-based index is listed in
                ``return_idx``, in stage order.
        """
        x = inputs['image']
        outputs = []
        x = self.stem(x)
        for stage_idx, layer in enumerate(self.csp_dark_blocks, start=1):
            x = layer(x)
            if stage_idx in self.return_idx:
                outputs.append(x)
        return outputs

    @property
    def out_shape(self):
        """List of ShapeSpec (channels, stride), one per ``return_idx`` entry."""
        return [
            ShapeSpec(
                channels=c, stride=s)
            for c, s in zip(self._out_channels, self.strides)
        ]