yolov7_pafpn.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
#   
# Licensed under the Apache License, Version 2.0 (the "License");   
# you may not use this file except in compliance with the License.  
# You may obtain a copy of the License at   
#   
#     http://www.apache.org/licenses/LICENSE-2.0    
# 
# Unless required by applicable law or agreed to in writing, software   
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
# See the License for the specific language governing permissions and   
# limitations under the License.

import paddle
import paddle.nn as nn
from ppdet.core.workspace import register, serializable
from ..backbones.yolov7_elannet import BaseConv, ELANLayer, ELAN2Layer, MPConvLayer, RepConv, DownC
from ..shape_spec import ShapeSpec

__all__ = ['ELANFPN', 'ELANFPNP6']


@register
@serializable
class ELANFPN(nn.Layer):
    """
    YOLOv7 E-ELAN FPN, used in P5 model like ['tiny', 'L', 'X'], return 3 feats
    """
    __shared__ = ['arch', 'depth_mult', 'width_mult', 'act', 'trt']

    # [in_ch, mid_ch1, mid_ch2, out_ch] of each ELANLayer (2 FPN + 2 PAN): 
    ch_settings = {
        'tiny': [[256, 64, 64, 128], [128, 32, 32, 64], [64, 64, 64, 128],
                 [128, 128, 128, 256]],
        'L': [[512, 256, 128, 256], [256, 128, 64, 128], [128, 256, 128, 256],
              [256, 512, 256, 512]],
        'X': [[640, 256, 256, 320], [320, 128, 128, 160], [160, 256, 256, 320],
              [320, 512, 512, 640]],
    }
    # concat_list of each ELANLayer:
    concat_list_settings = {
        'tiny': [-1, -2, -3, -4],
        'L': [-1, -2, -3, -4, -5, -6],
        'X': [-1, -3, -5, -7, -8],
    }
    num_blocks = {'tiny': 2, 'L': 4, 'X': 6}

    def __init__(
            self,
            arch='L',
            depth_mult=1.0,
            width_mult=1.0,
            in_channels=[512, 1024, 512],  # layer num: 24 37 51 [c3,c4,c5]
            out_channels=[256, 512, 1024],  # layer num: 75 88 101
            depthwise=False,
            for_u6=False,  # u6 branch, YOLOv7u version
            act='silu',
            trt=False):
        super(ELANFPN, self).__init__()
        self.in_channels = in_channels
        self.arch = arch
        concat_list = self.concat_list_settings[arch]
        num_blocks = self.num_blocks[arch]
        ch_settings = self.ch_settings[arch]
        self._out_channels = [chs[-1] * 2 for chs in ch_settings[1:]]
        self.for_u6 = for_u6

        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[0][:]
        self.lateral_conv1 = BaseConv(
            self.in_channels[2], out_ch, 1, 1, act=act)  # 512->256
        self.route_conv1 = BaseConv(
            self.in_channels[1], out_ch, 1, 1, act=act)  # 1024->256
        self.elan_fpn1 = ELANLayer(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[1][:]
        self.lateral_conv2 = BaseConv(in_ch, out_ch, 1, 1, act=act)  # 256->128
        self.route_conv2 = BaseConv(
            self.in_channels[0], out_ch, 1, 1, act=act)  # 512->128
        self.elan_fpn2 = ELANLayer(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[2][:]
        if self.arch in ['L', 'X']:
            self.mp_conv1 = MPConvLayer(in_ch, out_ch, 0.5, depthwise, act=act)
            # TODO: named down_conv1
        elif self.arch in ['tiny']:
            self.mp_conv1 = BaseConv(in_ch, out_ch, 3, 2, act=act)
        else:
            raise AttributeError("Unsupported arch type: {}".format(self.arch))
        self.elan_pan1 = ELANLayer(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[3][:]
        if self.arch in ['L', 'X']:
            self.mp_conv2 = MPConvLayer(in_ch, out_ch, 0.5, depthwise, act=act)
        elif self.arch in ['tiny']:
            self.mp_conv2 = BaseConv(in_ch, out_ch, 3, 2, act=act)
        else:
            raise AttributeError("Unsupported arch type: {}".format(self.arch))
        self.elan_pan2 = ELANLayer(
            out_ch + self.in_channels[2],  # concat([pan_out1_down, c5], 1)
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        self.repconvs = nn.LayerList()
        Conv = RepConv if (self.arch == 'L' and not self.for_u6) else BaseConv
        for out_ch in self._out_channels:
            self.repconvs.append(Conv(int(out_ch // 2), out_ch, 3, 1, act=act))

    def forward(self, feats, for_mot=False):
        assert len(feats) == len(self.in_channels)
        [c3, c4, c5] = feats  # 24  37  51
        # [8, 512, 80, 80] [8, 1024, 40, 40] [8, 512, 20, 20]

        # Top-Down FPN
        p5_lateral = self.lateral_conv1(c5)  # 512->256
        p5_up = self.upsample(p5_lateral)
        route_c4 = self.route_conv1(c4)  # 1024->256 # route
        f_out1 = paddle.concat([route_c4, p5_up], 1)  # 512 # [8, 512, 40, 40]
        fpn_out1 = self.elan_fpn1(f_out1)  # 512 -> 128*4 + 256*2 -> 1024 -> 256
        # 63

        fpn_out1_lateral = self.lateral_conv2(fpn_out1)  # 256->128
        fpn_out1_up = self.upsample(fpn_out1_lateral)
        route_c3 = self.route_conv2(c3)  # 512->128 # route
        f_out2 = paddle.concat([route_c3, fpn_out1_up], 1)  # 256
        fpn_out2 = self.elan_fpn2(f_out2)  # 256 -> 64*4 + 128*2 -> 512 -> 128
        # layer 75: [8, 128, 80, 80]

        # Buttom-Up PAN
        p_out1_down = self.mp_conv1(fpn_out2)  # 128
        p_out1 = paddle.concat([p_out1_down, fpn_out1], 1)  # 128*2 + 256 -> 512
        pan_out1 = self.elan_pan1(p_out1)  # 512 -> 128*4 + 256*2 -> 1024 -> 256
        # layer 88: [8, 256, 40, 40]

        pan_out1_down = self.mp_conv2(pan_out1)  # 256
        p_out2 = paddle.concat([pan_out1_down, c5], 1)  # 256*2 + 512 -> 1024
        pan_out2 = self.elan_pan2(
            p_out2)  # 1024 -> 256*4 + 512*2 -> 2048 -> 512
        # layer 101: [8, 512, 20, 20]

        outputs = []
        pan_outs = [fpn_out2, pan_out1, pan_out2]  # 75 88 101
        for i, out in enumerate(pan_outs):
            outputs.append(self.repconvs[i](out))
        return outputs

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape], }

    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]


@register
@serializable
class ELANFPNP6(nn.Layer):
    """
    YOLOv7P6 E-ELAN FPN, used in P6 model like ['W6', 'E6', 'D6', 'E6E']
    return 4 feats
    """
    __shared__ = ['arch', 'depth_mult', 'width_mult', 'act', 'use_aux', 'trt']

    # in_ch, mid_ch1, mid_ch2, out_ch of each ELANLayer (3 FPN + 3 PAN): 
    ch_settings = {
        'W6':
        [[512, 384, 192, 384], [384, 256, 128, 256], [256, 128, 64, 128],
         [128, 256, 128, 256], [256, 384, 192, 384], [384, 512, 256, 512]],
        'E6': [[640, 384, 192, 480], [480, 256, 128, 320], [320, 128, 64, 160],
               [160, 256, 128, 320], [320, 384, 192, 480],
               [480, 512, 256, 640]],
        'D6': [[768, 384, 192, 576], [576, 256, 128, 384], [384, 128, 64, 192],
               [192, 256, 128, 384], [384, 384, 192, 576],
               [576, 512, 256, 768]],
        'E6E': [[640, 384, 192, 480], [480, 256, 128, 320],
                [320, 128, 64, 160], [160, 256, 128, 320],
                [320, 384, 192, 480], [480, 512, 256, 640]],
    }
    # concat_list of each ELANLayer:
    concat_list_settings = {
        'W6': [-1, -2, -3, -4, -5, -6],
        'E6': [-1, -2, -3, -4, -5, -6, -7, -8],
        'D6': [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10],
        'E6E': [-1, -2, -3, -4, -5, -6, -7, -8],
    }
    num_blocks = {'W6': 4, 'E6': 6, 'D6': 8, 'E6E': 6}

    def __init__(
            self,
            arch='W6',
            use_aux=False,
            depth_mult=1.0,
            width_mult=1.0,
            in_channels=[256, 512, 768, 512],  # 19 28 37 47 (c3,c4,c5,c6)
            out_channels=[256, 512, 768, 1024],  # layer: 83 93 103 113
            depthwise=False,
            act='silu',
            trt=False):
        super(ELANFPNP6, self).__init__()
        self.in_channels = in_channels
        self.arch = arch
        self.use_aux = use_aux
        concat_list = self.concat_list_settings[arch]
        num_blocks = self.num_blocks[arch]
        ch_settings = self.ch_settings[arch]
        self._out_channels = [chs[-1] * 2 for chs in ch_settings[2:]]
        if self.training and self.use_aux:
            chs_aux = [chs[-1] for chs in ch_settings[:3][::-1]
                       ] + [self.in_channels[3]]
            self.in_channels_aux = chs_aux
            self._out_channels = self._out_channels + [320, 640, 960, 1280]
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
        ELANBlock = ELAN2Layer if self.arch in ['E6E'] else ELANLayer

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[0][:]
        self.lateral_conv1 = BaseConv(
            self.in_channels[3], out_ch, 1, 1, act=act)  # 512->384
        self.route_conv1 = BaseConv(
            self.in_channels[2], out_ch, 1, 1, act=act)  # 768->384
        self.elan_fpn1 = ELANBlock(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[1][:]
        self.lateral_conv2 = BaseConv(in_ch, out_ch, 1, 1, act=act)  # 384->256
        self.route_conv2 = BaseConv(
            self.in_channels[1], out_ch, 1, 1, act=act)  # 512->256
        self.elan_fpn2 = ELANBlock(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[2][:]
        self.lateral_conv3 = BaseConv(in_ch, out_ch, 1, 1, act=act)  # 256->128
        self.route_conv3 = BaseConv(
            self.in_channels[0], out_ch, 1, 1, act=act)  # 256->128
        self.elan_fpn3 = ELANBlock(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[3][:]
        if self.arch in ['W6']:
            self.down_conv1 = BaseConv(in_ch, out_ch, 3, 2, act=act)
        elif self.arch in ['E6', 'D6', 'E6E']:
            self.down_conv1 = DownC(in_ch, out_ch, 2, act=act)
        else:
            raise AttributeError("Unsupported arch type: {}".format(self.arch))
        self.elan_pan1 = ELANBlock(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[4][:]
        if self.arch in ['W6']:
            self.down_conv2 = BaseConv(in_ch, out_ch, 3, 2, act=act)
        elif self.arch in ['E6', 'D6', 'E6E']:
            self.down_conv2 = DownC(in_ch, out_ch, 2, act=act)
        else:
            raise AttributeError("Unsupported arch type: {}".format(self.arch))
        self.elan_pan2 = ELANBlock(
            out_ch * 2,
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        in_ch, mid_ch1, mid_ch2, out_ch = ch_settings[5][:]
        if self.arch in ['W6']:
            self.down_conv3 = BaseConv(in_ch, out_ch, 3, 2, act=act)
        elif self.arch in ['E6', 'D6', 'E6E']:
            self.down_conv3 = DownC(in_ch, out_ch, 2, act=act)
        else:
            raise AttributeError("Unsupported arch type: {}".format(self.arch))
        self.elan_pan3 = ELANBlock(
            out_ch + self.in_channels[3],  # concat([pan_out2_down, c6], 1)
            mid_ch1,
            mid_ch2,
            out_ch,
            num_blocks,
            concat_list,
            depthwise,
            act=act)

        self.repconvs = nn.LayerList()
        Conv = BaseConv
        for i, _out_ch in enumerate(self._out_channels[:4]):
            self.repconvs.append(Conv(_out_ch // 2, _out_ch, 3, 1, act=act))

        if self.training and self.use_aux:
            self.repconvs_aux = nn.LayerList()
            for i, _out_ch in enumerate(self._out_channels[4:]):
                self.repconvs_aux.append(
                    Conv(
                        self.in_channels_aux[i], _out_ch, 3, 1, act=act))

    def forward(self, feats, for_mot=False):
        assert len(feats) == len(self.in_channels)
        [c3, c4, c5, c6] = feats  # 19 28 37 47
        # [8, 256, 160, 160] [8, 512, 80, 80] [8, 768, 40, 40] [8, 512, 20, 20]

        # Top-Down FPN
        p6_lateral = self.lateral_conv1(c6)  # 512->384
        p6_up = self.upsample(p6_lateral)
        route_c5 = self.route_conv1(c5)  # 768->384 # route
        f_out1 = paddle.concat([route_c5, p6_up], 1)  # 768 # [8, 768, 40, 40]
        fpn_out1 = self.elan_fpn1(f_out1)  # 768 -> 192*4 + 384*2 -> 1536 -> 384
        # layer 59: [8, 384, 40, 40]

        fpn_out1_lateral = self.lateral_conv2(fpn_out1)  # 384->256
        fpn_out1_up = self.upsample(fpn_out1_lateral)
        route_c4 = self.route_conv2(c4)  # 512->256 # route
        f_out2 = paddle.concat([route_c4, fpn_out1_up],
                               1)  # 512 # [8, 512, 80, 80]
        fpn_out2 = self.elan_fpn2(f_out2)  # 512 -> 128*4 + 256*2 -> 1024 -> 256
        # layer 71: [8, 256, 80, 80]

        fpn_out2_lateral = self.lateral_conv3(fpn_out2)  # 256->128
        fpn_out2_up = self.upsample(fpn_out2_lateral)
        route_c3 = self.route_conv3(c3)  # 512->128 # route
        f_out3 = paddle.concat([route_c3, fpn_out2_up], 1)  # 256
        fpn_out3 = self.elan_fpn3(f_out3)  # 256 -> 64*4 + 128*2 -> 512 -> 128
        # layer 83: [8, 128, 160, 160]

        # Buttom-Up PAN
        p_out1_down = self.down_conv1(fpn_out3)  # 128->256
        p_out1 = paddle.concat([p_out1_down, fpn_out2], 1)  # 256 + 256 -> 512
        pan_out1 = self.elan_pan1(p_out1)  # 512 -> 128*4 + 256*2 -> 1024 -> 256
        # layer 93: [8, 256, 80, 80]

        pan_out1_down = self.down_conv2(pan_out1)  # 256->384
        p_out2 = paddle.concat([pan_out1_down, fpn_out1], 1)  # 384 + 384 -> 768
        pan_out2 = self.elan_pan2(p_out2)  # 768 -> 192*4 + 384*2 -> 1536 -> 384
        # layer 103: [8, 384, 40, 40]

        pan_out2_down = self.down_conv3(pan_out2)  # 384->512
        p_out3 = paddle.concat([pan_out2_down, c6], 1)  # 512 + 512 -> 1024
        pan_out3 = self.elan_pan3(
            p_out3)  # 1024 -> 256*4 + 512*2 -> 2048 -> 512
        # layer 113: [8, 512, 20, 20]

        outputs = []
        pan_outs = [fpn_out3, pan_out1, pan_out2, pan_out3]  # 83 93 103 113
        for i, out in enumerate(pan_outs):
            outputs.append(self.repconvs[i](out))

        if self.training and self.use_aux:
            aux_outs = [fpn_out3, fpn_out2, fpn_out1, c6]  # 83 71 59 47
            for i, out in enumerate(aux_outs):
                outputs.append(self.repconvs_aux[i](out))
        return outputs

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape], }

    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]