# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
#   
# Licensed under the Apache License, Version 2.0 (the "License");   
# you may not use this file except in compliance with the License.  
# You may obtain a copy of the License at   
#   
#     http://www.apache.org/licenses/LICENSE-2.0    
# 
# Unless required by applicable law or agreed to in writing, software   
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
# See the License for the specific language governing permissions and   
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
from ..backbones.csp_darknet import BaseConv
from ..backbones.yolov8_csp_darknet import C2fLayer, C2Layer

__all__ = ['YOLOv8CSPPAN', 'YOLOv8CSPPANP6']


@register
@serializable
class YOLOv8CSPPAN(nn.Layer):
    """
    YOLOv8 CSP-PAN FPN neck, used in YOLOv8.

    Differences from the YOLOv5 CSP-PAN FPN:
    1. no lateral convs
    2. uses C2fLayer in YOLOv8 where YOLOv5 uses CSPLayer

    Args:
        depth_mult (float): depth multiplier; every C2fLayer is built with
            ``round(3 * depth_mult)`` blocks.
        in_channels (list[int]): channel counts of the three input feature
            maps, ordered from the highest-resolution level to the lowest.
            The output levels keep the same channel counts.
        depthwise (bool): forwarded to every C2fLayer.
        act (str): activation name forwarded to every C2fLayer and BaseConv.
        trt (bool): accepted (and listed in ``__shared__``) but not read by
            this layer.
    """
    __shared__ = ['depth_mult', 'act', 'trt']

    def __init__(self,
                 depth_mult=1.0,
                 in_channels=[256, 512, 1024],
                 depthwise=False,
                 act='silu',
                 trt=False):
        super(YOLOv8CSPPAN, self).__init__()
        # Store copies: the default argument is a single list shared by all
        # calls, so keeping a reference to it would let an in-place edit of
        # one instance's attribute leak into every later instance.
        self.in_channels = list(in_channels)
        self._out_channels = list(in_channels)

        ch3, ch4, ch5 = in_channels[0], in_channels[1], in_channels[2]
        num_blocks = round(3 * depth_mult)

        def fuse_block(in_ch, out_ch):
            # C2f block applied to a channel-wise concatenation.
            return C2fLayer(
                int(in_ch),
                int(out_ch),
                num_blocks,
                shortcut=False,
                depthwise=depthwise,
                act=act)

        def downsample(ch):
            # 3x3 stride-2 conv that keeps the channel count.
            return BaseConv(int(ch), int(ch), 3, stride=2, act=act)

        # NOTE: sublayers are created in the same order and under the same
        # attribute names as before so parameter names/order stay stable.

        # top-down
        self.fpn_p4 = fuse_block(ch5 + ch4, ch4)
        self.fpn_p3 = fuse_block(ch4 + ch3, ch3)

        # bottom-up
        self.down_conv2 = downsample(ch3)
        self.pan_n3 = fuse_block(ch3 + ch4, ch4)

        self.down_conv1 = downsample(ch4)
        self.pan_n4 = fuse_block(ch4 + ch5, ch5)

    def forward(self, feats, for_mot=False):
        """
        Fuse three feature maps top-down (FPN) and then bottom-up (PAN).

        Args:
            feats (list[Tensor]): exactly three feature maps ``[c3, c4, c5]``;
                each level is upsampled/downsampled by a factor of 2 to meet
                its neighbour, and levels are concatenated along axis 1.
            for_mot (bool): not used by this layer.

        Returns:
            list[Tensor]: three fused feature maps, in the same level order
            as ``feats``.
        """
        [c3, c4, c5] = feats

        # top-down FPN: upsample the coarser level and merge into the finer
        up_feat1 = F.interpolate(c5, scale_factor=2., mode="nearest")
        f_concat1 = paddle.concat([up_feat1, c4], 1)
        f_out1 = self.fpn_p4(f_concat1)

        up_feat2 = F.interpolate(f_out1, scale_factor=2., mode="nearest")
        f_concat2 = paddle.concat([up_feat2, c3], 1)
        f_out0 = self.fpn_p3(f_concat2)

        # bottom-up PAN: downsample the finer level and merge into the
        # coarser one
        down_feat1 = self.down_conv2(f_out0)
        p_concat1 = paddle.concat([down_feat1, f_out1], 1)
        pan_out1 = self.pan_n3(p_concat1)

        # The topmost level has no top-down output of its own, so the input
        # feature c5 is fused directly.
        down_feat2 = self.down_conv1(pan_out1)
        p_concat2 = paddle.concat([down_feat2, c5], 1)
        pan_out0 = self.pan_n4(p_concat2)

        return [f_out0, pan_out1, pan_out0]

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive ``in_channels`` from the ``channels`` of each input shape."""
        return {'in_channels': [i.channels for i in input_shape], }

    @property
    def out_shape(self):
        """One ShapeSpec per output level, carrying only its channel count."""
        return [ShapeSpec(channels=c) for c in self._out_channels]


@register
@serializable
class YOLOv8CSPPANP6(nn.Layer):
    """
    YOLOv8 CSP-PAN FPN neck, used in YOLOv8-P6.

    Differences from the YOLOv5 CSP-PAN FPN:
    1. no lateral convs
    2. uses C2Layer in YOLOv8-P6 where YOLOv5-P6 uses CSPLayer

    Args:
        depth_mult (float): depth multiplier; every C2Layer is built with
            ``round(3 * depth_mult)`` blocks.
        in_channels (list[int]): channel counts of the four input feature
            maps, ordered from the highest-resolution level to the lowest.
            The output levels keep the same channel counts.
        depthwise (bool): forwarded to every C2Layer.
        act (str): activation name forwarded to every C2Layer and BaseConv.
        trt (bool): accepted (and listed in ``__shared__``) but not read by
            this layer.
    """
    __shared__ = ['depth_mult', 'act', 'trt']

    def __init__(self,
                 depth_mult=1.0,
                 in_channels=[256, 512, 768, 1024],
                 depthwise=False,
                 act='silu',
                 trt=False):
        super(YOLOv8CSPPANP6, self).__init__()
        # Store copies: the default argument is a single list shared by all
        # calls, so keeping a reference to it would let an in-place edit of
        # one instance's attribute leak into every later instance.
        self.in_channels = list(in_channels)
        self._out_channels = list(in_channels)

        ch3, ch4 = in_channels[0], in_channels[1]
        ch5, ch6 = in_channels[2], in_channels[3]
        num_blocks = round(3 * depth_mult)

        def fuse_block(in_ch, out_ch):
            # C2 block applied to a channel-wise concatenation.
            return C2Layer(
                int(in_ch),
                int(out_ch),
                num_blocks,
                shortcut=False,
                depthwise=depthwise,
                act=act)

        def downsample(ch):
            # 3x3 stride-2 conv that keeps the channel count.
            return BaseConv(int(ch), int(ch), 3, stride=2, act=act)

        # NOTE: sublayers are created in the same order and under the same
        # attribute names as before so parameter names/order stay stable.

        # top-down
        self.fpn_p5 = fuse_block(ch6 + ch5, ch5)
        self.fpn_p4 = fuse_block(ch5 + ch4, ch4)
        self.fpn_p3 = fuse_block(ch4 + ch3, ch3)

        # bottom-up
        self.down_conv2 = downsample(ch3)
        self.pan_n3 = fuse_block(ch3 + ch4, ch4)

        self.down_conv1 = downsample(ch4)
        self.pan_n4 = fuse_block(ch4 + ch5, ch5)

        self.down_conv0 = downsample(ch5)
        self.pan_n5 = fuse_block(ch5 + ch6, ch6)

    def forward(self, feats, for_mot=False):
        """
        Fuse four feature maps top-down (FPN) and then bottom-up (PAN).

        Args:
            feats (list[Tensor]): exactly four feature maps
                ``[c3, c4, c5, c6]``; each level is upsampled/downsampled by
                a factor of 2 to meet its neighbour, and levels are
                concatenated along axis 1.
            for_mot (bool): not used by this layer.

        Returns:
            list[Tensor]: four fused feature maps, in the same level order
            as ``feats``.
        """
        [c3, c4, c5, c6] = feats

        # top-down FPN: upsample the coarser level and merge into the finer
        up_feat0 = F.interpolate(c6, scale_factor=2., mode="nearest")
        f_concat0 = paddle.concat([up_feat0, c5], 1)
        f_out0 = self.fpn_p5(f_concat0)

        up_feat1 = F.interpolate(f_out0, scale_factor=2., mode="nearest")
        f_concat1 = paddle.concat([up_feat1, c4], 1)
        f_out1 = self.fpn_p4(f_concat1)

        up_feat2 = F.interpolate(f_out1, scale_factor=2., mode="nearest")
        f_concat2 = paddle.concat([up_feat2, c3], 1)
        f_out2 = self.fpn_p3(f_concat2)

        # bottom-up PAN: downsample the finer level and merge it with the
        # top-down output of the next coarser level
        down_feat1 = self.down_conv2(f_out2)
        p_concat1 = paddle.concat([down_feat1, f_out1], 1)
        pan_out2 = self.pan_n3(p_concat1)

        # Fuse with the top-down P5 output (f_out0), exactly as the P4 level
        # fuses f_out1. Using the raw input c5 here would bypass fpn_p5 on
        # this path; the channel counts match either way, so the mistake
        # would not raise.
        down_feat2 = self.down_conv1(pan_out2)
        p_concat2 = paddle.concat([down_feat2, f_out0], 1)
        pan_out1 = self.pan_n4(p_concat2)

        # The topmost level has no top-down output of its own, so the input
        # feature c6 is fused directly.
        down_feat3 = self.down_conv0(pan_out1)
        p_concat3 = paddle.concat([down_feat3, c6], 1)
        pan_out0 = self.pan_n5(p_concat3)

        return [f_out2, pan_out2, pan_out1, pan_out0]

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive ``in_channels`` from the ``channels`` of each input shape."""
        return {'in_channels': [i.channels for i in input_shape], }

    @property
    def out_shape(self):
        """One ShapeSpec per output level, carrying only its channel count."""
        return [ShapeSpec(channels=c) for c in self._out_channels]