add new model

0d97cc8c · Sugon_ldc · 0d97cc8c · 0d97cc8c · 0d97cc8c · 0d97cc8c
Commit 0d97cc8c authored Jun 07, 2023 by Sugon_ldc
20 changed files
--- a/Matting/ppmatting/models/backbone/mobilenet_v2.py
+++ b/Matting/ppmatting/models/backbone/mobilenet_v2.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+
+from paddleseg.cvlibs import manager
+
+import ppmatting
+
+MODEL_URLS = {
+    "MobileNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
+    "MobileNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
+    "MobileNetV2_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
+    "MobileNetV2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
+    "MobileNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
+    "MobileNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
+}
+
+__all__ = ["MobileNetV2"]
+
+
+class ConvBNLayer(nn.Layer):
+    """Conv2D followed by BatchNorm, with an optional ReLU6 applied in forward.
+
+    Args:
+        num_channels (int): Input channels of the convolution.
+        filter_size (int): Kernel size of the convolution.
+        num_filters (int): Output channels of the convolution.
+        stride (int): Stride of the convolution.
+        padding (int): Padding of the convolution.
+        channels (optional): Unused; kept so existing callers keep working.
+        num_groups (int, optional): Groups of the convolution. Default: 1.
+        name (str, optional): Prefix of the explicit parameter names
+            ("<name>_weights", "<name>_bn_scale", "<name>_bn_offset",
+            "<name>_bn_mean", "<name>_bn_variance"). When None, no explicit
+            names are requested. Default: None.
+        use_cudnn (bool, optional): Unused; kept so existing callers keep
+            working. Default: True.
+    """
+
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 channels=None,
+                 num_groups=1,
+                 name=None,
+                 use_cudnn=True):
+        super(ConvBNLayer, self).__init__()
+
+        def _named(suffix):
+            # `name + suffix` used to be evaluated unconditionally, which
+            # raised TypeError for the declared default name=None.
+            return None if name is None else name + suffix
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(name=_named("_weights")),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            num_filters,
+            param_attr=ParamAttr(name=_named("_bn_scale")),
+            bias_attr=ParamAttr(name=_named("_bn_offset")),
+            moving_mean_name=_named("_bn_mean"),
+            moving_variance_name=_named("_bn_variance"))
+
+    def forward(self, inputs, if_act=True):
+        """Apply conv and batch norm; apply ReLU6 afterwards when `if_act` is true."""
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        if if_act:
+            y = F.relu6(y)
+        return y
+
+
+class InvertedResidualUnit(nn.Layer):
+    """Three ConvBNLayers in sequence: 1x1 expand, grouped conv, 1x1 linear.
+
+    The middle convolution uses as many groups as it has channels. The width
+    of the expanded representation is
+    ``int(round(num_in_filter * expansion_factor))``.
+    """
+
+    def __init__(self, num_channels, num_in_filter, num_filters, stride,
+                 filter_size, padding, expansion_factor, name):
+        super(InvertedResidualUnit, self).__init__()
+        expanded_width = int(round(num_in_filter * expansion_factor))
+
+        # 1x1 convolution that widens the input to `expanded_width` channels.
+        self._expand_conv = ConvBNLayer(
+            name=name + "_expand",
+            num_channels=num_channels,
+            num_filters=expanded_width,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1)
+
+        # Spatial convolution with one group per channel; carries the stride.
+        self._bottleneck_conv = ConvBNLayer(
+            name=name + "_dwise",
+            num_channels=expanded_width,
+            num_filters=expanded_width,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            num_groups=expanded_width,
+            use_cudnn=False)
+
+        # 1x1 projection down to the requested output width.
+        self._linear_conv = ConvBNLayer(
+            name=name + "_linear",
+            num_channels=expanded_width,
+            num_filters=num_filters,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1)
+
+    def forward(self, inputs, ifshortcut):
+        """Run the three convolutions; add `inputs` to the result when `ifshortcut` is true."""
+        widened = self._expand_conv(inputs, if_act=True)
+        filtered = self._bottleneck_conv(widened, if_act=True)
+        # The projection is the only stage that runs without ReLU6.
+        projected = self._linear_conv(filtered, if_act=False)
+        if not ifshortcut:
+            return projected
+        return paddle.add(inputs, projected)
+
+
+class InvresiBlocks(nn.Layer):
+    """A stage of `n` InvertedResidualUnits.
+
+    The first unit maps `in_c` to `c` channels with stride `s` and no
+    shortcut; the remaining `n - 1` units keep `c` channels, use stride 1 and
+    add a shortcut. `t` is the expansion factor shared by all units.
+    """
+
+    def __init__(self, in_c, t, c, n, s, name):
+        super(InvresiBlocks, self).__init__()
+
+        self._first_block = InvertedResidualUnit(
+            name=name + "_1",
+            num_channels=in_c,
+            num_in_filter=in_c,
+            num_filters=c,
+            stride=s,
+            filter_size=3,
+            padding=1,
+            expansion_factor=t)
+
+        # Follow-up units are registered as "<name>_2" .. "<name>_<n>".
+        self._block_list = []
+        for position in range(2, n + 1):
+            unit_name = name + "_" + str(position)
+            unit = InvertedResidualUnit(
+                name=unit_name,
+                num_channels=c,
+                num_in_filter=c,
+                num_filters=c,
+                stride=1,
+                filter_size=3,
+                padding=1,
+                expansion_factor=t)
+            self._block_list.append(
+                self.add_sublayer(
+                    unit_name, sublayer=unit))
+
+    def forward(self, inputs):
+        """Apply the first unit without a shortcut, then every other unit with one."""
+        out = self._first_block(inputs, ifshortcut=False)
+        for unit in self._block_list:
+            out = unit(out, ifshortcut=True)
+        return out
+
+
+@manager.BACKBONES.add_component
+class MobileNet(nn.Layer):
+    """MobileNetV2-style backbone that returns five intermediate feature maps.
+
+    Args:
+        input_channels (int, optional): Channels of the input tensor.
+            Default: 3.
+        scale (float, optional): Width multiplier applied to the stem and to
+            every stage. Default: 1.0.
+        pretrained (str, optional): Forwarded to
+            ``ppmatting.utils.load_pretrained_model``. Default: None.
+        prefix_name (str, optional): Prefix for sublayer and parameter names.
+            Default: "".
+
+    Attributes:
+        feat_channels (list[int]): Channel count of each tensor returned by
+            ``forward``, in the same order.
+    """
+
+    def __init__(self,
+                 input_channels=3,
+                 scale=1.0,
+                 pretrained=None,
+                 prefix_name=""):
+        super(MobileNet, self).__init__()
+        self.scale = scale
+
+        # Per stage: (expansion factor t, output channels c, number of units n,
+        # stride s of the first unit).
+        bottleneck_params_list = [
+            (1, 16, 1, 1),
+            (6, 24, 2, 2),
+            (6, 32, 3, 2),
+            (6, 64, 4, 2),
+            (6, 96, 3, 1),
+            (6, 160, 3, 2),
+            (6, 320, 1, 1),
+        ]
+
+        stem_channels = int(32 * scale)
+        self.conv1 = ConvBNLayer(
+            num_channels=input_channels,
+            num_filters=stem_channels,
+            filter_size=3,
+            stride=2,
+            padding=1,
+            name=prefix_name + "conv1_1")
+
+        # Stages are registered as "<prefix>conv2" .. "<prefix>conv8".
+        self.block_list = []
+        in_c = stem_channels
+        for i, (t, c, n, s) in enumerate(bottleneck_params_list, start=2):
+            stage_name = prefix_name + "conv" + str(i)
+            stage_channels = int(c * scale)
+            block = self.add_sublayer(
+                stage_name,
+                sublayer=InvresiBlocks(
+                    in_c=in_c,
+                    t=t,
+                    c=stage_channels,
+                    n=n,
+                    s=s,
+                    name=stage_name))
+            self.block_list.append(block)
+            in_c = stage_channels
+
+        # The last 1x1 conv is only widened for scale > 1.0.
+        self.out_c = int(1280 * scale) if scale > 1.0 else 1280
+        self.conv9 = ConvBNLayer(
+            num_channels=in_c,
+            num_filters=self.out_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            name=prefix_name + "conv9")
+
+        # The final feature comes from conv9, so it has `out_c` channels.
+        # Using int(1280 * scale) here reported the wrong width for scale < 1.0.
+        self.feat_channels = [int(c * scale)
+                              for c in (16, 24, 32, 96)] + [self.out_c]
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, inputs):
+        """Return a list of five tensors: the outputs of stages 0, 1, 2 and 4, then the conv9 output."""
+        feat_list = []
+        y = self.conv1(inputs, if_act=True)
+
+        for block_index, block in enumerate(self.block_list):
+            y = block(y)
+            if block_index in (0, 1, 2, 4):
+                feat_list.append(y)
+        y = self.conv9(y, if_act=True)
+        feat_list.append(y)
+        return feat_list
+
+    def init_weight(self):
+        """Hand `self.pretrained` to ppmatting's pretrained-model loader."""
+        ppmatting.utils.load_pretrained_model(self, self.pretrained)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2(**kwargs):
+    """Build a MobileNet with scale fixed at 1.0; other keyword arguments go to MobileNet."""
+    return MobileNet(scale=1.0, **kwargs)
--- a/Matting/ppmatting/models/backbone/resnet_vd.py
+++ b/Matting/ppmatting/models/backbone/resnet_vd.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+
+import ppmatting
+
+__all__ = [
+    "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
+]
+
+
+class ConvBNLayer(nn.Layer):
+    """Optional 2x2 average pooling, then Conv2D, SyncBatchNorm and an activation.
+
+    The convolution is padded with ``(kernel_size - 1) // 2`` when
+    ``dilation == 1`` and is left unpadded otherwise. The pooling layer is
+    always created but only applied when `is_vd_mode` is true.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 is_vd_mode=False,
+                 act=None):
+        super(ConvBNLayer, self).__init__()
+
+        if dilation == 1:
+            conv_padding = (kernel_size - 1) // 2
+        else:
+            # Dilated convolutions are built unpadded.
+            conv_padding = 0
+
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = nn.AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=conv_padding,
+            dilation=dilation,
+            groups=groups,
+            bias_attr=False)
+
+        self._batch_norm = layers.SyncBatchNorm(out_channels)
+        self._act_op = layers.Activation(act=act)
+
+    def forward(self, inputs):
+        """Pool first when in vd mode, then run conv, batch norm and activation."""
+        x = self._pool2d_avg(inputs) if self.is_vd_mode else inputs
+        return self._act_op(self._batch_norm(self._conv(x)))
+
+
+class BottleneckBlock(nn.Layer):
+    """Residual block of 1x1, 3x3 and 1x1 ConvBNLayers with 4x output channels.
+
+    When `shortcut` is false, the residual branch is a 1x1 ConvBNLayer that
+    average-pools its input first unless `if_first` is set or `stride` is 1.
+    Otherwise the input is added unchanged.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 dilation=1):
+        super(BottleneckBlock, self).__init__()
+
+        expanded_channels = out_channels * 4
+        self.dilation = dilation
+        self.shortcut = shortcut
+
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            act='relu')
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            dilation=dilation,
+            act='relu')
+        self.conv2 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=expanded_channels,
+            kernel_size=1,
+            act=None)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=expanded_channels,
+                kernel_size=1,
+                stride=1,
+                is_vd_mode=not (if_first or stride == 1))
+
+    def forward(self, inputs):
+        """Return relu(residual + conv2(conv1(conv0(inputs))))."""
+        y = self.conv0(inputs)
+
+        # conv1 is created unpadded whenever dilation != 1, so the matching
+        # padding is applied here by hand. The original note says performance
+        # drops without it.
+        if self.dilation > 1:
+            y = F.pad(y, [self.dilation] * 4)
+
+        main = self.conv2(self.conv1(y))
+        residual = inputs if self.shortcut else self.short(inputs)
+
+        return F.relu(paddle.add(x=residual, y=main))
+
+
+class BasicBlock(nn.Layer):
+    """Residual block of two 3x3 ConvBNLayers.
+
+    When `shortcut` is false, the residual branch is a 1x1 ConvBNLayer that
+    average-pools its input first unless `if_first` is set or `stride` is 1.
+    Otherwise the input is added unchanged.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 shortcut=True,
+                 if_first=False):
+        super(BasicBlock, self).__init__()
+        self.stride = stride
+        self.shortcut = shortcut
+
+        # Only the first convolution carries the stride.
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            act='relu')
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            act=None)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                is_vd_mode=not (if_first or stride == 1))
+
+    def forward(self, inputs):
+        """Return relu(residual + conv1(conv0(inputs)))."""
+        main = self.conv1(self.conv0(inputs))
+        residual = inputs if self.shortcut else self.short(inputs)
+        return F.relu(paddle.add(x=residual, y=main))
+
+
+class ResNet_vd(nn.Layer):
+    """
+    The ResNet_vd implementation based on PaddlePaddle.
+
+    The original article refers to Jingdong
+    Tong He, et al. "Bag of Tricks for Image Classification with Convolutional Neural Networks"
+    (https://arxiv.org/pdf/1812.01187.pdf).
+
+    Args:
+        input_channels (int, optional): The channels of the input image. Default: 3.
+        layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
+        output_stride (int, optional): The stride of output features compared to input images.
+            8 or 16 replaces the stride of the later stages with dilation (bottleneck variants only,
+            i.e. layers >= 50); any other value, including the default, applies no dilation. Default: 32.
+        multi_grid (tuple|list, optional): Per-block dilation multipliers of the last stage
+            (bottleneck variants only). Default: (1, 1, 1).
+        pretrained (str, optional): The path of pretrained model.
+
+    Attributes:
+        feat_channels (list[int]): Channels of the five tensors returned by ``forward``.
+    """
+
+    def __init__(self,
+                 input_channels=3,
+                 layers=50,
+                 output_stride=32,
+                 multi_grid=(1, 1, 1),
+                 pretrained=None):
+        super(ResNet_vd, self).__init__()
+
+        self.conv1_logit = None  # for gscnn shape stream
+        self.layers = layers
+        supported_layers = [18, 34, 50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+
+        # Number of residual blocks in each of the four stages.
+        depth_by_layers = {
+            18: [2, 2, 2, 2],
+            34: [3, 4, 6, 3],
+            50: [3, 4, 6, 3],
+            101: [3, 4, 23, 3],
+            152: [3, 8, 36, 3],
+            200: [3, 12, 48, 3],
+        }
+        depth = depth_by_layers[layers]
+        # Input channels of the first block of each stage.
+        num_channels = [64, 256, 512,
+                        1024] if layers >= 50 else [64, 64, 128, 256]
+        num_filters = [64, 128, 256, 512]
+
+        # Channels of the stem output followed by the four stage outputs.
+        # Bottleneck blocks expand their output by 4.
+        self.feat_channels = [c * 4 for c in num_filters
+                              ] if layers >= 50 else num_filters
+        self.feat_channels = [64] + self.feat_channels
+
+        # Maps stage index -> dilation rate; None means no dilation.
+        dilation_dict = None
+        if output_stride == 8:
+            dilation_dict = {2: 2, 3: 4}
+        elif output_stride == 16:
+            dilation_dict = {3: 2}
+
+        self.conv1_1 = ConvBNLayer(
+            in_channels=input_channels,
+            out_channels=32,
+            kernel_size=3,
+            stride=2,
+            act='relu')
+        self.conv1_2 = ConvBNLayer(
+            in_channels=32,
+            out_channels=32,
+            kernel_size=3,
+            stride=1,
+            act='relu')
+        self.conv1_3 = ConvBNLayer(
+            in_channels=32,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            act='relu')
+        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        # stage_list[stage] holds the blocks of that stage in order. The
+        # blocks themselves are registered through add_sublayer.
+        self.stage_list = []
+        if layers >= 50:
+            for block in range(len(depth)):
+                shortcut = False
+                block_list = []
+                for i in range(depth[block]):
+                    # Add dilation rate for some segmentation tasks, if
+                    # dilation_dict is not None.
+                    dilation_rate = dilation_dict[
+                        block] if dilation_dict and block in dilation_dict else 1
+
+                    # Actually block here is 'stage', and i is 'block' in 'stage'.
+                    # At stage 4, scale the dilation rate by the given multi_grid.
+                    if block == 3:
+                        dilation_rate = dilation_rate * multi_grid[i]
+
+                    bottleneck_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BottleneckBlock(
+                            in_channels=num_channels[block]
+                            if i == 0 else num_filters[block] * 4,
+                            out_channels=num_filters[block],
+                            # A dilated stage keeps its resolution, so it
+                            # does not stride.
+                            stride=2 if i == 0 and block != 0 and
+                            dilation_rate == 1 else 1,
+                            shortcut=shortcut,
+                            if_first=block == i == 0,
+                            dilation=dilation_rate))
+
+                    block_list.append(bottleneck_block)
+                    # Only the first block of a stage projects its residual.
+                    shortcut = True
+                self.stage_list.append(block_list)
+        else:
+            for block in range(len(depth)):
+                shortcut = False
+                block_list = []
+                for i in range(depth[block]):
+                    basic_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BasicBlock(
+                            in_channels=num_channels[block]
+                            if i == 0 else num_filters[block],
+                            out_channels=num_filters[block],
+                            stride=2 if i == 0 and block != 0 else 1,
+                            shortcut=shortcut,
+                            if_first=block == i == 0))
+                    block_list.append(basic_block)
+                    # Only the first block of a stage projects its residual.
+                    shortcut = True
+                self.stage_list.append(block_list)
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, inputs):
+        """Return the stem output followed by the output of each of the four stages."""
+        feat_list = []
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        feat_list.append(y)
+
+        y = self.pool2d_max(y)
+
+        # A feature list saves the output feature map of each stage.
+        for stage in self.stage_list:
+            for block in stage:
+                y = block(y)
+            feat_list.append(y)
+
+        return feat_list
+
+    def init_weight(self):
+        """Hand `self.pretrained` to ppmatting's pretrained-model loader."""
+        ppmatting.utils.load_pretrained_model(self, self.pretrained)
+
+
+@manager.BACKBONES.add_component
+def ResNet18_vd(**args):
+    """Build a ResNet_vd with layers=18; other keyword arguments go to ResNet_vd."""
+    return ResNet_vd(layers=18, **args)
+
+
+@manager.BACKBONES.add_component
+def ResNet34_vd(**args):
+    """Build a ResNet_vd with layers=34; other keyword arguments go to ResNet_vd."""
+    return ResNet_vd(layers=34, **args)
+
+
+@manager.BACKBONES.add_component
+def ResNet50_vd(**args):
+    """Build a ResNet_vd with layers=50; other keyword arguments go to ResNet_vd."""
+    return ResNet_vd(layers=50, **args)
+
+
+@manager.BACKBONES.add_component
+def ResNet101_vd(**args):
+    """Build a ResNet_vd with layers=101; other keyword arguments go to ResNet_vd."""
+    return ResNet_vd(layers=101, **args)
+
+
+def ResNet152_vd(**args):
+    """Build a ResNet_vd with layers=152; other keyword arguments go to ResNet_vd.
+
+    NOTE(review): unlike the 18/34/50/101 builders, this one is not registered
+    with manager.BACKBONES -- confirm whether that is intentional.
+    """
+    return ResNet_vd(layers=152, **args)
+
+
+def ResNet200_vd(**args):
+    """Build a ResNet_vd with layers=200; other keyword arguments go to ResNet_vd.
+
+    NOTE(review): not registered with manager.BACKBONES and not listed in
+    __all__ -- confirm whether that is intentional.
+    """
+    return ResNet_vd(layers=200, **args)
--- a/Matting/ppmatting/models/backbone/stdcnet.py
+++ b/Matting/ppmatting/models/backbone/stdcnet.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle
+import paddle.nn as nn
+
+from paddleseg.utils import utils
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models.layers.layer_libs import SyncBatchNorm
+
+__all__ = ["STDC1", "STDC2", "STDC_Small", "STDC_Tiny"]
+
+
+class STDCNet(nn.Layer):
+    """
+    The STDCNet implementation based on PaddlePaddle.
+
+    The original article refers to Meituan
+    Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation."
+    (https://arxiv.org/abs/2104.13188)
+
+    Args:
+        input_channels(int, optional): channels of the input tensor. Default: 3.
+        channels(list, optional): output channels of the two stem convolutions followed by
+            those of stage3/4/5. Default: [32, 64, 256, 512, 1024].
+        layers(list, optional): layers numbers list. It determines STDC block numbers of
+            STDCNet's stage3/4/5. Default: [4, 5, 3].
+        block_num(int,optional): block_num of features block. Default: 4.
+        type(str,optional): feature fusion method "cat"/"add". Default: "cat".
+        pretrained(str, optional): the path of pretrained model.
+
+    Raises:
+        ValueError: if `type` is neither "cat" nor "add".
+    """
+
+    def __init__(self,
+                 input_channels=3,
+                 channels=[32, 64, 256, 512, 1024],
+                 layers=[4, 5, 3],
+                 block_num=4,
+                 type="cat",
+                 pretrained=None):
+        super(STDCNet, self).__init__()
+        block_types = {"cat": CatBottleneck, "add": AddBottleneck}
+        if type not in block_types:
+            # An unknown value used to leave `block` unbound and fail later
+            # with an unrelated UnboundLocalError.
+            raise ValueError(
+                'type should be "cat" or "add", but got {!r}'.format(type))
+        block = block_types[type]
+
+        self.input_channels = input_channels
+        # Store copies so that mutating the instance attributes can never
+        # alter the shared default lists of the signature.
+        self.layers = list(layers)
+        self.feat_channels = list(channels)
+        self.features = self._make_layers(channels, layers, block_num, block)
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """
+        forward function for feature extract.
+
+        Returns a list of five tensors: the outputs of the two stem
+        convolutions followed by the output of each of the three stages.
+        """
+        out_feats = []
+
+        x = self.features[0](x)
+        out_feats.append(x)
+        x = self.features[1](x)
+        out_feats.append(x)
+
+        # Stage boundaries inside self.features; indices 0 and 1 are the stem.
+        first_end = 2 + self.layers[0]
+        second_end = 2 + sum(self.layers[0:2])
+        last_end = 2 + sum(self.layers)
+        idx = [[2, first_end], [first_end, second_end],
+               [second_end, last_end]]
+        for start_idx, end_idx in idx:
+            for i in range(start_idx, end_idx):
+                x = self.features[i](x)
+            out_feats.append(x)
+
+        return out_feats
+
+    def _make_layers(self, channels, layers, block_num, block):
+        """Build the stem convolutions and all stage blocks as one nn.Sequential."""
+        features = []
+        features += [ConvBNRelu(self.input_channels, channels[0], 3, 2)]
+        features += [ConvBNRelu(channels[0], channels[1], 3, 2)]
+
+        for i, layer in enumerate(layers):
+            for j in range(layer):
+                if j == 0:
+                    # The first block of every stage changes the channel
+                    # count and uses stride 2.
+                    features.append(
+                        block(channels[i + 1], channels[i + 2], block_num, 2))
+                else:
+                    features.append(
+                        block(channels[i + 2], channels[i + 2], block_num, 1))
+
+        return nn.Sequential(*features)
+
+    def init_weight(self):
+        """Initialise conv and batch-norm parameters, then load `self.pretrained` if set."""
+        for layer in self.sublayers():
+            if isinstance(layer, nn.Conv2D):
+                param_init.normal_init(layer.weight, std=0.001)
+            elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
+                param_init.constant_init(layer.weight, value=1.0)
+                param_init.constant_init(layer.bias, value=0.0)
+        if self.pretrained is not None:
+            utils.load_pretrained_model(self, self.pretrained)
+
+
+class ConvBNRelu(nn.Layer):
+    """Bias-free Conv2D padded with ``kernel // 2``, then SyncBatchNorm and ReLU."""
+
+    def __init__(self, in_planes, out_planes, kernel=3, stride=1):
+        super(ConvBNRelu, self).__init__()
+        self.conv = nn.Conv2D(
+            in_planes,
+            out_planes,
+            kernel_size=kernel,
+            stride=stride,
+            padding=kernel // 2,
+            bias_attr=False)
+        self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        """Return relu(bn(conv(x)))."""
+        y = self.conv(x)
+        y = self.bn(y)
+        return self.relu(y)
+
+
+class AddBottleneck(nn.Layer):
+    """Chain of `block_num` ConvBNRelu layers, concatenated and added to a skip path.
+
+    With stride 2 the output of the first convolution goes through
+    `avd_layer` (a strided conv with one group per channel, then batch norm)
+    and the input goes through `skip` before the addition. With any other
+    stride the input is added unchanged.
+    """
+
+    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
+        super(AddBottleneck, self).__init__()
+        assert block_num > 1, "block number should be larger than 1."
+        self.conv_list = nn.LayerList()
+        self.stride = stride
+        half_planes = out_planes // 2
+
+        if stride == 2:
+            self.avd_layer = nn.Sequential(
+                nn.Conv2D(
+                    half_planes,
+                    half_planes,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    groups=half_planes,
+                    bias_attr=False),
+                nn.BatchNorm2D(half_planes), )
+            self.skip = nn.Sequential(
+                nn.Conv2D(
+                    in_planes,
+                    in_planes,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    groups=in_planes,
+                    bias_attr=False),
+                nn.BatchNorm2D(in_planes),
+                nn.Conv2D(
+                    in_planes, out_planes, kernel_size=1, bias_attr=False),
+                nn.BatchNorm2D(out_planes), )
+            # avd_layer does the downsampling, so the convs below use stride 1.
+            stride = 1
+
+        for idx in range(block_num):
+            if idx == 0:
+                conv = ConvBNRelu(in_planes, half_planes, kernel=1)
+            elif idx == 1 and block_num == 2:
+                conv = ConvBNRelu(half_planes, half_planes, stride=stride)
+            elif idx == 1 and block_num > 2:
+                conv = ConvBNRelu(
+                    half_planes, out_planes // 4, stride=stride)
+            else:
+                width_in = out_planes // (2**idx)
+                # Every layer halves the width except the last, which keeps it.
+                if idx < block_num - 1:
+                    width_out = out_planes // (2**(idx + 1))
+                else:
+                    width_out = width_in
+                conv = ConvBNRelu(width_in, width_out)
+            self.conv_list.append(conv)
+
+    def forward(self, x):
+        """Concatenate every layer's output on axis 1 and add the skip path."""
+        collected = []
+        feat = x
+        for idx, conv in enumerate(self.conv_list):
+            feat = conv(feat)
+            if idx == 0 and self.stride == 2:
+                feat = self.avd_layer(feat)
+            collected.append(feat)
+
+        residual = self.skip(x) if self.stride == 2 else x
+        return paddle.concat(collected, axis=1) + residual
+
+
+class CatBottleneck(nn.Layer):
+    """Chain of `block_num` ConvBNRelu layers whose outputs are concatenated.
+
+    With stride 2 the first convolution's output is fed to the second layer
+    through `avd_layer` (a strided conv with one group per channel, then
+    batch norm), and it is average-pooled by `skip` before being
+    concatenated.
+    """
+
+    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
+        super(CatBottleneck, self).__init__()
+        assert block_num > 1, "block number should be larger than 1."
+        self.conv_list = nn.LayerList()
+        self.stride = stride
+        half_planes = out_planes // 2
+
+        if stride == 2:
+            self.avd_layer = nn.Sequential(
+                nn.Conv2D(
+                    half_planes,
+                    half_planes,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    groups=half_planes,
+                    bias_attr=False),
+                nn.BatchNorm2D(half_planes), )
+            self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
+            # avd_layer does the downsampling, so the convs below use stride 1.
+            stride = 1
+
+        for idx in range(block_num):
+            if idx == 0:
+                conv = ConvBNRelu(in_planes, half_planes, kernel=1)
+            elif idx == 1 and block_num == 2:
+                conv = ConvBNRelu(half_planes, half_planes, stride=stride)
+            elif idx == 1 and block_num > 2:
+                conv = ConvBNRelu(
+                    half_planes, out_planes // 4, stride=stride)
+            else:
+                width_in = out_planes // (2**idx)
+                # Every layer halves the width except the last, which keeps it.
+                if idx < block_num - 1:
+                    width_out = out_planes // (2**(idx + 1))
+                else:
+                    width_out = width_in
+                conv = ConvBNRelu(width_in, width_out)
+            self.conv_list.append(conv)
+
+    def forward(self, x):
+        """Return the axis-1 concatenation of all layer outputs, first layer first."""
+        head = self.conv_list[0](x)
+
+        later_outputs = []
+        for idx, conv in enumerate(self.conv_list[1:]):
+            if idx != 0:
+                source = feat
+            elif self.stride == 2:
+                source = self.avd_layer(head)
+            else:
+                source = head
+            feat = conv(source)
+            later_outputs.append(feat)
+
+        if self.stride == 2:
+            # Bring the first output to the resolution of the later ones.
+            head = self.skip(head)
+        return paddle.concat([head] + later_outputs, axis=1)
+
+
+@manager.BACKBONES.add_component
+def STDC2(**kwargs):
+    """Build an STDCNet with channels [32, 64, 256, 512, 1024] and layers [4, 5, 3]."""
+    return STDCNet(
+        channels=[32, 64, 256, 512, 1024], layers=[4, 5, 3], **kwargs)
+
+
+@manager.BACKBONES.add_component
+def STDC1(**kwargs):
+    """Build an STDCNet with channels [32, 64, 256, 512, 1024] and layers [2, 2, 2]."""
+    return STDCNet(
+        channels=[32, 64, 256, 512, 1024], layers=[2, 2, 2], **kwargs)
+
+
+@manager.BACKBONES.add_component
+def STDC_Small(**kwargs):
+    """Build an STDCNet with channels [32, 32, 64, 128, 256] and layers [4, 5, 3]."""
+    return STDCNet(channels=[32, 32, 64, 128, 256], layers=[4, 5, 3], **kwargs)
+
+
+@manager.BACKBONES.add_component
+def STDC_Tiny(**kwargs):
+    """Build an STDCNet with channels [32, 32, 64, 128, 256] and layers [2, 2, 2]."""
+    return STDCNet(channels=[32, 32, 64, 128, 256], layers=[2, 2, 2], **kwargs)
--- a/Matting/ppmatting/models/backbone/vgg.py
+++ b/Matting/ppmatting/models/backbone/vgg.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+
+from paddleseg.cvlibs import manager
+
+import ppmatting
+
+
+class ConvBlock(nn.Layer):
+    """A stack of 3x3 convolutions, each followed by ReLU, then a 2x2 max pool.
+
+    Args:
+        input_channels (int): Channels of the input feature map.
+        output_channels (int): Channels produced by every convolution.
+        groups (int): Number of stacked convolutions. Values 2, 3 and 4 add
+            the second, third and fourth convolution; any other value leaves
+            only the first one.
+        name (str, optional): Prefix of the weight parameter names
+            ("<name>1_weights", "<name>2_weights", ...). When None, no explicit
+            name is set and the framework picks one. Default: None.
+    """
+
+    def __init__(self, input_channels, output_channels, groups, name=None):
+        super(ConvBlock, self).__init__()
+
+        self.groups = groups
+
+        def make_conv(in_channels, index):
+            # The original concatenated `name + "<i>_weights"` unconditionally,
+            # which raised TypeError for the documented default name=None.
+            weight_name = None if name is None else name + "{}_weights".format(
+                index)
+            return Conv2D(
+                in_channels=in_channels,
+                out_channels=output_channels,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                weight_attr=ParamAttr(name=weight_name),
+                bias_attr=False)
+
+        # Attribute names and creation order are kept as-is so that existing
+        # checkpoints keep matching the sublayer names.
+        self._conv_1 = make_conv(input_channels, 1)
+        if groups in (2, 3, 4):
+            self._conv_2 = make_conv(output_channels, 2)
+        if groups in (3, 4):
+            self._conv_3 = make_conv(output_channels, 3)
+        if groups == 4:
+            self._conv_4 = make_conv(output_channels, 4)
+
+        # return_mask=True makes the pool also return the argmax indices.
+        self._pool = MaxPool2D(
+            kernel_size=2, stride=2, padding=0, return_mask=True)
+
+    def forward(self, inputs):
+        """Run the block.
+
+        Returns:
+            tuple: (pooled feature, max-pool indices, feature before pooling).
+        """
+        x = F.relu(self._conv_1(inputs))
+        if self.groups in (2, 3, 4):
+            x = F.relu(self._conv_2(x))
+        if self.groups in (3, 4):
+            x = F.relu(self._conv_3(x))
+        if self.groups == 4:
+            x = F.relu(self._conv_4(x))
+        skip = x
+        x, max_indices = self._pool(x)
+        return x, max_indices, skip
+
+
+class VGGNet(nn.Layer):
+    """VGG-style encoder made of five ``ConvBlock`` stages and one extra 3x3 conv.
+
+    Args:
+        input_channels (int, optional): Channels of the input tensor. Default: 3.
+        layers (int, optional): Network depth; must be 11, 13, 16 or 19.
+            Default: 11.
+        pretrained (str, optional): When not None it is handed to
+            ``ppmatting.utils.load_pretrained_model``. Default: None.
+    """
+
+    def __init__(self, input_channels=3, layers=11, pretrained=None):
+        super(VGGNet, self).__init__()
+        self.pretrained = pretrained
+        self.layers = layers
+
+        # Number of stacked convolutions in each of the five blocks, per depth.
+        self.vgg_configure = {
+            11: [1, 1, 2, 2, 2],
+            13: [2, 2, 2, 2, 2],
+            16: [2, 2, 3, 3, 3],
+            19: [2, 2, 4, 4, 4]
+        }
+        assert self.layers in self.vgg_configure.keys(), \
+            "supported layers are {} but input layer is {}".format(
+                self.vgg_configure.keys(), layers)
+        self.groups = self.vgg_configure[self.layers]
+        depth_1, depth_2, depth_3, depth_4, depth_5 = self.groups
+
+        # Original note (translated): for matting the first convolution takes
+        # a 4-channel input and is initialized directly to 0.
+        self._conv_block_1 = ConvBlock(
+            input_channels, 64, depth_1, name="conv1_")
+        self._conv_block_2 = ConvBlock(64, 128, depth_2, name="conv2_")
+        self._conv_block_3 = ConvBlock(128, 256, depth_3, name="conv3_")
+        self._conv_block_4 = ConvBlock(256, 512, depth_4, name="conv4_")
+        self._conv_block_5 = ConvBlock(512, 512, depth_5, name="conv5_")
+
+        # Original note (translated): this layer should be initialized from
+        # converted VGG fc6 parameters; that initialization can be skipped
+        # for now.
+        self._conv_6 = Conv2D(
+            512, 512, kernel_size=3, padding=1, bias_attr=False)
+
+        self.init_weight()
+
+    def forward(self, inputs):
+        """Return the five pre-pooling features followed by the conv6 output."""
+        blocks = (self._conv_block_1, self._conv_block_2, self._conv_block_3,
+                  self._conv_block_4, self._conv_block_5)
+        features = []
+        x = inputs
+        for block in blocks:
+            # The pooling indices returned by each block are not used here.
+            x, _, skip = block(x)
+            features.append(skip)
+        features.append(F.relu(self._conv_6(x)))
+        return features
+
+    def init_weight(self):
+        """Load pretrained weights when a path was given."""
+        if self.pretrained is None:
+            return
+        ppmatting.utils.load_pretrained_model(self, self.pretrained)
+
+
+@manager.BACKBONES.add_component
+def VGG11(**args):
+    """Build a ``VGGNet`` with ``layers=11``; other keyword arguments pass through."""
+    return VGGNet(layers=11, **args)
+
+
+@manager.BACKBONES.add_component
+def VGG13(**args):
+    """Build a ``VGGNet`` with ``layers=13``; other keyword arguments pass through."""
+    return VGGNet(layers=13, **args)
+
+
+@manager.BACKBONES.add_component
+def VGG16(**args):
+    """Build a ``VGGNet`` with ``layers=16``; other keyword arguments pass through."""
+    return VGGNet(layers=16, **args)
+
+
+@manager.BACKBONES.add_component
+def VGG19(**args):
+    """Build a ``VGGNet`` with ``layers=19``; other keyword arguments pass through."""
+    return VGGNet(layers=19, **args)
--- a/Matting/ppmatting/models/dim.py
+++ b/Matting/ppmatting/models/dim.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddleseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager
+
+from ppmatting.models.losses import MRSD
+
+
+@manager.MODELS.add_component
+class DIM(nn.Layer):
+    """
+    The DIM implementation based on PaddlePaddle.
+
+    The original article refers to
+    Ning Xu, et al. "Deep Image Matting"
+    (https://arxiv.org/abs/1703.03872).
+
+    Args:
+        backbone: backbone model. It is called on the image concatenated with
+            the trimap (divided by 255) and must return a sequence of at
+            least six features.
+        stage (int, optional): The stage of model. Default: 3.
+            Below 2 only the backbone and decoder are used; at 2 the backbone
+            and decoder parameters are frozen and the refine branch is added;
+            at 3 the refine branch is added with nothing frozen.
+        decoder_input_channels(int, optional): The channel of decoder input. Default: 512.
+        pretrained(str, optional): The path of pretrained model. Default: None.
+
+    """
+
+    def __init__(self,
+                 backbone,
+                 stage=3,
+                 decoder_input_channels=512,
+                 pretrained=None):
+        super().__init__()
+        self.backbone = backbone
+        self.pretrained = pretrained
+        self.stage = stage
+        self.loss_func_dict = None
+
+        decoder_output_channels = [64, 128, 256, 512]
+        self.decoder = Decoder(
+            input_channels=decoder_input_channels,
+            output_channels=decoder_output_channels)
+        if self.stage == 2:
+            # Stage 2 only trains the refine branch: freeze everything else.
+            for param in self.backbone.parameters():
+                param.stop_gradient = True
+            for param in self.decoder.parameters():
+                param.stop_gradient = True
+        if self.stage >= 2:
+            self.refine = Refine()
+        self.init_weight()
+
+    def forward(self, inputs):
+        """Predict alpha from ``inputs['img']`` and ``inputs['trimap']``.
+
+        Returns:
+            In training mode ``(logit_dict, loss_dict)``. In evaluation mode
+            the refined alpha tensor when ``stage >= 2``, otherwise
+            ``logit_dict`` holding only ``'alpha_raw'``.
+        """
+        input_shape = paddle.shape(inputs['img'])[-2:]
+        x = paddle.concat([inputs['img'], inputs['trimap'] / 255], axis=1)
+        fea_list = self.backbone(x)
+
+        # decoder stage
+        up_shape = [paddle.shape(fea_list[i])[-2:] for i in range(5)]
+        alpha_raw = self.decoder(fea_list, up_shape)
+        alpha_raw = F.interpolate(
+            alpha_raw, input_shape, mode='bilinear', align_corners=False)
+        logit_dict = {'alpha_raw': alpha_raw}
+        if self.stage < 2:
+            # loss() has a dedicated stage-1 branch, but the original returned
+            # here before ever computing it, so stage-1 training got no loss.
+            if self.training:
+                loss_dict = self.loss(logit_dict, inputs)
+                return logit_dict, loss_dict
+            # NOTE(review): evaluation still returns the dict (unchanged),
+            # unlike stage >= 2 which returns a tensor - confirm with callers.
+            return logit_dict
+
+        # refine stage
+        refine_input = paddle.concat([inputs['img'], alpha_raw], axis=1)
+        alpha_refine = self.refine(refine_input)
+
+        # final alpha: raw prediction plus the refinement residual
+        alpha_pred = alpha_refine + alpha_raw
+        alpha_pred = F.interpolate(
+            alpha_pred, input_shape, mode='bilinear', align_corners=False)
+        if not self.training:
+            alpha_pred = paddle.clip(alpha_pred, min=0, max=1)
+        logit_dict['alpha_pred'] = alpha_pred
+
+        if self.training:
+            loss_dict = self.loss(logit_dict, inputs)
+            return logit_dict, loss_dict
+        return alpha_pred
+
+    def loss(self, logit_dict, label_dict, loss_func_dict=None):
+        """Compute the stage-dependent losses inside the trimap region equal to 128.
+
+        Args:
+            logit_dict (dict): Predictions produced by ``forward``.
+            label_dict (dict): Must hold 'trimap' and 'alpha'; 'fg', 'bg' and
+                'img' are also read for the composition loss (stage 1 or 3).
+            loss_func_dict (dict, optional): Loss callables keyed by
+                'alpha_raw', 'comp' and 'alpha_pred'; only the first entry of
+                each list is used. When given it is stored on the model;
+                otherwise MRSD is used for every term. Default: None.
+
+        Returns:
+            dict: Individual losses plus their sum under 'all'.
+        """
+        if loss_func_dict is None:
+            if self.loss_func_dict is None:
+                self.loss_func_dict = defaultdict(list)
+                self.loss_func_dict['alpha_raw'].append(MRSD())
+                self.loss_func_dict['comp'].append(MRSD())
+                self.loss_func_dict['alpha_pred'].append(MRSD())
+        else:
+            self.loss_func_dict = loss_func_dict
+
+        loss = {}
+        mask = label_dict['trimap'] == 128
+        loss['all'] = 0
+
+        if self.stage != 2:
+            # Raw alpha loss, weighted by 0.5.
+            loss['alpha_raw'] = self.loss_func_dict['alpha_raw'][0](
+                logit_dict['alpha_raw'], label_dict['alpha'], mask)
+            loss['alpha_raw'] = 0.5 * loss['alpha_raw']
+            loss['all'] = loss['all'] + loss['alpha_raw']
+
+        if self.stage == 1 or self.stage == 3:
+            # Composition loss: re-composite with the predicted alpha and
+            # compare against the input image, weighted by 0.5.
+            comp_pred = logit_dict['alpha_raw'] * label_dict['fg'] + \
+                (1 - logit_dict['alpha_raw']) * label_dict['bg']
+            loss['comp'] = self.loss_func_dict['comp'][0](
+                comp_pred, label_dict['img'], mask)
+            loss['comp'] = 0.5 * loss['comp']
+            loss['all'] = loss['all'] + loss['comp']
+
+        if self.stage == 2 or self.stage == 3:
+            loss['alpha_pred'] = self.loss_func_dict['alpha_pred'][0](
+                logit_dict['alpha_pred'], label_dict['alpha'], mask)
+            loss['all'] = loss['all'] + loss['alpha_pred']
+
+        return loss
+
+    def init_weight(self):
+        """Load the whole model from ``self.pretrained`` when it is set."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+# bilinear interpolate skip connect
+class Up(nn.Layer):
+    """Resize a feature, add a skip feature, then apply a 5x5 ConvBNReLU and ReLU.
+
+    Args:
+        input_channels (int): Channels of the incoming (and skip) feature.
+        output_channels (int): Channels produced by the convolution.
+    """
+
+    def __init__(self, input_channels, output_channels):
+        super().__init__()
+        self.conv = layers.ConvBNReLU(
+            input_channels,
+            output_channels,
+            kernel_size=5,
+            padding=2,
+            bias_attr=False)
+
+    def forward(self, x, skip, output_shape):
+        # Bilinearly resize to `output_shape` so the feature can be added
+        # element-wise to `skip`.
+        resized = F.interpolate(
+            x, size=output_shape, mode='bilinear', align_corners=False)
+        fused = resized + skip
+        return F.relu(self.conv(fused))
+
+
+class Decoder(nn.Layer):
+    """Decoder that fuses six encoder features top-down into a one-channel alpha.
+
+    Args:
+        input_channels (int): Channels of the deepest feature.
+        output_channels (sequence of int, optional): Its last four entries
+            set the output widths of the upsampling stages, shallow to deep.
+            Default: (64, 128, 256, 512).
+    """
+
+    def __init__(self, input_channels, output_channels=(64, 128, 256, 512)):
+        super().__init__()
+        width_2 = output_channels[-4]
+        width_3 = output_channels[-3]
+        width_4 = output_channels[-2]
+        width_5 = output_channels[-1]
+
+        self.deconv6 = nn.Conv2D(
+            input_channels, input_channels, kernel_size=1, bias_attr=False)
+        self.deconv5 = Up(input_channels, width_5)
+        self.deconv4 = Up(width_5, width_4)
+        self.deconv3 = Up(width_4, width_3)
+        self.deconv2 = Up(width_3, width_2)
+        self.deconv1 = Up(width_2, 64)
+
+        self.alpha_conv = nn.Conv2D(
+            64, 1, kernel_size=5, padding=2, bias_attr=False)
+
+    def forward(self, fea_list, shape_list):
+        """Decode ``fea_list`` (last entry is the deepest) into a sigmoid alpha map.
+
+        ``shape_list[i]`` is the size the feature is resized to before being
+        added to ``fea_list[i]``.
+        """
+        x = self.deconv6(fea_list[-1])
+        stages = (self.deconv5, self.deconv4, self.deconv3, self.deconv2,
+                  self.deconv1)
+        # Walk from skip index 4 down to index 0.
+        for level, stage in zip(range(4, -1, -1), stages):
+            x = stage(x, fea_list[level], shape_list[level])
+        return F.sigmoid(self.alpha_conv(x))
+
+
+class Refine(nn.Layer):
+    """Four stacked 3x3 ConvBNReLU layers mapping a 4-channel input to 1 channel."""
+
+    def __init__(self):
+        super().__init__()
+        conv_kwargs = dict(kernel_size=3, padding=1, bias_attr=False)
+        self.conv1 = layers.ConvBNReLU(4, 64, **conv_kwargs)
+        self.conv2 = layers.ConvBNReLU(64, 64, **conv_kwargs)
+        self.conv3 = layers.ConvBNReLU(64, 64, **conv_kwargs)
+        self.alpha_pred = layers.ConvBNReLU(64, 1, **conv_kwargs)
+
+    def forward(self, x):
+        for conv in (self.conv1, self.conv2, self.conv3):
+            x = conv(x)
+        return self.alpha_pred(x)
--- a/Matting/ppmatting/models/gca.py
+++ b/Matting/ppmatting/models/gca.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The gca code was heavily based on https://github.com/Yaoyi-Li/GCA-Matting
+# and https://github.com/open-mmlab/mmediting
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddleseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager, param_init
+
+from ppmatting.models.layers import GuidedCxtAtten
+
+
+@manager.MODELS.add_component
+class GCABaseline(nn.Layer):
+    """Encoder-decoder matting model using a ``ResShortCut_D_Dec`` decoder.
+
+    Args:
+        backbone: Encoder; called on the image concatenated with the trimap
+            (divided by 255) and expected to return ``(embedding, mid_fea)``.
+        pretrained (str, optional): Accepted but not used by this class.
+            NOTE(review): no weights are loaded here - confirm this is intended.
+    """
+
+    def __init__(self, backbone, pretrained=None):
+        super().__init__()
+        self.encoder = backbone
+        self.decoder = ResShortCut_D_Dec([2, 3, 3, 2])
+
+    def forward(self, inputs):
+        """Return alpha in eval mode, or ``(logit_dict, loss_dict)`` in training."""
+        net_input = paddle.concat(
+            [inputs['img'], inputs['trimap'] / 255], axis=1)
+        embedding, mid_fea = self.encoder(net_input)
+        alpha_pred = self.decoder(embedding, mid_fea)
+
+        if not self.training:
+            return alpha_pred
+
+        # Training: plain L1 loss against the ground-truth alpha.
+        logit_dict = {'alpha_pred': alpha_pred, }
+        alpha_gt = inputs['alpha']
+        alpha_loss = F.l1_loss(alpha_pred, alpha_gt)
+        loss_dict = {"alpha": alpha_loss, "all": alpha_loss}
+        return logit_dict, loss_dict
+
+
+@manager.MODELS.add_component
+class GCA(GCABaseline):
+    """``GCABaseline`` whose decoder is replaced by ``ResGuidedCxtAtten_Dec``."""
+
+    def __init__(self, backbone, pretrained=None):
+        super().__init__(backbone=backbone, pretrained=pretrained)
+        # Overwrite the decoder created by the base class.
+        self.decoder = ResGuidedCxtAtten_Dec([2, 3, 3, 2])
+
+
+def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    """Return a bias-free 5x5 ``nn.Conv2D`` with padding fixed at 2."""
+    conv_kwargs = dict(
+        kernel_size=5,
+        stride=stride,
+        padding=2,
+        groups=groups,
+        bias_attr=False,
+        dilation=dilation)
+    return nn.Conv2D(in_planes, out_planes, **conv_kwargs)
+
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    """Return a bias-free 3x3 ``nn.Conv2D`` whose padding equals ``dilation``."""
+    conv_kwargs = dict(
+        kernel_size=3,
+        stride=stride,
+        padding=dilation,
+        groups=groups,
+        bias_attr=False,
+        dilation=dilation)
+    return nn.Conv2D(in_planes, out_planes, **conv_kwargs)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """Return a bias-free 1x1 ``nn.Conv2D``."""
+    pointwise = nn.Conv2D(
+        in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False)
+    return pointwise
+
+
+class BasicBlock(nn.Layer):
+    """Residual decoder block with two spectral-normalized convolutions.
+
+    Args:
+        inplanes (int): Input channels (also the width of the first conv).
+        planes (int): Output channels.
+        stride (int, optional): When greater than 1 the first layer is a
+            stride-2 transposed convolution; otherwise a regular convolution.
+            Default: 1.
+        upsample (nn.Layer, optional): Applied to the input to build the
+            residual branch; the raw input is used when None. Default: None.
+        norm_layer (callable, optional): Normalization layer factory.
+            Default: None, meaning ``nn.BatchNorm``.
+        large_kernel (bool, optional): Use 5x5 instead of 3x3 convolutions.
+            Default: False.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 upsample=None,
+                 norm_layer=None,
+                 large_kernel=False):
+        super().__init__()
+        norm_layer = nn.BatchNorm if norm_layer is None else norm_layer
+        make_conv = conv5x5 if large_kernel else conv3x3
+        self.stride = stride
+
+        # With stride > 1 the first layer is a transposed conv with a fixed
+        # stride of 2, regardless of the exact stride value.
+        if self.stride > 1:
+            first_conv = nn.Conv2DTranspose(
+                inplanes,
+                inplanes,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias_attr=False)
+        else:
+            first_conv = make_conv(inplanes, inplanes)
+        self.conv1 = nn.utils.spectral_norm(first_conv)
+        self.bn1 = norm_layer(inplanes)
+        self.activation = nn.LeakyReLU(0.2)
+        self.conv2 = nn.utils.spectral_norm(make_conv(inplanes, planes))
+        self.bn2 = norm_layer(planes)
+        self.upsample = upsample
+
+    def forward(self, x):
+        out = self.activation(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+
+        # Residual branch: the raw input unless an upsample module was given.
+        identity = x if self.upsample is None else self.upsample(x)
+
+        out += identity
+        return self.activation(out)
+
+
+class ResNet_D_Dec(nn.Layer):
+    """ResNet-style decoder that upsamples an embedding to a one-channel alpha.
+
+    Args:
+        layers (sequence of int, optional): Number of ``BasicBlock`` modules
+            in each of the four stages; 0 turns a stage into an identity.
+            Default: (3, 4, 4, 2).
+        norm_layer (callable, optional): Normalization layer factory.
+            Default: None, meaning ``nn.BatchNorm``.
+        large_kernel (bool, optional): Use 5x5 instead of 3x3 kernels.
+            Default: False.
+        late_downsample (bool, optional): When True the last stage outputs 64
+            channels instead of 32. Default: False.
+    """
+
+    def __init__(self,
+                 layers=(3, 4, 4, 2),
+                 norm_layer=None,
+                 large_kernel=False,
+                 late_downsample=False):
+        super().__init__()
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm
+        self._norm_layer = norm_layer
+        self.large_kernel = large_kernel
+        self.kernel_size = 5 if self.large_kernel else 3
+
+        # Running input width consumed and updated by _make_layer.
+        self.inplanes = 512 if layers[0] > 0 else 256
+        self.late_downsample = late_downsample
+        self.midplanes = 64 if late_downsample else 32
+
+        self.conv1 = nn.utils.spectral_norm(
+            nn.Conv2DTranspose(
+                self.midplanes,
+                32,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias_attr=False))
+        self.bn1 = norm_layer(32)
+        self.leaky_relu = nn.LeakyReLU(0.2)
+        self.conv2 = nn.Conv2D(
+            32,
+            1,
+            kernel_size=self.kernel_size,
+            stride=1,
+            padding=self.kernel_size // 2)
+        self.upsample = nn.UpsamplingNearest2D(scale_factor=2)
+        self.tanh = nn.Tanh()
+        self.layer1 = self._make_layer(BasicBlock, 256, layers[0], stride=2)
+        self.layer2 = self._make_layer(BasicBlock, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(BasicBlock, 64, layers[2], stride=2)
+        self.layer4 = self._make_layer(
+            BasicBlock, self.midplanes, layers[3], stride=2)
+
+        self.init_weight()
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        """Build one stage of ``blocks`` blocks and advance ``self.inplanes``.
+
+        Only the first block receives ``stride`` and the residual ``upsample``
+        branch; the remaining blocks keep the resolution.
+        """
+        if blocks == 0:
+            return nn.Sequential(nn.Identity())
+        norm_layer = self._norm_layer
+        upsample = None
+        if stride != 1:
+            upsample = nn.Sequential(
+                nn.UpsamplingNearest2D(scale_factor=2),
+                nn.utils.spectral_norm(
+                    conv1x1(self.inplanes, planes * block.expansion)),
+                norm_layer(planes * block.expansion), )
+        elif self.inplanes != planes * block.expansion:
+            upsample = nn.Sequential(
+                nn.utils.spectral_norm(
+                    conv1x1(self.inplanes, planes * block.expansion)),
+                norm_layer(planes * block.expansion), )
+
+        layers = [
+            block(self.inplanes, planes, stride, upsample, norm_layer,
+                  self.large_kernel)
+        ]
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    norm_layer=norm_layer,
+                    large_kernel=self.large_kernel))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x, mid_fea):
+        """Decode ``x`` into alpha; ``mid_fea`` is unused in this base class.
+
+        The shape comments are the original author's example sizes.
+        """
+        # Debug print(x.shape) calls after every stage were removed: they
+        # wrote to stdout on every forward pass.
+        x = self.layer1(x)  # N x 256 x 32 x 32
+        x = self.layer2(x)  # N x 128 x 64 x 64
+        x = self.layer3(x)  # N x 64 x 128 x 128
+        x = self.layer4(x)  # N x 32 x 256 x 256
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.leaky_relu(x)
+        x = self.conv2(x)
+
+        # Map the tanh output from [-1, 1] to [0, 1].
+        alpha = (self.tanh(x) + 1.0) / 2.0
+
+        return alpha
+
+    def init_weight(self):
+        """Initialize conv, batch-norm and ``BasicBlock.bn2`` parameters."""
+        for layer in self.sublayers():
+            if isinstance(layer, nn.Conv2D):
+                # Use `weight_orig` when the layer exposes it, else `weight`.
+                if hasattr(layer, "weight_orig"):
+                    param = layer.weight_orig
+                else:
+                    param = layer.weight
+                param_init.xavier_uniform(param)
+
+            elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
+                param_init.constant_init(layer.weight, value=1.0)
+                param_init.constant_init(layer.bias, value=0.0)
+
+            elif isinstance(layer, BasicBlock):
+                # Zero the scale of each block's last norm layer.
+                param_init.constant_init(layer.bn2.weight, value=0.0)
+
+
+class ResShortCut_D_Dec(ResNet_D_Dec):
+    """``ResNet_D_Dec`` that adds encoder shortcut features after each stage."""
+
+    def __init__(self,
+                 layers,
+                 norm_layer=None,
+                 large_kernel=False,
+                 late_downsample=False):
+        super().__init__(
+            layers=layers,
+            norm_layer=norm_layer,
+            large_kernel=large_kernel,
+            late_downsample=late_downsample)
+
+    def forward(self, x, mid_fea):
+        """Decode ``x`` using the five features stored in ``mid_fea['shortcut']``."""
+        fea1, fea2, fea3, fea4, fea5 = mid_fea['shortcut']
+        stages = (self.layer1, self.layer2, self.layer3, self.layer4)
+        # Shortcuts are consumed from the last (fea5) to the second (fea2).
+        for stage, shortcut in zip(stages, (fea5, fea4, fea3, fea2)):
+            x = stage(x) + shortcut
+        x = self.bn1(self.conv1(x))
+        x = self.leaky_relu(x) + fea1
+        x = self.conv2(x)
+
+        # Map the tanh output from [-1, 1] to [0, 1].
+        return (self.tanh(x) + 1.0) / 2.0
+
+
+class ResGuidedCxtAtten_Dec(ResNet_D_Dec):
+    """Shortcut decoder with a ``GuidedCxtAtten`` module after the second stage."""
+
+    def __init__(self,
+                 layers,
+                 norm_layer=None,
+                 large_kernel=False,
+                 late_downsample=False):
+        super().__init__(
+            layers=layers,
+            norm_layer=norm_layer,
+            large_kernel=large_kernel,
+            late_downsample=late_downsample)
+        self.gca = GuidedCxtAtten(128, 128)
+
+    def forward(self, x, mid_fea):
+        """Decode ``x`` using 'shortcut', 'image_fea' and 'unknown' from ``mid_fea``.
+
+        The shape comments are the original author's example sizes.
+        """
+        fea1, fea2, fea3, fea4, fea5 = mid_fea['shortcut']
+        im = mid_fea['image_fea']
+
+        x = self.layer1(x) + fea5  # N x 256 x 32 x 32
+        x = self.layer2(x) + fea4  # N x 128 x 64 x 64
+        # contextual attention
+        x = self.gca(im, x, mid_fea['unknown'])
+        x = self.layer3(x) + fea3  # N x 64 x 128 x 128
+        x = self.layer4(x) + fea2  # N x 32 x 256 x 256
+
+        x = self.bn1(self.conv1(x))
+        x = self.leaky_relu(x) + fea1
+        x = self.conv2(x)
+
+        # Map the tanh output from [-1, 1] to [0, 1].
+        return (self.tanh(x) + 1.0) / 2.0
--- a/Matting/ppmatting/models/human_matting.py
+++ b/Matting/ppmatting/models/human_matting.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+import time
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import paddleseg
+from paddleseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager
+
+from ppmatting.models.losses import MRSD
+
+
+def conv_up_psp(in_channels, out_channels, up_sample):
+    """Return a 3x3 ConvBNReLU followed by bilinear upsampling by ``up_sample``."""
+    conv = layers.ConvBNReLU(in_channels, out_channels, 3, padding=1)
+    upsample = nn.Upsample(
+        scale_factor=up_sample, mode='bilinear', align_corners=False)
+    return nn.Sequential(conv, upsample)
+
+
+@manager.MODELS.add_component
+class HumanMatting(nn.Layer):
+    """A human matting model with a glance decoder, a focus decoder and an optional refiner."""
+
+    def __init__(self,
+                 backbone,
+                 pretrained=None,
+                 backbone_scale=0.25,
+                 refine_kernel_size=3,
+                 if_refine=True):
+        """Build the glance and focus decoders and, optionally, the refiner.
+
+        Args:
+            backbone: Feature extractor. It must expose ``feat_channels``, of
+                which the last five entries are read here.
+            pretrained (str, optional): Stored on ``self.pretrained``.
+                Presumably consumed by ``init_weight`` - defined outside this
+                view, TODO confirm. Default: None.
+            backbone_scale (float, optional): Stored on the model. Must not
+                exceed 0.5 when ``if_refine`` is True; forced to 1 when
+                ``if_refine`` is False. Default: 0.25.
+            refine_kernel_size (int, optional): Kernel size passed to
+                ``Refiner``. Default: 3.
+            if_refine (bool, optional): Whether to create the refiner.
+                Default: True.
+
+        Raises:
+            ValueError: If ``if_refine`` is True and ``backbone_scale > 0.5``.
+        """
+        super().__init__()
+        if if_refine:
+            if backbone_scale > 0.5:
+                raise ValueError(
+                    'Backbone_scale should not be greater than 1/2, but it is {}'
+                    .format(backbone_scale))
+        else:
+            # Without the refiner the given scale is ignored.
+            backbone_scale = 1
+
+        self.backbone = backbone
+        self.backbone_scale = backbone_scale
+        self.pretrained = pretrained
+        self.if_refine = if_refine
+        if if_refine:
+            self.refiner = Refiner(kernel_size=refine_kernel_size)
+        self.loss_func_dict = None
+
+        self.backbone_channels = backbone.feat_channels
+        ######################
+        ### Decoder part - Glance
+        ######################
+        self.psp_module = layers.PPModule(
+            self.backbone_channels[-1],
+            512,
+            bin_sizes=(1, 3, 5),
+            dim_reduction=False,
+            align_corners=False)
+        # Branches that reduce the 512-channel PSP feature and upsample it by
+        # 2, 4, 8 and 16.
+        self.psp4 = conv_up_psp(512, 256, 2)
+        self.psp3 = conv_up_psp(512, 128, 4)
+        self.psp2 = conv_up_psp(512, 64, 8)
+        self.psp1 = conv_up_psp(512, 64, 16)
+        # stage 5g
+        self.decoder5_g = nn.Sequential(
+            layers.ConvBNReLU(
+                512 + self.backbone_channels[-1], 512, 3, padding=1),
+            layers.ConvBNReLU(
+                512, 512, 3, padding=2, dilation=2),
+            layers.ConvBNReLU(
+                512, 256, 3, padding=2, dilation=2),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 4g
+        self.decoder4_g = nn.Sequential(
+            layers.ConvBNReLU(
+                512, 256, 3, padding=1),
+            layers.ConvBNReLU(
+                256, 256, 3, padding=1),
+            layers.ConvBNReLU(
+                256, 128, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 3g
+        self.decoder3_g = nn.Sequential(
+            layers.ConvBNReLU(
+                256, 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 64, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 2g
+        self.decoder2_g = nn.Sequential(
+            layers.ConvBNReLU(
+                128, 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 64, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 1g
+        self.decoder1_g = nn.Sequential(
+            layers.ConvBNReLU(
+                128, 64, 3, padding=1),
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 0g: three output channels, no upsampling
+        self.decoder0_g = nn.Sequential(
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            nn.Conv2D(
+                64, 3, 3, padding=1))
+
+        ##########################
+        ### Decoder part - FOCUS
+        ##########################
+        self.bridge_block = nn.Sequential(
+            layers.ConvBNReLU(
+                self.backbone_channels[-1], 512, 3, dilation=2, padding=2),
+            layers.ConvBNReLU(
+                512, 512, 3, dilation=2, padding=2),
+            layers.ConvBNReLU(
+                512, 512, 3, dilation=2, padding=2))
+        # stage 5f
+        self.decoder5_f = nn.Sequential(
+            layers.ConvBNReLU(
+                512 + self.backbone_channels[-1], 512, 3, padding=1),
+            layers.ConvBNReLU(
+                512, 512, 3, padding=2, dilation=2),
+            layers.ConvBNReLU(
+                512, 256, 3, padding=2, dilation=2),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 4f
+        self.decoder4_f = nn.Sequential(
+            layers.ConvBNReLU(
+                256 + self.backbone_channels[-2], 256, 3, padding=1),
+            layers.ConvBNReLU(
+                256, 256, 3, padding=1),
+            layers.ConvBNReLU(
+                256, 128, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 3f
+        self.decoder3_f = nn.Sequential(
+            layers.ConvBNReLU(
+                128 + self.backbone_channels[-3], 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 64, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 2f
+        self.decoder2_f = nn.Sequential(
+            layers.ConvBNReLU(
+                64 + self.backbone_channels[-4], 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 128, 3, padding=1),
+            layers.ConvBNReLU(
+                128, 64, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 1f
+        self.decoder1_f = nn.Sequential(
+            layers.ConvBNReLU(
+                64 + self.backbone_channels[-5], 64, 3, padding=1),
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            nn.Upsample(
+                scale_factor=2, mode='bilinear', align_corners=False))
+        # stage 0f: 1 + 1 + 32 output channels, which forward() slices into
+        # the focus logit, the error map and the hidden features.
+        self.decoder0_f = nn.Sequential(
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            layers.ConvBNReLU(
+                64, 64, 3, padding=1),
+            nn.Conv2D(
+                64, 1 + 1 + 32, 3, padding=1))
+        self.init_weight()
+
+    def forward(self, data):
+        """Predict alpha for ``data['img']``.
+
+        Returns:
+            In training mode ``(logit_dict, loss_dict)``. Otherwise the
+            refined alpha when ``if_refine`` is True, else the fused
+            low-resolution alpha.
+
+        Raises:
+            ValueError: If refinement is enabled and the input height or
+                width is not divisible by 4.
+        """
+        src = data['img']
+        src_h, src_w = paddle.shape(src)[2:]
+        if self.if_refine:
+            # Validate only when the shape entries are tensors; per the
+            # original note, this check is not needed when exporting.
+            if isinstance(src_h, paddle.Tensor):
+                if (src_h % 4 != 0) or (src_w % 4) != 0:
+                    raise ValueError(
+                        'The input image must have width and height that are divisible by 4'
+                    )
+
+        # Downsample src for backbone
+        src_sm = F.interpolate(
+            src,
+            scale_factor=self.backbone_scale,
+            mode='bilinear',
+            align_corners=False)
+
+        # Backbone features; the last five are consumed below.
+        fea_list = self.backbone(src_sm)
+        ##########################
+        ### Decoder part - GLANCE
+        ##########################
+        # Shape notes: channel counts follow the layers built in __init__;
+        # H, W refer to src_sm and the scales assume the deepest backbone
+        # feature is at 1/32 resolution - TODO confirm against the backbone.
+        # psp: N, 512, H/32, W/32
+        psp = self.psp_module(fea_list[-1])
+        # d5_g: N, 256, H/16, W/16
+        d5_g = self.decoder5_g(paddle.concat((psp, fea_list[-1]), 1))
+        # d4_g: N, 128, H/8, W/8
+        d4_g = self.decoder4_g(paddle.concat((self.psp4(psp), d5_g), 1))
+        # d3_g: N, 64, H/4, W/4
+        d3_g = self.decoder3_g(paddle.concat((self.psp3(psp), d4_g), 1))
+        # d2_g: N, 64, H/2, W/2
+        d2_g = self.decoder2_g(paddle.concat((self.psp2(psp), d3_g), 1))
+        # d1_g: N, 64, H, W
+        d1_g = self.decoder1_g(paddle.concat((self.psp1(psp), d2_g), 1))
+        # d0_g: N, 3, H, W
+        d0_g = self.decoder0_g(d1_g)
+        # The 1st channel is foreground. The 2nd is transition region. The 3rd is background.
+        # Softmax over the channel axis turns the three maps into probabilities.
+        glance_sigmoid = F.softmax(d0_g, axis=1)
+
+        ##########################
+        ### Decoder part - FOCUS
+        ##########################
+        bb = self.bridge_block(fea_list[-1])
+        # bb: N, 512, H/32, W/32
+        d5_f = self.decoder5_f(paddle.concat((bb, fea_list[-1]), 1))
+        # d5_f: N, 256, H/16, W/16
+        d4_f = self.decoder4_f(paddle.concat((d5_f, fea_list[-2]), 1))
+        # d4_f: N, 128, H/8, W/8
+        d3_f = self.decoder3_f(paddle.concat((d4_f, fea_list[-3]), 1))
+        # d3_f: N, 64, H/4, W/4
+        d2_f = self.decoder2_f(paddle.concat((d3_f, fea_list[-4]), 1))
+        # d2_f: N, 64, H/2, W/2
+        d1_f = self.decoder1_f(paddle.concat((d2_f, fea_list[-5]), 1))
+        # d1_f: N, 64, H, W
+        d0_f = self.decoder0_f(d1_f)
+        # d0_f: N, 34, H, W -> channel 0 focus logit, channel 1 error map,
+        # channels 2.. hidden features.
+        focus_sigmoid = F.sigmoid(d0_f[:, 0:1, :, :])
+        pha_sm = self.fusion(glance_sigmoid, focus_sigmoid)
+        err_sm = d0_f[:, 1:2, :, :]
+        err_sm = paddle.clip(err_sm, 0., 1.)
+        hid_sm = F.relu(d0_f[:, 2:, :, :])
+
+        # Refiner
+        if self.if_refine:
+            pha = self.refiner(
+                src=src, pha=pha_sm, err=err_sm, hid=hid_sm, tri=glance_sigmoid)
+            # Clamp outputs
+            pha = paddle.clip(pha, 0., 1.)
+
+        if self.training:
+            logit_dict = {
+                'glance': glance_sigmoid,
+                'focus': focus_sigmoid,
+                'fusion': pha_sm,
+                'error': err_sm
+            }
+            if self.if_refine:
+                logit_dict['refine'] = pha
+            loss_dict = self.loss(logit_dict, data)
+            return logit_dict, loss_dict
+        else:
+            return pha if self.if_refine else pha_sm
+
+    def loss(self, logit_dict, label_dict, loss_func_dict=None):
+        """
+        Compute the glance, focus, collaborative-matting, error and (optional)
+        refine losses.
+
+        Args:
+            logit_dict (dict): Predictions with keys 'glance', 'focus',
+                'fusion', 'error' and, when `self.if_refine` is set, 'refine'.
+            label_dict (dict): Labels; 'trimap' and 'alpha' are read here.
+            loss_func_dict (dict, optional): Loss functions keyed by 'glance',
+                'focus', 'cm', 'err' and 'refine', each mapping to a list. When
+                given it replaces `self.loss_func_dict`; when None, a default
+                set is created once and cached on the instance. Default: None.
+
+        Returns:
+            dict: The individual losses under 'glance', 'focus', 'cm', 'err',
+            optionally 'refine', and their weighted sum under 'all'.
+        """
+        if loss_func_dict is None:
+            # Build the default losses lazily, only on the first call.
+            if self.loss_func_dict is None:
+                self.loss_func_dict = defaultdict(list)
+                self.loss_func_dict['glance'].append(nn.NLLLoss())
+                self.loss_func_dict['focus'].append(MRSD())
+                self.loss_func_dict['cm'].append(MRSD())
+                self.loss_func_dict['err'].append(paddleseg.models.MSELoss())
+                self.loss_func_dict['refine'].append(paddleseg.models.L1Loss())
+        else:
+            self.loss_func_dict = loss_func_dict
+
+        loss = {}
+
+        # glance loss computation
+        # get glance label
+        glance_label = F.interpolate(
+            label_dict['trimap'],
+            logit_dict['glance'].shape[2:],
+            mode='nearest',
+            align_corners=False)
+        # Class indices: 1 where the trimap equals 128 (transition), 2 where it
+        # equals 0 (background), and 0 everywhere else.
+        glance_label_trans = (glance_label == 128).astype('int64')
+        glance_label_bg = (glance_label == 0).astype('int64')
+        glance_label = glance_label_trans + glance_label_bg * 2
+        # The glance output is already a probability map, so take its log
+        # (with a small offset to avoid log(0)) before the NLL loss.
+        loss_glance = self.loss_func_dict['glance'][0](
+            paddle.log(logit_dict['glance'] + 1e-6), glance_label.squeeze(1))
+        loss['glance'] = loss_glance
+
+        # focus loss computation
+        focus_label = F.interpolate(
+            label_dict['alpha'],
+            logit_dict['focus'].shape[2:],
+            mode='bilinear',
+            align_corners=False)
+        # The focus loss is restricted to the transition region via the mask.
+        loss_focus = self.loss_func_dict['focus'][0](
+            logit_dict['focus'], focus_label, glance_label_trans)
+        loss['focus'] = loss_focus
+
+        # collaborative matting loss
+        loss_cm_func = self.loss_func_dict['cm']
+        # fusion_sigmoid loss
+        loss_cm = loss_cm_func[0](logit_dict['fusion'], focus_label)
+        loss['cm'] = loss_cm
+
+        # error loss
+        # Both the predicted error map and the fused alpha are resized to the
+        # label resolution; the target is the absolute alpha error.
+        err = F.interpolate(
+            logit_dict['error'],
+            label_dict['alpha'].shape[2:],
+            mode='bilinear',
+            align_corners=False)
+        err_label = (F.interpolate(
+            logit_dict['fusion'],
+            label_dict['alpha'].shape[2:],
+            mode='bilinear',
+            align_corners=False) - label_dict['alpha']).abs()
+        loss_err = self.loss_func_dict['err'][0](err, err_label)
+        loss['err'] = loss_err
+
+        loss_all = 0.25 * loss_glance + 0.25 * loss_focus + 0.25 * loss_cm + loss_err
+
+        # refine loss
+        if self.if_refine:
+            loss_refine = self.loss_func_dict['refine'][0](logit_dict['refine'],
+                                                           label_dict['alpha'])
+            loss['refine'] = loss_refine
+            loss_all = loss_all + loss_refine
+
+        loss['all'] = loss_all
+        return loss
+
+    def fusion(self, glance_sigmoid, focus_sigmoid):
+        # glance_sigmoid [N, 3, H, W].
+        # In index, 0 is foreground, 1 is transition, 2 is backbone.
+        # After fusion, the foreground is 1, the background is 0, and the transion is between (0, 1).
+        index = paddle.argmax(glance_sigmoid, axis=1, keepdim=True)
+        transition_mask = (index == 1).astype('float32')
+        fg = (index == 0).astype('float32')
+        fusion_sigmoid = focus_sigmoid * transition_mask + fg
+        return fusion_sigmoid
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class Refiner(nn.Layer):
+    '''
+    Refiner refines the coarse output to full resolution.
+
+    Two convolutions run at half resolution and two at full resolution. All of
+    them are created with padding=0, so `forward` pads explicitly when
+    kernel_size is 3.
+
+    Args:
+        kernel_size: The convolution kernel_size. Options: [1, 3]. Default: 3.
+
+    Raises:
+        ValueError: If kernel_size is neither 1 nor 3.
+    '''
+
+    def __init__(self, kernel_size=3):
+        super().__init__()
+        if kernel_size not in [1, 3]:
+            raise ValueError("kernel_size must be in [1, 3]")
+
+        self.kernel_size = kernel_size
+
+        channels = [32, 24, 16, 12, 1]
+        # conv1 input: `channels[0] + 4` channels for the concatenated
+        # hid/pha/tri tensor plus 3 for the half-resolution source image.
+        self.conv1 = layers.ConvBNReLU(
+            channels[0] + 4 + 3,
+            channels[1],
+            kernel_size,
+            padding=0,
+            bias_attr=False)
+        self.conv2 = layers.ConvBNReLU(
+            channels[1], channels[2], kernel_size, padding=0, bias_attr=False)
+        # conv3 input: conv2 features plus the 3-channel full-resolution image.
+        self.conv3 = layers.ConvBNReLU(
+            channels[2] + 3,
+            channels[3],
+            kernel_size,
+            padding=0,
+            bias_attr=False)
+        self.conv4 = nn.Conv2D(
+            channels[3], channels[4], kernel_size, padding=0, bias_attr=True)
+
+    def forward(self, src, pha, err, hid, tri):
+        '''
+        Args:
+            src: (B, 3, H, W) full resolution source image.
+            pha: (B, 1, Hc, Wc) coarse alpha prediction.
+            err: (B, 1, Hc, Wc) coarse error prediction. Not used in this
+                method.
+            hid: (B, 32, Hc, Wc) coarse hidden encoding.
+            tri: (B, 3, Hc, Wc) trimap prediction. The channel count is
+                inferred from conv1's `32 + 4 + 3` input channels; TODO confirm.
+
+        Returns:
+            The refined alpha prediction produced by conv4 (not clamped here).
+        '''
+        h_full, w_full = paddle.shape(src)[2:]
+        h_half, w_half = h_full // 2, w_full // 2
+        # NOTE(review): h_quat / w_quat are computed but never used below.
+        h_quat, w_quat = h_full // 4, w_full // 4
+
+        # Bring the coarse tensors and the source image to half resolution.
+        x = paddle.concat([hid, pha, tri], axis=1)
+        x = F.interpolate(
+            x,
+            paddle.concat((h_half, w_half)),
+            mode='bilinear',
+            align_corners=False)
+        y = F.interpolate(
+            src,
+            paddle.concat((h_half, w_half)),
+            mode='bilinear',
+            align_corners=False)
+
+        # Assuming stride-1 convolutions, each unpadded 3x3 conv trims 2 pixels
+        # per spatial dimension. Padding 3 on every side leaves a 1-pixel
+        # border (size half + 2) after conv1 and conv2.
+        if self.kernel_size == 3:
+            x = F.pad(x, [3, 3, 3, 3])
+            y = F.pad(y, [3, 3, 3, 3])
+
+        x = self.conv1(paddle.concat([x, y], axis=1))
+        x = self.conv2(x)
+
+        # Upsample to full resolution. For kernel_size == 3 both tensors get a
+        # 2-pixel border (size full + 4) that conv3 and conv4 consume.
+        if self.kernel_size == 3:
+            x = F.interpolate(x, paddle.concat((h_full + 4, w_full + 4)))
+            y = F.pad(src, [2, 2, 2, 2])
+        else:
+            x = F.interpolate(
+                x, paddle.concat((h_full, w_full)), mode='nearest')
+            y = src
+
+        x = self.conv3(paddle.concat([x, y], axis=1))
+        x = self.conv4(x)
+
+        pha = x
+        return pha
--- a/Matting/ppmatting/models/layers/__init__.py
+++ b/Matting/ppmatting/models/layers/__init__.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .gca_module import GuidedCxtAtten
+from .tensor_fusion import MLFF
\ No newline at end of file
--- a/Matting/ppmatting/models/layers/gca_module.py
+++ b/Matting/ppmatting/models/layers/gca_module.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The gca code was heavily based on https://github.com/Yaoyi-Li/GCA-Matting
+# and https://github.com/open-mmlab/mmediting
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import param_init
+
+
+class GuidedCxtAtten(nn.Layer):
+    def __init__(self,
+                 out_channels,
+                 guidance_channels,
+                 kernel_size=3,
+                 stride=1,
+                 rate=2):
+        super().__init__()
+
+        self.kernel_size = kernel_size
+        self.rate = rate
+        self.stride = stride
+        self.guidance_conv = nn.Conv2D(
+            in_channels=guidance_channels,
+            out_channels=guidance_channels // 2,
+            kernel_size=1)
+
+        self.out_conv = nn.Sequential(
+            nn.Conv2D(
+                in_channels=out_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                bias_attr=False),
+            nn.BatchNorm(out_channels))
+
+        self.init_weight()
+
+    def init_weight(self):
+        param_init.xavier_uniform(self.guidance_conv.weight)
+        param_init.constant_init(self.guidance_conv.bias, value=0.0)
+        param_init.xavier_uniform(self.out_conv[0].weight)
+        param_init.constant_init(self.out_conv[1].weight, value=1e-3)
+        param_init.constant_init(self.out_conv[1].bias, value=0.0)
+
+    def forward(self, img_feat, alpha_feat, unknown=None, softmax_scale=1.):
+
+        img_feat = self.guidance_conv(img_feat)
+        img_feat = F.interpolate(
+            img_feat, scale_factor=1 / self.rate, mode='nearest')
+
+        # process unknown mask
+        unknown, softmax_scale = self.process_unknown_mask(unknown, img_feat,
+                                                           softmax_scale)
+
+        img_ps, alpha_ps, unknown_ps = self.extract_feature_maps_patches(
+            img_feat, alpha_feat, unknown)
+
+        self_mask = self.get_self_correlation_mask(img_feat)
+
+        # split tensors by batch dimension; tuple is returned
+        img_groups = paddle.split(img_feat, 1, axis=0)
+        img_ps_groups = paddle.split(img_ps, 1, axis=0)
+        alpha_ps_groups = paddle.split(alpha_ps, 1, axis=0)
+        unknown_ps_groups = paddle.split(unknown_ps, 1, axis=0)
+        scale_groups = paddle.split(softmax_scale, 1, axis=0)
+        groups = (img_groups, img_ps_groups, alpha_ps_groups, unknown_ps_groups,
+                  scale_groups)
+
+        y = []
+
+        for img_i, img_ps_i, alpha_ps_i, unknown_ps_i, scale_i in zip(*groups):
+            # conv for compare
+            similarity_map = self.compute_similarity_map(img_i, img_ps_i)
+
+            gca_score = self.compute_guided_attention_score(
+                similarity_map, unknown_ps_i, scale_i, self_mask)
+
+            yi = self.propagate_alpha_feature(gca_score, alpha_ps_i)
+
+            y.append(yi)
+
+        y = paddle.concat(y, axis=0)  # back to the mini-batch
+        y = paddle.reshape(y, alpha_feat.shape)
+
+        y = self.out_conv(y) + alpha_feat
+
+        return y
+
+    def extract_feature_maps_patches(self, img_feat, alpha_feat, unknown):
+
+        # extract image feature patches with shape:
+        # (N, img_h*img_w, img_c, img_ks, img_ks)
+        img_ks = self.kernel_size
+        img_ps = self.extract_patches(img_feat, img_ks, self.stride)
+
+        # extract alpha feature patches with shape:
+        # (N, img_h*img_w, alpha_c, alpha_ks, alpha_ks)
+        alpha_ps = self.extract_patches(alpha_feat, self.rate * 2, self.rate)
+
+        # extract unknown mask patches with shape: (N, img_h*img_w, 1, 1)
+        unknown_ps = self.extract_patches(unknown, img_ks, self.stride)
+        unknown_ps = unknown_ps.squeeze(axis=2)  # squeeze channel dimension
+        unknown_ps = unknown_ps.mean(axis=[2, 3], keepdim=True)
+
+        return img_ps, alpha_ps, unknown_ps
+
+    def extract_patches(self, x, kernel_size, stride):
+        n, c, _, _ = x.shape
+        x = self.pad(x, kernel_size, stride)
+        x = F.unfold(x, [kernel_size, kernel_size], strides=[stride, stride])
+        x = paddle.transpose(x, (0, 2, 1))
+        x = paddle.reshape(x, (n, -1, c, kernel_size, kernel_size))
+
+        return x
+
+    def pad(self, x, kernel_size, stride):
+        left = (kernel_size - stride + 1) // 2
+        right = (kernel_size - stride) // 2
+        pad = (left, right, left, right)
+        return F.pad(x, pad, mode='reflect')
+
+    def compute_guided_attention_score(self, similarity_map, unknown_ps, scale,
+                                       self_mask):
+        # scale the correlation with predicted scale factor for known and
+        # unknown area
+        unknown_scale, known_scale = scale[0]
+        out = similarity_map * (
+            unknown_scale * paddle.greater_than(unknown_ps,
+                                                paddle.to_tensor([0.])) +
+            known_scale * paddle.less_equal(unknown_ps, paddle.to_tensor([0.])))
+        # mask itself, self-mask only applied to unknown area
+        out = out + self_mask * unknown_ps
+        gca_score = F.softmax(out, axis=1)
+
+        return gca_score
+
+    def propagate_alpha_feature(self, gca_score, alpha_ps):
+
+        alpha_ps = alpha_ps[0]  # squeeze dim 0
+        if self.rate == 1:
+            gca_score = self.pad(gca_score, kernel_size=2, stride=1)
+            alpha_ps = paddle.transpose(alpha_ps, (1, 0, 2, 3))
+            out = F.conv2d(gca_score, alpha_ps) / 4.
+        else:
+            out = F.conv2d_transpose(
+                gca_score, alpha_ps, stride=self.rate, padding=1) / 4.
+
+        return out
+
+    def compute_similarity_map(self, img_feat, img_ps):
+        img_ps = img_ps[0]  # squeeze dim 0
+        # convolve the feature to get correlation (similarity) map
+        img_ps_normed = img_ps / paddle.clip(self.l2_norm(img_ps), 1e-4)
+        img_feat = F.pad(img_feat, (1, 1, 1, 1), mode='reflect')
+        similarity_map = F.conv2d(img_feat, img_ps_normed)
+
+        return similarity_map
+
+    def get_self_correlation_mask(self, img_feat):
+        _, _, h, w = img_feat.shape
+        self_mask = F.one_hot(
+            paddle.reshape(paddle.arange(h * w), (h, w)),
+            num_classes=int(h * w))
+
+        self_mask = paddle.transpose(self_mask, (2, 0, 1))
+        self_mask = paddle.reshape(self_mask, (1, h * w, h, w))
+
+        return self_mask * (-1e4)
+
+    def process_unknown_mask(self, unknown, img_feat, softmax_scale):
+
+        n, _, h, w = img_feat.shape
+
+        if unknown is not None:
+            unknown = unknown.clone()
+            unknown = F.interpolate(
+                unknown, scale_factor=1 / self.rate, mode='nearest')
+            unknown_mean = unknown.mean(axis=[2, 3])
+            known_mean = 1 - unknown_mean
+            unknown_scale = paddle.clip(
+                paddle.sqrt(unknown_mean / known_mean), 0.1, 10)
+            known_scale = paddle.clip(
+                paddle.sqrt(known_mean / unknown_mean), 0.1, 10)
+            softmax_scale = paddle.concat([unknown_scale, known_scale], axis=1)
+        else:
+            unknown = paddle.ones([n, 1, h, w])
+            softmax_scale = paddle.reshape(
+                paddle.to_tensor([softmax_scale, softmax_scale]), (1, 2))
+            softmax_scale = paddle.expand(softmax_scale, (n, 2))
+
+        return unknown, softmax_scale
+
+    @staticmethod
+    def l2_norm(x):
+        x = x**2
+        x = x.sum(axis=[1, 2, 3], keepdim=True)
+        return paddle.sqrt(x)
--- a/Matting/ppmatting/models/layers/tensor_fusion.py
+++ b/Matting/ppmatting/models/layers/tensor_fusion.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.models import layers
+
+from ppmatting.models.layers import tensor_fusion_helper as helper
+
+
+class MLFF(nn.Layer):
+    """
+    Multi-level features are fused adaptively by obtaining spatial attention.
+
+    Args:
+        in_channels(list): The channels of input tensors.
+        mid_channles(list): The middle channels while fusing the features.
+        out_channel(int): The output channel after fusing.
+        merge_type(str): Which type to merge the multi features before output. 
+            It should be one of ('add', 'concat'). Default: 'concat'.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 mid_channels,
+                 out_channel,
+                 merge_type='concat'):
+        super().__init__()
+
+        self.merge_type = merge_type
+
+        # Check arguments
+        if len(in_channels) != len(mid_channels):
+            raise ValueError(
+                "`mid_channels` should have the same length as `in_channels`, but they are {} and {}".
+                format(mid_channels, in_channels))
+        if self.merge_type == 'add' and len(np.unique(np.array(
+                mid_channels))) != 1:
+            raise ValueError(
+                "if `merge_type='add', `mid_channels` should be same of all input features, but it is {}.".
+                format(mid_channels))
+
+        self.pwconvs = nn.LayerList()
+        self.dwconvs = nn.LayerList()
+        for in_channel, mid_channel in zip(in_channels, mid_channels):
+            self.pwconvs.append(
+                layers.ConvBN(
+                    in_channel, mid_channel, 1, bias_attr=False))
+            self.dwconvs.append(
+                layers.ConvBNReLU(
+                    mid_channel,
+                    mid_channel,
+                    3,
+                    padding=1,
+                    groups=mid_channel,
+                    bias_attr=False))
+
+        num_feas = len(in_channels)
+        self.conv_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                2 * num_feas,
+                num_feas,
+                kernel_size=3,
+                padding=1,
+                bias_attr=False),
+            layers.ConvBN(
+                num_feas, num_feas, kernel_size=3, padding=1, bias_attr=False))
+
+        if self.merge_type == 'add':
+            in_chan = mid_channels[0]
+        else:
+            in_chan = sum(mid_channels)
+        self.conv_out = layers.ConvBNReLU(
+            in_chan, out_channel, kernel_size=3, padding=1, bias_attr=False)
+
+    def forward(self, inputs, shape):
+        """
+        args:
+            inputs(list): List of tensor to be fused.
+            shape(Tensor): A tensor with two elements like (H, W).
+        """
+        feas = []
+        for i, input in enumerate(inputs):
+            x = self.pwconvs[i](input)
+            x = F.interpolate(
+                x, size=shape, mode='bilinear', align_corners=False)
+            x = self.dwconvs[i](x)
+            feas.append(x)
+
+        atten = helper.avg_max_reduce_channel(feas)
+        atten = F.sigmoid(self.conv_atten(atten))
+
+        feas_att = []
+        for i, fea in enumerate(feas):
+            fea = fea * (atten[:, i, :, :].unsqueeze(1))
+            feas_att.append(fea)
+        if self.merge_type == 'concat':
+            out = paddle.concat(feas_att, axis=1)
+        else:
+            out = sum(feas_att)
+
+        out = self.conv_out(out)
+        return out
--- a/Matting/ppmatting/models/layers/tensor_fusion_helper.py
+++ b/Matting/ppmatting/models/layers/tensor_fusion_helper.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+def avg_max_reduce_channel_helper(x, use_concat=True):
+    # Reduce hw by avg and max, only support single input
+    assert not isinstance(x, (list, tuple))
+    mean_value = paddle.mean(x, axis=1, keepdim=True)
+    max_value = paddle.max(x, axis=1, keepdim=True)
+
+    if use_concat:
+        res = paddle.concat([mean_value, max_value], axis=1)
+    else:
+        res = [mean_value, max_value]
+    return res
+
+
+def avg_max_reduce_channel(x):
+    # Reduce hw by avg and max
+    # Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...])
+    if not isinstance(x, (list, tuple)):
+        return avg_max_reduce_channel_helper(x)
+    elif len(x) == 1:
+        return avg_max_reduce_channel_helper(x[0])
+    else:
+        res = []
+        for xi in x:
+            res.extend(avg_max_reduce_channel_helper(xi, False))
+        return paddle.concat(res, axis=1)
--- a/Matting/ppmatting/models/losses/__init__.py
+++ b/Matting/ppmatting/models/losses/__init__.py
+from .loss import *
--- a/Matting/ppmatting/models/losses/loss.py
+++ b/Matting/ppmatting/models/losses/loss.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager
+import cv2
+
+
+@manager.LOSSES.add_component
+class MRSD(nn.Layer):
+    def __init__(self, eps=1e-6):
+        super().__init__()
+        self.eps = eps
+
+    def forward(self, logit, label, mask=None):
+        """
+        Forward computation.
+
+        Args:
+            logit (Tensor): Logit tensor, the data type is float32, float64.
+            label (Tensor): Label tensor, the data type is float32, float64. The shape should equal to logit.
+            mask (Tensor, optional): The mask where the loss valid. Default： None.
+        """
+        if len(label.shape) == 3:
+            label = label.unsqueeze(1)
+        sd = paddle.square(logit - label)
+        loss = paddle.sqrt(sd + self.eps)
+        if mask is not None:
+            mask = mask.astype('float32')
+            if len(mask.shape) == 3:
+                mask = mask.unsqueeze(1)
+            loss = loss * mask
+            loss = loss.sum() / (mask.sum() + self.eps)
+            mask.stop_gradient = True
+        else:
+            loss = loss.mean()
+
+        return loss
+
+
+@manager.LOSSES.add_component
+class GradientLoss(nn.Layer):
+    def __init__(self, eps=1e-6):
+        super().__init__()
+        self.kernel_x, self.kernel_y = self.sobel_kernel()
+        self.eps = eps
+
+    def forward(self, logit, label, mask=None):
+        if len(label.shape) == 3:
+            label = label.unsqueeze(1)
+        if mask is not None:
+            if len(mask.shape) == 3:
+                mask = mask.unsqueeze(1)
+            logit = logit * mask
+            label = label * mask
+            loss = paddle.sum(
+                F.l1_loss(self.sobel(logit), self.sobel(label), 'none')) / (
+                    mask.sum() + self.eps)
+        else:
+            loss = F.l1_loss(self.sobel(logit), self.sobel(label), 'mean')
+
+        return loss
+
+    def sobel(self, input):
+        """Using Sobel to compute gradient. Return the magnitude."""
+        if not len(input.shape) == 4:
+            raise ValueError("Invalid input shape, we expect NCHW, but it is ",
+                             input.shape)
+
+        n, c, h, w = input.shape
+
+        input_pad = paddle.reshape(input, (n * c, 1, h, w))
+        input_pad = F.pad(input_pad, pad=[1, 1, 1, 1], mode='replicate')
+
+        grad_x = F.conv2d(input_pad, self.kernel_x, padding=0)
+        grad_y = F.conv2d(input_pad, self.kernel_y, padding=0)
+
+        mag = paddle.sqrt(grad_x * grad_x + grad_y * grad_y + self.eps)
+        mag = paddle.reshape(mag, (n, c, h, w))
+
+        return mag
+
+    def sobel_kernel(self):
+        kernel_x = paddle.to_tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0],
+                                     [-1.0, 0.0, 1.0]]).astype('float32')
+        kernel_x = kernel_x / kernel_x.abs().sum()
+        kernel_y = kernel_x.transpose([1, 0])
+        kernel_x = kernel_x.unsqueeze(0).unsqueeze(0)
+        kernel_y = kernel_y.unsqueeze(0).unsqueeze(0)
+        kernel_x.stop_gradient = True
+        kernel_y.stop_gradient = True
+        return kernel_x, kernel_y
+
+
+@manager.LOSSES.add_component
+class LaplacianLoss(nn.Layer):
+    """
+    Laplacian loss is refer to
+    https://github.com/JizhiziLi/AIM/blob/master/core/evaluate.py#L83
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.gauss_kernel = self.build_gauss_kernel(
+            size=5, sigma=1.0, n_channels=1)
+
+    def forward(self, logit, label, mask=None):
+        if len(label.shape) == 3:
+            label = label.unsqueeze(1)
+        if mask is not None:
+            if len(mask.shape) == 3:
+                mask = mask.unsqueeze(1)
+            logit = logit * mask
+            label = label * mask
+        pyr_label = self.laplacian_pyramid(label, self.gauss_kernel, 5)
+        pyr_logit = self.laplacian_pyramid(logit, self.gauss_kernel, 5)
+        loss = sum(F.l1_loss(a, b) for a, b in zip(pyr_label, pyr_logit))
+
+        return loss
+
+    def build_gauss_kernel(self, size=5, sigma=1.0, n_channels=1):
+        if size % 2 != 1:
+            raise ValueError("kernel size must be uneven")
+        grid = np.float32(np.mgrid[0:size, 0:size].T)
+        gaussian = lambda x: np.exp((x - size // 2)**2 / (-2 * sigma**2))**2
+        kernel = np.sum(gaussian(grid), axis=2)
+        kernel /= np.sum(kernel)
+        kernel = np.tile(kernel, (n_channels, 1, 1))
+        kernel = paddle.to_tensor(kernel[:, None, :, :])
+        kernel.stop_gradient = True
+        return kernel
+
+    def conv_gauss(self, input, kernel):
+        n_channels, _, kh, kw = kernel.shape
+        x = F.pad(input, (kh // 2, kw // 2, kh // 2, kh // 2), mode='replicate')
+        x = F.conv2d(x, kernel, groups=n_channels)
+
+        return x
+
+    def laplacian_pyramid(self, input, kernel, max_levels=5):
+        current = input
+        pyr = []
+        for level in range(max_levels):
+            filtered = self.conv_gauss(current, kernel)
+            diff = current - filtered
+            pyr.append(diff)
+            current = F.avg_pool2d(filtered, 2)
+        pyr.append(current)
+        return pyr
--- a/Matting/ppmatting/models/modnet.py
+++ b/Matting/ppmatting/models/modnet.py
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import numpy as np
+import scipy
+import paddleseg
+from paddleseg.models import layers, losses
+from paddleseg import utils
+from paddleseg.cvlibs import manager, param_init
+
+
+@manager.MODELS.add_component
+class MODNet(nn.Layer):
+    """
+    The MODNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zhanghan Ke, et, al. "Is a Green Screen Really Necessary for Real-Time Portrait Matting?"
+    (https://arxiv.org/pdf/2011.11961.pdf).
+
+    Args:
+        backbone: backbone model.
+        hr_channels(int, optional): The channels of the high-resolution branch. Default: 32.
+        pretrained(str, optional): The path of the pretrained model. Default: None.
+
+    """
+
+    def __init__(self, backbone, hr_channels=32, pretrained=None):
+        # Build the MODNet head on top of `backbone`, using the channel counts
+        # the backbone exposes as `feat_channels`.
+        super().__init__()
+        self.backbone = backbone
+        self.pretrained = pretrained
+        self.head = MODNetHead(
+            hr_channels=hr_channels, backbone_channels=backbone.feat_channels)
+        # NOTE(review): init_weight() runs before the blur layer is created --
+        # presumably so the blur layer is not part of the model when weights
+        # are initialised or loaded; confirm before reordering.
+        self.init_weight()
+        self.blurer = GaussianBlurLayer(1, 3)
+        # Filled in by `loss` on first use, or replaced by a caller-supplied dict.
+        self.loss_func_dict = None
+
+    def forward(self, inputs):
+        """
+        If training, return a dict.
+        If evaluation, return the final alpha prediction.
+        """
+        x = inputs['img']
+        feat_list = self.backbone(x)
+        y = self.head(inputs=inputs, feat_list=feat_list)
+        if self.training:
+            loss = self.loss(y, inputs)
+            return y, loss
+        else:
+            return y
+
+    def loss(self, logit_dict, label_dict, loss_func_dict=None):
+        if loss_func_dict is None:
+            if self.loss_func_dict is None:
+                self.loss_func_dict = defaultdict(list)
+                self.loss_func_dict['semantic'].append(paddleseg.models.MSELoss(
+                ))
+                self.loss_func_dict['detail'].append(paddleseg.models.L1Loss())
+                self.loss_func_dict['fusion'].append(paddleseg.models.L1Loss())
+                self.loss_func_dict['fusion'].append(paddleseg.models.L1Loss())
+        else:
+            self.loss_func_dict = loss_func_dict
+
+        loss = {}
+        # semantic loss
+        semantic_gt = F.interpolate(
+            label_dict['alpha'],
+            scale_factor=1 / 16,
+            mode='bilinear',
+            align_corners=False)
+        semantic_gt = self.blurer(semantic_gt)
+        #         semantic_gt.stop_gradient=True
+        loss['semantic'] = self.loss_func_dict['semantic'][0](
+            logit_dict['semantic'], semantic_gt)
+
+        # detail loss
+        trimap = label_dict['trimap']
+        mask = (trimap == 128).astype('float32')
+        logit_detail = logit_dict['detail'] * mask
+        label_detail = label_dict['alpha'] * mask
+        loss_detail = self.loss_func_dict['detail'][0](logit_detail,
+                                                       label_detail)
+        loss_detail = loss_detail / (mask.mean() + 1e-6)
+        loss['detail'] = 10 * loss_detail
+
+        # fusion loss
+        matte = logit_dict['matte']
+        alpha = label_dict['alpha']
+        transition_mask = label_dict['trimap'] == 128
+        matte_boundary = paddle.where(transition_mask, matte, alpha)
+        # l1 loss
+        loss_fusion_l1 = self.loss_func_dict['fusion'][0](
+            matte, alpha) + 4 * self.loss_func_dict['fusion'][0](matte_boundary,
+                                                                 alpha)
+        # composition loss
+        loss_fusion_comp = self.loss_func_dict['fusion'][1](
+            matte * label_dict['img'], alpha *
+            label_dict['img']) + 4 * self.loss_func_dict['fusion'][1](
+                matte_boundary * label_dict['img'], alpha * label_dict['img'])
+        # consisten loss with semantic
+        transition_mask = F.interpolate(
+            label_dict['trimap'],
+            scale_factor=1 / 16,
+            mode='nearest',
+            align_corners=False)
+        transition_mask = transition_mask == 128
+        matte_con_sem = F.interpolate(
+            matte, scale_factor=1 / 16, mode='bilinear', align_corners=False)
+        matte_con_sem = self.blurer(matte_con_sem)
+        logit_semantic = logit_dict['semantic'].clone()
+        logit_semantic.stop_gradient = True
+        matte_con_sem = paddle.where(transition_mask, logit_semantic,
+                                     matte_con_sem)
+        if False:
+            import cv2
+            matte_con_sem_num = matte_con_sem.numpy()
+            matte_con_sem_num = matte_con_sem_num[0].squeeze()
+            matte_con_sem_num = (matte_con_sem_num * 255).astype('uint8')
+            semantic = logit_dict['semantic'].numpy()
+            semantic = semantic[0].squeeze()
+            semantic = (semantic * 255).astype('uint8')
+            transition_mask = transition_mask.astype('uint8')
+            transition_mask = transition_mask.numpy()
+            transition_mask = (transition_mask[0].squeeze()) * 255
+            cv2.imwrite('matte_con.png', matte_con_sem_num)
+            cv2.imwrite('semantic.png', semantic)
+            cv2.imwrite('transition.png', transition_mask)
+        mse_loss = paddleseg.models.MSELoss()
+        loss_fusion_con_sem = mse_loss(matte_con_sem, logit_dict['semantic'])
+        loss_fusion = loss_fusion_l1 + loss_fusion_comp + loss_fusion_con_sem
+        loss['fusion'] = loss_fusion
+        loss['fusion_l1'] = loss_fusion_l1
+        loss['fusion_comp'] = loss_fusion_comp
+        loss['fusion_con_sem'] = loss_fusion_con_sem
+
+        loss['all'] = loss['semantic'] + loss['detail'] + loss['fusion']
+
+        return loss
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class MODNetHead(nn.Layer):
+    def __init__(self, hr_channels, backbone_channels):
+        super().__init__()
+
+        self.lr_branch = LRBranch(backbone_channels)
+        self.hr_branch = HRBranch(hr_channels, backbone_channels)
+        self.f_branch = FusionBranch(hr_channels, backbone_channels)
+        self.init_weight()
+
+    def forward(self, inputs, feat_list):
+        pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(feat_list)
+        pred_detail, hr2x = self.hr_branch(inputs['img'], enc2x, enc4x, lr8x)
+        pred_matte = self.f_branch(inputs['img'], lr8x, hr2x)
+
+        if self.training:
+            logit_dict = {
+                'semantic': pred_semantic,
+                'detail': pred_detail,
+                'matte': pred_matte
+            }
+            return logit_dict
+        else:
+            return pred_matte
+
+    def init_weight(self):
+        for layer in self.sublayers():
+            if isinstance(layer, nn.Conv2D):
+                param_init.kaiming_uniform(layer.weight)
+
+
+class FusionBranch(nn.Layer):
+    def __init__(self, hr_channels, enc_channels):
+        super().__init__()
+        self.conv_lr4x = Conv2dIBNormRelu(
+            enc_channels[2], hr_channels, 5, stride=1, padding=2)
+
+        self.conv_f2x = Conv2dIBNormRelu(
+            2 * hr_channels, hr_channels, 3, stride=1, padding=1)
+        self.conv_f = nn.Sequential(
+            Conv2dIBNormRelu(
+                hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
+            Conv2dIBNormRelu(
+                int(hr_channels / 2),
+                1,
+                1,
+                stride=1,
+                padding=0,
+                with_ibn=False,
+                with_relu=False))
+
+    def forward(self, img, lr8x, hr2x):
+        lr4x = F.interpolate(
+            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr4x = self.conv_lr4x(lr4x)
+        lr2x = F.interpolate(
+            lr4x, scale_factor=2, mode='bilinear', align_corners=False)
+
+        f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1))
+        f = F.interpolate(
+            f2x, scale_factor=2, mode='bilinear', align_corners=False)
+        f = self.conv_f(paddle.concat((f, img), axis=1))
+        pred_matte = F.sigmoid(f)
+
+        return pred_matte
+
+
+class HRBranch(nn.Layer):
+    """
+    High Resolution Branch of MODNet
+    """
+
+    def __init__(self, hr_channels, enc_channels):
+        super().__init__()
+
+        self.tohr_enc2x = Conv2dIBNormRelu(
+            enc_channels[0], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc2x = Conv2dIBNormRelu(
+            hr_channels + 3, hr_channels, 3, stride=2, padding=1)
+
+        self.tohr_enc4x = Conv2dIBNormRelu(
+            enc_channels[1], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc4x = Conv2dIBNormRelu(
+            2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
+
+        self.conv_hr4x = nn.Sequential(
+            Conv2dIBNormRelu(
+                2 * hr_channels + enc_channels[2] + 3,
+                2 * hr_channels,
+                3,
+                stride=1,
+                padding=1),
+            Conv2dIBNormRelu(
+                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(
+                2 * hr_channels, hr_channels, 3, stride=1, padding=1))
+
+        self.conv_hr2x = nn.Sequential(
+            Conv2dIBNormRelu(
+                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(
+                2 * hr_channels, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(
+                hr_channels, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(
+                hr_channels, hr_channels, 3, stride=1, padding=1))
+
+        self.conv_hr = nn.Sequential(
+            Conv2dIBNormRelu(
+                hr_channels + 3, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(
+                hr_channels,
+                1,
+                1,
+                stride=1,
+                padding=0,
+                with_ibn=False,
+                with_relu=False))
+
+    def forward(self, img, enc2x, enc4x, lr8x):
+        img2x = F.interpolate(
+            img, scale_factor=1 / 2, mode='bilinear', align_corners=False)
+        img4x = F.interpolate(
+            img, scale_factor=1 / 4, mode='bilinear', align_corners=False)
+
+        enc2x = self.tohr_enc2x(enc2x)
+        hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1))
+
+        enc4x = self.tohr_enc4x(enc4x)
+        hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1))
+
+        lr4x = F.interpolate(
+            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1))
+
+        hr2x = F.interpolate(
+            hr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1))
+
+        pred_detail = None
+        if self.training:
+            hr = F.interpolate(
+                hr2x, scale_factor=2, mode='bilinear', align_corners=False)
+            hr = self.conv_hr(paddle.concat((hr, img), axis=1))
+            pred_detail = F.sigmoid(hr)
+
+        return pred_detail, hr2x
+
+
+class LRBranch(nn.Layer):
+    def __init__(self, backbone_channels):
+        super().__init__()
+        self.se_block = SEBlock(backbone_channels[4], reduction=4)
+        self.conv_lr16x = Conv2dIBNormRelu(
+            backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2)
+        self.conv_lr8x = Conv2dIBNormRelu(
+            backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2)
+        self.conv_lr = Conv2dIBNormRelu(
+            backbone_channels[2],
+            1,
+            3,
+            stride=2,
+            padding=1,
+            with_ibn=False,
+            with_relu=False)
+
+    def forward(self, feat_list):
+        enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4]
+
+        enc32x = self.se_block(enc32x)
+        lr16x = F.interpolate(
+            enc32x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr16x = self.conv_lr16x(lr16x)
+        lr8x = F.interpolate(
+            lr16x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr8x = self.conv_lr8x(lr8x)
+
+        pred_semantic = None
+        if self.training:
+            lr = self.conv_lr(lr8x)
+            pred_semantic = F.sigmoid(lr)
+
+        return pred_semantic, lr8x, [enc2x, enc4x]
+
+
+class IBNorm(nn.Layer):
+    """
+    Combine Instance Norm and Batch Norm into One Layer
+    """
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.bnorm_channels = in_channels // 2
+        self.inorm_channels = in_channels - self.bnorm_channels
+
+        self.bnorm = nn.BatchNorm2D(self.bnorm_channels)
+        self.inorm = nn.InstanceNorm2D(self.inorm_channels)
+
+    def forward(self, x):
+        bn_x = self.bnorm(x[:, :self.bnorm_channels, :, :])
+        in_x = self.inorm(x[:, self.bnorm_channels:, :, :])
+
+        return paddle.concat((bn_x, in_x), 1)
+
+
+class Conv2dIBNormRelu(nn.Layer):
+    """
+    Convolution + IBNorm + Relu
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias_attr=None,
+                 with_ibn=True,
+                 with_relu=True):
+
+        super().__init__()
+
+        layers = [
+            nn.Conv2D(
+                in_channels,
+                out_channels,
+                kernel_size,
+                stride=stride,
+                padding=padding,
+                dilation=dilation,
+                groups=groups,
+                bias_attr=bias_attr)
+        ]
+
+        if with_ibn:
+            layers.append(IBNorm(out_channels))
+
+        if with_relu:
+            layers.append(nn.ReLU())
+
+        self.layers = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.layers(x)
+
+
+class SEBlock(nn.Layer):
+    """
+    SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
+    """
+
+    def __init__(self, num_channels, reduction=1):
+        super().__init__()
+        self.pool = nn.AdaptiveAvgPool2D(1)
+        self.conv = nn.Sequential(
+            nn.Conv2D(
+                num_channels,
+                int(num_channels // reduction),
+                1,
+                bias_attr=False),
+            nn.ReLU(),
+            nn.Conv2D(
+                int(num_channels // reduction),
+                num_channels,
+                1,
+                bias_attr=False),
+            nn.Sigmoid())
+
+    def forward(self, x):
+        w = self.pool(x)
+        w = self.conv(w)
+        return w * x
+
+
+class GaussianBlurLayer(nn.Layer):
+    """ Add Gaussian Blur to a 4D tensors
+    This layer takes a 4D tensor of {N, C, H, W} as input.
+    The Gaussian blur will be performed in given channel number (C) splitly.
+    """
+
+    def __init__(self, channels, kernel_size):
+        """
+        Args:
+            channels (int): Channel for input tensor
+            kernel_size (int): Size of the kernel used in blurring
+        """
+
+        super(GaussianBlurLayer, self).__init__()
+        self.channels = channels
+        self.kernel_size = kernel_size
+        assert self.kernel_size % 2 != 0
+
+        self.op = nn.Sequential(
+            nn.Pad2D(
+                int(self.kernel_size / 2), mode='reflect'),
+            nn.Conv2D(
+                channels,
+                channels,
+                self.kernel_size,
+                stride=1,
+                padding=0,
+                bias_attr=False,
+                groups=channels))
+
+        self._init_kernel()
+        self.op[1].weight.stop_gradient = True
+
+    def forward(self, x):
+        """
+        Args:
+            x (paddle.Tensor): input 4D tensor
+        Returns:
+            paddle.Tensor: Blurred version of the input
+        """
+
+        if not len(list(x.shape)) == 4:
+            print('\'GaussianBlurLayer\' requires a 4D tensor as input\n')
+            exit()
+        elif not x.shape[1] == self.channels:
+            print('In \'GaussianBlurLayer\', the required channel ({0}) is'
+                  'not the same as input ({1})\n'.format(self.channels, x.shape[
+                      1]))
+            exit()
+
+        return self.op(x)
+
+    def _init_kernel(self):
+        sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8
+
+        n = np.zeros((self.kernel_size, self.kernel_size))
+        i = int(self.kernel_size / 2)
+        n[i, i] = 1
+        kernel = scipy.ndimage.gaussian_filter(n, sigma)
+        kernel = kernel.astype('float32')
+        kernel = kernel[np.newaxis, np.newaxis, :, :]
+        paddle.assign(kernel, self.op[1].weight)
--- a/Matting/ppmatting/models/ppmatting.py
+++ b/Matting/ppmatting/models/ppmatting.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+import time
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import paddleseg
+from paddleseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager
+
+from ppmatting.models.losses import MRSD, GradientLoss
+from ppmatting.models.backbone import resnet_vd
+
+
+@manager.MODELS.add_component
+class PPMatting(nn.Layer):
+    """
+    The PPMattinh implementation based on PaddlePaddle.
+
+    The original article refers to
+    Guowei Chen, et, al. "PP-Matting: High-Accuracy Natural Image Matting"
+    (https://arxiv.org/pdf/2204.09433.pdf).
+
+    Args:
+        backbone: backbone model.
+        pretrained(str, optional): The path of pretrianed model. Defautl: None.
+
+    """
+
+    def __init__(self, backbone, pretrained=None):
+        super().__init__()
+        self.backbone = backbone
+        self.pretrained = pretrained
+        self.loss_func_dict = self.get_loss_func_dict()
+
+        self.backbone_channels = backbone.feat_channels
+
+        self.scb = SCB(self.backbone_channels[-1])
+
+        self.hrdb = HRDB(
+            self.backbone_channels[0] + self.backbone_channels[1],
+            scb_channels=self.scb.out_channels,
+            gf_index=[0, 2, 4])
+
+        self.init_weight()
+
+    def forward(self, inputs):
+        x = inputs['img']
+        input_shape = paddle.shape(x)
+        fea_list = self.backbone(x)
+
+        scb_logits = self.scb(fea_list[-1])
+        semantic_map = F.softmax(scb_logits[-1], axis=1)
+
+        fea0 = F.interpolate(
+            fea_list[0], input_shape[2:], mode='bilinear', align_corners=False)
+        fea1 = F.interpolate(
+            fea_list[1], input_shape[2:], mode='bilinear', align_corners=False)
+        hrdb_input = paddle.concat([fea0, fea1], 1)
+        hrdb_logit = self.hrdb(hrdb_input, scb_logits)
+        detail_map = F.sigmoid(hrdb_logit)
+        fusion = self.fusion(semantic_map, detail_map)
+
+        if self.training:
+            logit_dict = {
+                'semantic': semantic_map,
+                'detail': detail_map,
+                'fusion': fusion
+            }
+            loss_dict = self.loss(logit_dict, inputs)
+            return logit_dict, loss_dict
+        else:
+            return fusion
+
+    def get_loss_func_dict(self):
+        loss_func_dict = defaultdict(list)
+        loss_func_dict['semantic'].append(nn.NLLLoss())
+        loss_func_dict['detail'].append(MRSD())
+        loss_func_dict['detail'].append(GradientLoss())
+        loss_func_dict['fusion'].append(MRSD())
+        loss_func_dict['fusion'].append(MRSD())
+        loss_func_dict['fusion'].append(GradientLoss())
+        return loss_func_dict
+
+    def loss(self, logit_dict, label_dict):
+        loss = {}
+
+        # semantic loss computation
+        # get semantic label
+        semantic_label = label_dict['trimap']
+        semantic_label_trans = (semantic_label == 128).astype('int64')
+        semantic_label_bg = (semantic_label == 0).astype('int64')
+        semantic_label = semantic_label_trans + semantic_label_bg * 2
+        loss_semantic = self.loss_func_dict['semantic'][0](
+            paddle.log(logit_dict['semantic'] + 1e-6),
+            semantic_label.squeeze(1))
+        loss['semantic'] = loss_semantic
+
+        # detail loss computation
+        transparent = label_dict['trimap'] == 128
+        detail_alpha_loss = self.loss_func_dict['detail'][0](
+            logit_dict['detail'], label_dict['alpha'], transparent)
+        # gradient loss
+        detail_gradient_loss = self.loss_func_dict['detail'][1](
+            logit_dict['detail'], label_dict['alpha'], transparent)
+        loss_detail = detail_alpha_loss + detail_gradient_loss
+        loss['detail'] = loss_detail
+        loss['detail_alpha'] = detail_alpha_loss
+        loss['detail_gradient'] = detail_gradient_loss
+
+        # fusion loss
+        loss_fusion_func = self.loss_func_dict['fusion']
+        # fusion_sigmoid loss
+        fusion_alpha_loss = loss_fusion_func[0](logit_dict['fusion'],
+                                                label_dict['alpha'])
+        # composion loss
+        comp_pred = logit_dict['fusion'] * label_dict['fg'] + (
+            1 - logit_dict['fusion']) * label_dict['bg']
+        comp_gt = label_dict['alpha'] * label_dict['fg'] + (
+            1 - label_dict['alpha']) * label_dict['bg']
+        fusion_composition_loss = loss_fusion_func[1](comp_pred, comp_gt)
+        # grandient loss
+        fusion_grad_loss = loss_fusion_func[2](logit_dict['fusion'],
+                                               label_dict['alpha'])
+        # fusion loss
+        loss_fusion = fusion_alpha_loss + fusion_composition_loss + fusion_grad_loss
+        loss['fusion'] = loss_fusion
+        loss['fusion_alpha'] = fusion_alpha_loss
+        loss['fusion_composition'] = fusion_composition_loss
+        loss['fusion_gradient'] = fusion_grad_loss
+
+        loss[
+            'all'] = 0.25 * loss_semantic + 0.25 * loss_detail + 0.25 * loss_fusion
+
+        return loss
+
+    def fusion(self, semantic_map, detail_map):
+        # semantic_map [N, 3, H, W]
+        # In index, 0 is foreground, 1 is transition, 2 is backbone
+        # After fusion, the foreground is 1, the background is 0, and the transion is between [0, 1]
+        index = paddle.argmax(semantic_map, axis=1, keepdim=True)
+        transition_mask = (index == 1).astype('float32')
+        fg = (index == 0).astype('float32')
+        alpha = detail_map * transition_mask + fg
+        return alpha
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class SCB(nn.Layer):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.in_channels = [512 + in_channels, 512, 256, 128, 128, 64]
+        self.mid_channels = [512, 256, 128, 128, 64, 64]
+        self.out_channels = [256, 128, 64, 64, 64, 3]
+
+        self.psp_module = layers.PPModule(
+            in_channels,
+            512,
+            bin_sizes=(1, 3, 5),
+            dim_reduction=False,
+            align_corners=False)
+
+        psp_upsamples = [2, 4, 8, 16]
+        self.psps = nn.LayerList([
+            self.conv_up_psp(512, self.out_channels[i], psp_upsamples[i])
+            for i in range(4)
+        ])
+
+        scb_list = [
+            self._make_stage(
+                self.in_channels[i],
+                self.mid_channels[i],
+                self.out_channels[i],
+                padding=int(i == 0) + 1,
+                dilation=int(i == 0) + 1)
+            for i in range(len(self.in_channels) - 1)
+        ]
+        scb_list += [
+            nn.Sequential(
+                layers.ConvBNReLU(
+                    self.in_channels[-1], self.mid_channels[-1], 3, padding=1),
+                layers.ConvBNReLU(
+                    self.mid_channels[-1], self.mid_channels[-1], 3, padding=1),
+                nn.Conv2D(
+                    self.mid_channels[-1], self.out_channels[-1], 3, padding=1))
+        ]
+        self.scb_stages = nn.LayerList(scb_list)
+
+    def forward(self, x):
+        psp_x = self.psp_module(x)
+        psps = [psp(psp_x) for psp in self.psps]
+
+        scb_logits = []
+        for i, scb_stage in enumerate(self.scb_stages):
+            if i == 0:
+                x = scb_stage(paddle.concat((psp_x, x), 1))
+            elif i <= len(psps):
+                x = scb_stage(paddle.concat((psps[i - 1], x), 1))
+            else:
+                x = scb_stage(x)
+            scb_logits.append(x)
+        return scb_logits
+
+    def conv_up_psp(self, in_channels, out_channels, up_sample):
+        return nn.Sequential(
+            layers.ConvBNReLU(
+                in_channels, out_channels, 3, padding=1),
+            nn.Upsample(
+                scale_factor=up_sample, mode='bilinear', align_corners=False))
+
+    def _make_stage(self,
+                    in_channels,
+                    mid_channels,
+                    out_channels,
+                    padding=1,
+                    dilation=1):
+        layer_list = [
+            layers.ConvBNReLU(
+                in_channels, mid_channels, 3, padding=1), layers.ConvBNReLU(
+                    mid_channels,
+                    mid_channels,
+                    3,
+                    padding=padding,
+                    dilation=dilation), layers.ConvBNReLU(
+                        mid_channels,
+                        out_channels,
+                        3,
+                        padding=padding,
+                        dilation=dilation), nn.Upsample(
+                            scale_factor=2,
+                            mode='bilinear',
+                            align_corners=False)
+        ]
+        return nn.Sequential(*layer_list)
+
+
+class HRDB(nn.Layer):
+    """
+    The High-Resolution Detail Branch
+
+    Args:
+        in_channels(int): The number of input channels.
+        scb_channels(list|tuple): The channels of scb logits
+        gf_index(list|tuple, optional): Which logit is selected as guidance flow from scb logits. Default: (0, 2, 4)
+    """
+
+    def __init__(self, in_channels, scb_channels, gf_index=(0, 2, 4)):
+        super().__init__()
+        self.gf_index = gf_index
+        self.gf_list = nn.LayerList(
+            [nn.Conv2D(scb_channels[i], 1, 1) for i in gf_index])
+
+        channels = [64, 32, 16, 8]
+        self.res_list = [
+            resnet_vd.BasicBlock(
+                in_channels, channels[0], stride=1, shortcut=False)
+        ]
+        self.res_list += [
+            resnet_vd.BasicBlock(
+                i, i, stride=1) for i in channels[1:-1]
+        ]
+        self.res_list = nn.LayerList(self.res_list)
+
+        self.convs = nn.LayerList([
+            nn.Conv2D(
+                channels[i], channels[i + 1], kernel_size=1)
+            for i in range(len(channels) - 1)
+        ])
+        self.gates = nn.LayerList(
+            [GatedSpatailConv2d(i, i) for i in channels[1:]])
+
+        self.detail_conv = nn.Conv2D(channels[-1], 1, 1, bias_attr=False)
+
+    def forward(self, x, scb_logits):
+        for i in range(len(self.res_list)):
+            x = self.res_list[i](x)
+            x = self.convs[i](x)
+            gf = self.gf_list[i](scb_logits[self.gf_index[i]])
+            gf = F.interpolate(
+                gf, paddle.shape(x)[-2:], mode='bilinear', align_corners=False)
+            x = self.gates[i](x, gf)
+        return self.detail_conv(x)
+
+
+class GatedSpatailConv2d(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias_attr=False):
+        super().__init__()
+        self._gate_conv = nn.Sequential(
+            layers.SyncBatchNorm(in_channels + 1),
+            nn.Conv2D(
+                in_channels + 1, in_channels + 1, kernel_size=1),
+            nn.ReLU(),
+            nn.Conv2D(
+                in_channels + 1, 1, kernel_size=1),
+            layers.SyncBatchNorm(1),
+            nn.Sigmoid())
+        self.conv = nn.Conv2D(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias_attr=bias_attr)
+
+    def forward(self, input_features, gating_features):
+        cat = paddle.concat([input_features, gating_features], axis=1)
+        alphas = self._gate_conv(cat)
+        x = input_features * (alphas + 1)
+        x = self.conv(x)
+        return x
--- a/Matting/ppmatting/models/ppmattingv2.py
+++ b/Matting/ppmatting/models/ppmattingv2.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import partial
+from collections import defaultdict
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+import paddleseg
+from paddleseg import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.models.backbones.transformer_utils import Identity, DropPath
+
+from ppmatting.models.layers import MLFF
+from ppmatting.models.losses import MRSD, GradientLoss
+
+
+@manager.MODELS.add_component
+class PPMattingV2(nn.Layer):
+    """
+    The PPMattingV2 implementation based on PaddlePaddle.
+
+    The original article refers to
+    TODO Guowei Chen, et, al. "" ().
+
+    Args:
+        backbone: backbone model.
+        pretrained(str, optional): The path of the pretrained model. Default: None.
+        dpp_len_trans(int, optional): The depth of transformer block in dpp(DoublePyramidPoolModule). Default: 1.
+        dpp_index(list, optional): The indexes of the backbone outputs which are used as the input of dpp. Default: [1, 2, 3, 4].
+        dpp_mid_channel(int, optional): The output channels of the first pyramid pool in dpp. Default: 256.
+        dpp_output_channel(int, optional): The output channels of dpp. Default: 512.
+        dpp_bin_sizes(list, optional): The output size of the second pyramid pool in dpp. Default: (2, 4, 6).
+        dpp_mlp_ratios(int, optional): The expansion ratio of mlp in dpp. Default: 2.
+        dpp_attn_ratio(int, optional): The expansion ratio of attention. Default: 2.
+        dpp_merge_type(str, optional): The merge type of the output of the second pyramid pool in dpp,
+            which should be one of (`concat`, `add`). Default: 'concat'.
+        mlff_merge_type(str, optional): The merge type of the multi features before output.
+            It should be one of ('add', 'concat'). Default: 'concat'.
+    """
+
+    def __init__(self,
+                 backbone,
+                 pretrained=None,
+                 dpp_len_trans=1,
+                 dpp_index=[1, 2, 3, 4],
+                 dpp_mid_channel=256,
+                 dpp_output_channel=512,
+                 dpp_bin_sizes=(2, 4, 6),
+                 dpp_mlp_ratios=2,
+                 dpp_attn_ratio=2,
+                 dpp_merge_type='concat',
+                 mlff_merge_type='concat',
+                 decoder_channels=[128, 96, 64, 32, 32],
+                 head_channel=32):
+        """
+        Build the dpp module, the five MLFF decoder stages and the two matting
+        heads, then call `init_weight`.
+
+        `decoder_channels` gives the output channels of the decoder stages in
+        the order they are built (mlff32x, mlff16x, mlff8x, mlff4x, mlff2x) and
+        `head_channel` is the `mid_chan` of the head on the mlff2x output.
+        """
+        # NOTE(review): `dpp_index` and `decoder_channels` use mutable list
+        # defaults. They are only read here, but `dpp_index` is stored on the
+        # instance, so mutating `self.dpp_index` would alter the shared default.
+        super().__init__()
+
+        self.backbone = backbone
+        self.backbone_channels = backbone.feat_channels
+
+        # check that the backbone matches what the decoder below indexes
+        assert len(backbone.feat_channels) == 5, \
+            "Backbone should return 5 features with different scales"
+        assert max(dpp_index) < len(backbone.feat_channels), \
+            "The element of `dpp_index` should be less than the number of return features of backbone."
+
+        # dpp module: its input channel number is the sum of the channels of
+        # the backbone features selected by `dpp_index`
+        self.dpp_index = dpp_index
+        self.dpp = DoublePyramidPoolModule(
+            stride=2,
+            input_channel=sum(self.backbone_channels[i]
+                              for i in self.dpp_index),
+            mid_channel=dpp_mid_channel,
+            output_channel=dpp_output_channel,
+            len_trans=dpp_len_trans,
+            bin_sizes=dpp_bin_sizes,
+            mlp_ratios=dpp_mlp_ratios,
+            attn_ratio=dpp_attn_ratio,
+            merge_type=dpp_merge_type)
+
+        # decoder: each stage takes one backbone feature (from the last to the
+        # first) together with the output channels of the previous stage
+        self.mlff32x = MLFF(
+            in_channels=[self.backbone_channels[-1], dpp_output_channel],
+            mid_channels=[dpp_output_channel, dpp_output_channel],
+            out_channel=decoder_channels[0],
+            merge_type=mlff_merge_type)
+        self.mlff16x = MLFF(
+            in_channels=[
+                self.backbone_channels[-2], decoder_channels[0],
+                dpp_output_channel
+            ],
+            mid_channels=[
+                decoder_channels[0], decoder_channels[0], decoder_channels[0]
+            ],
+            out_channel=decoder_channels[1],
+            merge_type=mlff_merge_type)
+        self.mlff8x = MLFF(
+            in_channels=[
+                self.backbone_channels[-3], decoder_channels[1],
+                dpp_output_channel
+            ],
+            mid_channels=[
+                decoder_channels[1], decoder_channels[1], decoder_channels[1]
+            ],
+            out_channel=decoder_channels[2],
+            merge_type=mlff_merge_type)
+        # The last two stages take a 3-channel input instead of the dpp output
+        # (presumably the RGB image; confirm in `forward`) and do not pass
+        # `mlff_merge_type`, so MLFF's own default merge type applies.
+        self.mlff4x = MLFF(
+            in_channels=[self.backbone_channels[-4], decoder_channels[2], 3],
+            mid_channels=[decoder_channels[2], decoder_channels[2], 3],
+            out_channel=decoder_channels[3])
+        self.mlff2x = MLFF(
+            in_channels=[self.backbone_channels[-5], decoder_channels[3], 3],
+            mid_channels=[decoder_channels[3], decoder_channels[3], 3],
+            out_channel=decoder_channels[4])
+
+        # Heads on the mlff8x and mlff2x outputs; the latter has 3 extra input
+        # channels.
+        self.matting_head_mlff8x = MattingHead(
+            in_chan=decoder_channels[2], mid_chan=32)
+        self.matting_head_mlff2x = MattingHead(
+            in_chan=decoder_channels[4] + 3, mid_chan=head_channel, mid_num=2)
+
+        # loss: left unset here
+        self.loss_func_dict = None
+
+        # pretrained
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, inputs):
+        img = inputs['img']
+        input_shape = paddle.shape(img)
+        feats_backbone = self.backbone(
+            img)  # stdc1 [2x, 4x, 8x, 16x, 32x] [32, 64, 256, 512, 1024]
+        x = self.dpp([feats_backbone[i] for i in self.dpp_index])
+        dpp_out = x
+
+        input_32x = [feats_backbone[-1], x]
+        x = self.mlff32x(input_32x,
+                         paddle.shape(feats_backbone[-1])[-2:])  # 32x
+
+        input_16x = [feats_backbone[-2], x, dpp_out]
+        x = self.mlff16x(input_16x,
+                         paddle.shape(feats_backbone[-2])[-2:])  # 16x
+
+        input_8x = [feats_backbone[-3], x, dpp_out]
+        x = self.mlff8x(input_8x, paddle.shape(feats_backbone[-3])[-2:])  # 8x
+        mlff8x_output = x
+
+        input_4x = [feats_backbone[-4], x]
+        input_4x.append(
+            F.interpolate(
+                img, feats_backbone[-4].shape[2:], mode='area'))
+        x = self.mlff4x(input_4x, paddle.shape(feats_backbone[-4])[-2:])  # 4x
+
+        input_2x = [feats_backbone[-5], x]
+        input_2x.append(
+            F.interpolate(
+                img, feats_backbone[-5].shape[2:], mode='area'))
+        x = self.mlff2x(input_2x, paddle.shape(feats_backbone[-5])[-2:])  # 2x
+
+        x = F.interpolate(
+            x, input_shape[-2:], mode='bilinear', align_corners=False)
+        x = paddle.concat([x, img], axis=1)
+        alpha = self.matting_head_mlff2x(x)
+
+        if self.training:
+            logit_dict = {}
+            logit_dict['alpha'] = alpha
+            logit_dict['alpha_8x'] = self.matting_head_mlff8x(mlff8x_output)
+
+            loss_dict = self.loss(logit_dict, inputs)
+
+            return logit_dict, loss_dict
+        else:
+            return alpha
+
+    def loss(self, logit_dict, label_dict, loss_func_dict=None):
+        if loss_func_dict is None:
+            if self.loss_func_dict is None:
+                self.loss_func_dict = defaultdict(list)
+                self.loss_func_dict['alpha'].append(MRSD())
+                self.loss_func_dict['alpha'].append(GradientLoss())
+                self.loss_func_dict['alpha_8x'].append(MRSD())
+                self.loss_func_dict['alpha_8x'].append(GradientLoss())
+        else:
+            self.loss_func_dict = loss_func_dict
+
+        loss = {}
+        alpha_8x_label = F.interpolate(
+            label_dict['alpha'],
+            size=logit_dict['alpha_8x'].shape[-2:],
+            mode='area',
+            align_corners=False)
+        loss['alpha_8x_mrsd'] = self.loss_func_dict['alpha_8x'][0](
+            logit_dict['alpha_8x'], alpha_8x_label)
+        loss['alpha_8x_grad'] = self.loss_func_dict['alpha_8x'][1](
+            logit_dict['alpha_8x'], alpha_8x_label)
+        loss['alpha_8x'] = loss['alpha_8x_mrsd'] + loss['alpha_8x_grad']
+
+        transition_mask = label_dict['trimap'] == 128
+        loss['alpha_mrsd'] = self.loss_func_dict['alpha'][0](
+            logit_dict['alpha'],
+            label_dict['alpha']) + 2 * self.loss_func_dict['alpha'][0](
+                logit_dict['alpha'], label_dict['alpha'], transition_mask)
+        loss['alpha_grad'] = self.loss_func_dict['alpha'][1](
+            logit_dict['alpha'],
+            label_dict['alpha']) + 2 * self.loss_func_dict['alpha'][1](
+                logit_dict['alpha'], label_dict['alpha'], transition_mask)
+        loss['alpha'] = loss['alpha_mrsd'] + loss['alpha_grad']
+
+        loss['all'] = loss['alpha'] + loss['alpha_8x']
+        return loss
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class MattingHead(nn.Layer):
+    def __init__(self, in_chan, mid_chan, mid_num=1, out_channels=1):
+        super().__init__()
+        self.conv = layers.ConvBNReLU(
+            in_chan,
+            mid_chan,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            bias_attr=False)
+        self.mid_conv = nn.LayerList([
+            layers.ConvBNReLU(
+                mid_chan,
+                mid_chan,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                bias_attr=False) for i in range(mid_num - 1)
+        ])
+        self.conv_out = nn.Conv2D(
+            mid_chan, out_channels, kernel_size=1, bias_attr=False)
+
+    def forward(self, x):
+        x = self.conv(x)
+        for mid_conv in self.mid_conv:
+            x = mid_conv(x)
+        x = self.conv_out(x)
+        x = F.sigmoid(x)
+        return x
+
+
+class DoublePyramidPoolModule(nn.Layer):
+    """
+    Extract global information through double pyramid pool structure and attention calculation by transformer block.
+
+    Args:
+        stride(int): The stride for the inputs.
+        input_channel(int): The total channels of input features.
+        mid_channel(int, optional): The output channels of the first pyramid pool. Default: 256.
+        out_channel(int, optional): The output channels. Default: 512.
+        len_trans(int, optional): The depth of transformer block. Default: 1.
+        bin_sizes(list, optional): The output size of the second pyramid pool. Default: (2, 4, 6).
+        mlp_ratios(int, optional): The expandsion ratio of the mlp. Default: 2.
+        attn_ratio(int, optional): The expandsion ratio of the attention. Default: 2.
+        merge_type(str, optional): The merge type of the output of the second pyramid pool, which should be one of (`concat`, `add`). Default: 'concat'.
+        align_corners(bool, optional): Whether to use `align_corners` when interpolating. Default: False.
+
+    """
+
+    def __init__(self,
+                 stride,
+                 input_channel,
+                 mid_channel=256,
+                 output_channel=512,
+                 len_trans=1,
+                 bin_sizes=(2, 4, 6),
+                 mlp_ratios=2,
+                 attn_ratio=2,
+                 merge_type='concat',
+                 align_corners=False):
+        super().__init__()
+
+        self.mid_channel = mid_channel
+        self.align_corners = align_corners
+        self.mlp_rations = mlp_ratios
+        self.attn_ratio = attn_ratio
+        if isinstance(len_trans, int):
+            self.len_trans = [len_trans] * len(bin_sizes)
+        elif isinstance(len_trans, (list, tuple)):
+            self.len_trans = len_trans
+            if len(len_trans) != len(bin_sizes):
+                raise ValueError(
+                    'If len_trans is list or tuple, the length should be same as bin_sizes'
+                )
+        else:
+            raise ValueError(
+                '`len_trans` only support int, list and tuple type')
+
+        if merge_type not in ['add', 'concat']:
+            raise ('`merge_type only support `add` or `concat`.')
+        self.merge_type = merge_type
+
+        self.pp1 = PyramidPoolAgg(stride=stride)
+        self.conv_mid = layers.ConvBN(input_channel, mid_channel, 1)
+        self.pp2 = nn.LayerList([
+            self._make_stage(
+                embdeding_channels=mid_channel, size=size, block_num=block_num)
+            for size, block_num in zip(bin_sizes, self.len_trans)
+        ])
+
+        if self.merge_type == 'concat':
+            in_chan = mid_channel + mid_channel * len(bin_sizes)
+        else:
+            in_chan = mid_channel
+        self.conv_out = layers.ConvBNReLU(
+            in_chan, output_channel, kernel_size=1)
+
+    def _make_stage(self, embdeding_channels, size, block_num):
+        prior = nn.AdaptiveAvgPool2D(output_size=size)
+        if size == 1:
+            trans = layers.ConvBNReLU(
+                in_channels=embdeding_channels,
+                out_channels=embdeding_channels,
+                kernel_size=1)
+        else:
+            trans = BasicLayer(
+                block_num=block_num,
+                embedding_dim=embdeding_channels,
+                key_dim=16,
+                num_heads=8,
+                mlp_ratios=self.mlp_rations,
+                attn_ratio=self.attn_ratio,
+                drop=0,
+                attn_drop=0,
+                drop_path=0,
+                act_layer=nn.ReLU6,
+                lr_mult=1.0)
+        return nn.Sequential(prior, trans)
+
+    def forward(self, inputs):
+        x = self.pp1(inputs)
+        pp2_input = self.conv_mid(x)
+
+        cat_layers = []
+        for stage in self.pp2:
+            x = stage(pp2_input)
+            x = F.interpolate(
+                x,
+                paddle.shape(pp2_input)[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+            cat_layers.append(x)
+        cat_layers = [pp2_input] + cat_layers[::-1]
+        if self.merge_type == 'concat':
+            cat = paddle.concat(cat_layers, axis=1)
+        else:
+            cat = sum(cat_layers)
+        out = self.conv_out(cat)
+        return out
+
+
+class Conv2DBN(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ks=1,
+                 stride=1,
+                 pad=0,
+                 dilation=1,
+                 groups=1,
+                 bn_weight_init=1,
+                 lr_mult=1.0):
+        super().__init__()
+        conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.c = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=ks,
+            stride=stride,
+            padding=pad,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+        bn_weight_attr = paddle.ParamAttr(
+            initializer=nn.initializer.Constant(bn_weight_init),
+            learning_rate=lr_mult)
+        bn_bias_attr = paddle.ParamAttr(
+            initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
+        self.bn = nn.BatchNorm2D(
+            out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)
+
+    def forward(self, inputs):
+        out = self.c(inputs)
+        out = self.bn(out)
+        return out
+
+
+class MLP(nn.Layer):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.ReLU,
+                 drop=0.,
+                 lr_mult=1.0):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.dwconv = nn.Conv2D(
+            hidden_features,
+            hidden_features,
+            3,
+            1,
+            1,
+            groups=hidden_features,
+            weight_attr=param_attr,
+            bias_attr=param_attr)
+        self.act = act_layer()
+        self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.dwconv(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class Attention(nn.Layer):
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads,
+                 attn_ratio=4,
+                 activation=None,
+                 lr_mult=1.0):
+        super().__init__()
+        self.num_heads = num_heads
+        self.scale = key_dim**-0.5
+        self.key_dim = key_dim
+        self.nh_kd = nh_kd = key_dim * num_heads
+        self.d = int(attn_ratio * key_dim)
+        self.dh = int(attn_ratio * key_dim) * num_heads
+        self.attn_ratio = attn_ratio
+
+        self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
+        self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
+        self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)
+
+        self.proj = nn.Sequential(
+            activation(),
+            Conv2DBN(
+                self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))
+
+    def forward(self, x):
+        x_shape = paddle.shape(x)
+        H, W = x_shape[2], x_shape[3]
+
+        qq = self.to_q(x).reshape(
+            [0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
+        kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
+        vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
+            [0, 1, 3, 2])
+
+        attn = paddle.matmul(qq, kk)
+        attn = F.softmax(attn, axis=-1)
+
+        xx = paddle.matmul(attn, vv)
+
+        xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
+        xx = self.proj(xx)
+        return xx
+
+
+class Block(nn.Layer):
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads,
+                 mlp_ratios=4.,
+                 attn_ratio=2.,
+                 drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.ReLU,
+                 lr_mult=1.0):
+        super().__init__()
+        self.dim = dim
+        self.num_heads = num_heads
+        self.mlp_ratios = mlp_ratios
+
+        self.attn = Attention(
+            dim,
+            key_dim=key_dim,
+            num_heads=num_heads,
+            attn_ratio=attn_ratio,
+            activation=act_layer,
+            lr_mult=lr_mult)
+
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        mlp_hidden_dim = int(dim * mlp_ratios)
+        self.mlp = MLP(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop,
+                       lr_mult=lr_mult)
+
+    def forward(self, x):
+        h = x
+        x = self.attn(x)
+        x = self.drop_path(x)
+        x = h + x
+
+        h = x
+        x = self.mlp(x)
+        x = self.drop_path(x)
+        x = x + h
+        return x
+
+
+class BasicLayer(nn.Layer):
+    def __init__(self,
+                 block_num,
+                 embedding_dim,
+                 key_dim,
+                 num_heads,
+                 mlp_ratios=4.,
+                 attn_ratio=2.,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=None,
+                 lr_mult=1.0):
+        super().__init__()
+        self.block_num = block_num
+
+        self.transformer_blocks = nn.LayerList()
+        for i in range(self.block_num):
+            self.transformer_blocks.append(
+                Block(
+                    embedding_dim,
+                    key_dim=key_dim,
+                    num_heads=num_heads,
+                    mlp_ratios=mlp_ratios,
+                    attn_ratio=attn_ratio,
+                    drop=drop,
+                    drop_path=drop_path[i]
+                    if isinstance(drop_path, list) else drop_path,
+                    act_layer=act_layer,
+                    lr_mult=lr_mult))
+
+    def forward(self, x):
+        # token * N 
+        for i in range(self.block_num):
+            x = self.transformer_blocks[i](x)
+        return x
+
+
+class PyramidPoolAgg(nn.Layer):
+    def __init__(self, stride):
+        super().__init__()
+        self.stride = stride
+        self.tmp = Identity()  # avoid the error of paddle.flops
+
+    def forward(self, inputs):
+        '''
+        # The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
+        # so exporting the inference model will raise error.
+        _, _, H, W = inputs[-1].shape
+        H = (H - 1) // self.stride + 1
+        W = (W - 1) // self.stride + 1
+        return paddle.concat(
+            [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
+        '''
+        out = []
+        ks = 2**len(inputs)
+        stride = self.stride**len(inputs)
+        for x in inputs:
+            x = F.avg_pool2d(x, int(ks), int(stride))
+            ks /= 2
+            stride /= 2
+            out.append(x)
+        out = paddle.concat(out, axis=1)
+        return out
--- a/Matting/ppmatting/transforms/__init__.py
+++ b/Matting/ppmatting/transforms/__init__.py
+from .transforms import *
--- a/Matting/ppmatting/transforms/transforms.py
+++ b/Matting/ppmatting/transforms/transforms.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import random
+import string
+
+import cv2
+import numpy as np
+from paddleseg.transforms import functional
+from paddleseg.cvlibs import manager
+from paddleseg.utils import seg_env
+from PIL import Image
+
+
+@manager.TRANSFORMS.add_component
+class Compose:
+    """
+    Do transformation on input data with corresponding pre-processing and augmentation operations.
+    The shape of input data to all operations is [height, width, channels].
+    """
+
+    def __init__(self, transforms, to_rgb=True):
+        if not isinstance(transforms, list):
+            raise TypeError('The transforms must be a list!')
+        self.transforms = transforms
+        self.to_rgb = to_rgb
+
+    def __call__(self, data):
+        """
+        Args:
+            data (dict): The data to transform.
+
+        Returns:
+            dict: Data after transformation
+        """
+        if 'trans_info' not in data:
+            data['trans_info'] = []
+        for op in self.transforms:
+            data = op(data)
+            if data is None:
+                return None
+
+        data['img'] = np.transpose(data['img'], (2, 0, 1))
+        for key in data.get('gt_fields', []):
+            if len(data[key].shape) == 2:
+                continue
+            data[key] = np.transpose(data[key], (2, 0, 1))
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class LoadImages:
+    def __init__(self, to_rgb=True):
+        self.to_rgb = to_rgb
+
+    def __call__(self, data):
+        if isinstance(data['img'], str):
+            data['img'] = cv2.imread(data['img'])
+        for key in data.get('gt_fields', []):
+            if isinstance(data[key], str):
+                data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED)
+            # if alpha and trimap has 3 channels, extract one.
+            if key in ['alpha', 'trimap']:
+                if len(data[key].shape) > 2:
+                    data[key] = data[key][:, :, 0]
+
+        if self.to_rgb:
+            data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB)
+            for key in data.get('gt_fields', []):
+                if len(data[key].shape) == 2:
+                    continue
+                data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB)
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class Resize:
+    def __init__(self, target_size=(512, 512), random_interp=False):
+        if isinstance(target_size, list) or isinstance(target_size, tuple):
+            if len(target_size) != 2:
+                raise ValueError(
+                    '`target_size` should include 2 elements, but it is {}'.
+                    format(target_size))
+        else:
+            raise TypeError(
+                "Type of `target_size` is invalid. It should be list or tuple, but it is {}"
+                .format(type(target_size)))
+
+        self.target_size = target_size
+        self.random_interp = random_interp
+        self.interps = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC]
+
+    def __call__(self, data):
+        if self.random_interp:
+            interp = np.random.choice(self.interps)
+        else:
+            interp = cv2.INTER_LINEAR
+        data['trans_info'].append(('resize', data['img'].shape[0:2]))
+        data['img'] = functional.resize(data['img'], self.target_size, interp)
+        for key in data.get('gt_fields', []):
+            if key == 'trimap':
+                data[key] = functional.resize(data[key], self.target_size,
+                                              cv2.INTER_NEAREST)
+            else:
+                data[key] = functional.resize(data[key], self.target_size,
+                                              interp)
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomResize:
+    """
+    Resize image to a size determinned by `scale` and `size`.
+
+    Args:
+        size(tuple|list): The reference size to resize. A tuple or list with length 2.
+        scale(tupel|list, optional): A range of scale base on `size`. A tuple or list with length 2. Default: None.
+    """
+
+    def __init__(self, size=None, scale=None):
+        if isinstance(size, list) or isinstance(size, tuple):
+            if len(size) != 2:
+                raise ValueError(
+                    '`size` should include 2 elements, but it is {}'.format(
+                        size))
+        elif size is not None:
+            raise TypeError(
+                "Type of `size` is invalid. It should be list or tuple, but it is {}"
+                .format(type(size)))
+
+        if scale is not None:
+            if isinstance(scale, list) or isinstance(scale, tuple):
+                if len(scale) != 2:
+                    raise ValueError(
+                        '`scale` should include 2 elements, but it is {}'.
+                        format(scale))
+            else:
+                raise TypeError(
+                    "Type of `scale` is invalid. It should be list or tuple, but it is {}"
+                    .format(type(scale)))
+        self.size = size
+        self.scale = scale
+
+    def __call__(self, data):
+        h, w = data['img'].shape[:2]
+        if self.scale is not None:
+            scale = np.random.uniform(self.scale[0], self.scale[1])
+        else:
+            scale = 1.
+        if self.size is not None:
+            scale_factor = max(self.size[0] / w, self.size[1] / h)
+        else:
+            scale_factor = 1
+        scale = scale * scale_factor
+
+        w = int(round(w * scale))
+        h = int(round(h * scale))
+        data['img'] = functional.resize(data['img'], (w, h))
+        for key in data.get('gt_fields', []):
+            if key == 'trimap':
+                data[key] = functional.resize(data[key], (w, h),
+                                              cv2.INTER_NEAREST)
+            else:
+                data[key] = functional.resize(data[key], (w, h))
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class ResizeByLong:
+    """
+    Resize the long side of an image to given size, and then scale the other side proportionally.
+
+    Args:
+        long_size (int): The target size of long side.
+    """
+
+    def __init__(self, long_size):
+        self.long_size = long_size
+
+    def __call__(self, data):
+        data['trans_info'].append(('resize', data['img'].shape[0:2]))
+        data['img'] = functional.resize_long(data['img'], self.long_size)
+        for key in data.get('gt_fields', []):
+            if key == 'trimap':
+                data[key] = functional.resize_long(data[key], self.long_size,
+                                                   cv2.INTER_NEAREST)
+            else:
+                data[key] = functional.resize_long(data[key], self.long_size)
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class ResizeByShort:
+    """
+    Resize the short side of an image to given size, and then scale the other side proportionally.
+
+    Args:
+        short_size (int): The target size of short side.
+    """
+
+    def __init__(self, short_size):
+        self.short_size = short_size
+
+    def __call__(self, data):
+        data['trans_info'].append(('resize', data['img'].shape[0:2]))
+        data['img'] = functional.resize_short(data['img'], self.short_size)
+        for key in data.get('gt_fields', []):
+            if key == 'trimap':
+                data[key] = functional.resize_short(data[key], self.short_size,
+                                                    cv2.INTER_NEAREST)
+            else:
+                data[key] = functional.resize_short(data[key], self.short_size)
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class ResizeToIntMult:
+    """
+    Resize to some int muitple, d.g. 32.
+    """
+
+    def __init__(self, mult_int=32):
+        self.mult_int = mult_int
+
+    def __call__(self, data):
+        data['trans_info'].append(('resize', data['img'].shape[0:2]))
+
+        h, w = data['img'].shape[0:2]
+        rw = w - w % self.mult_int
+        rh = h - h % self.mult_int
+        data['img'] = functional.resize(data['img'], (rw, rh))
+        for key in data.get('gt_fields', []):
+            if key == 'trimap':
+                data[key] = functional.resize(data[key], (rw, rh),
+                                              cv2.INTER_NEAREST)
+            else:
+                data[key] = functional.resize(data[key], (rw, rh))
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class Normalize:
+    """
+    Normalize an image.
+
+    Args:
+        mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5].
+        std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5].
+
+    Raises:
+        ValueError: When mean/std is not list or any value in std is 0.
+    """
+
+    def __init__(self, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
+        self.mean = mean
+        self.std = std
+        if not (isinstance(self.mean,
+                           (list, tuple)) and isinstance(self.std,
+                                                         (list, tuple))):
+            raise ValueError(
+                "{}: input type is invalid. It should be list or tuple".format(
+                    self))
+        from functools import reduce
+        if reduce(lambda x, y: x * y, self.std) == 0:
+            raise ValueError('{}: std is invalid!'.format(self))
+
+    def __call__(self, data):
+        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+        std = np.array(self.std)[np.newaxis, np.newaxis, :]
+        data['img'] = functional.normalize(data['img'], mean, std)
+        if 'fg' in data.get('gt_fields', []):
+            data['fg'] = functional.normalize(data['fg'], mean, std)
+        if 'bg' in data.get('gt_fields', []):
+            data['bg'] = functional.normalize(data['bg'], mean, std)
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomCropByAlpha:
+    """
+    Randomly crop while centered on uncertain area by a certain probability.
+
+    Args:
+        crop_size (tuple|list): The size you want to crop from image.
+        p (float): The probability centered on uncertain area.
+
+    """
+
+    def __init__(self, crop_size=((320, 320), (480, 480), (640, 640)),
+                 prob=0.5):
+        self.crop_size = crop_size
+        self.prob = prob
+
+    def __call__(self, data):
+        idex = np.random.randint(low=0, high=len(self.crop_size))
+        crop_w, crop_h = self.crop_size[idex]
+
+        img_h = data['img'].shape[0]
+        img_w = data['img'].shape[1]
+        if np.random.rand() < self.prob:
+            crop_center = np.where((data['alpha'] > 0) & (data['alpha'] < 255))
+            center_h_array, center_w_array = crop_center
+            if len(center_h_array) == 0:
+                return data
+            rand_ind = np.random.randint(len(center_h_array))
+            center_h = center_h_array[rand_ind]
+            center_w = center_w_array[rand_ind]
+            delta_h = crop_h // 2
+            delta_w = crop_w // 2
+            start_h = max(0, center_h - delta_h)
+            start_w = max(0, center_w - delta_w)
+        else:
+            start_h = 0
+            start_w = 0
+            if img_h > crop_h:
+                start_h = np.random.randint(img_h - crop_h + 1)
+            if img_w > crop_w:
+                start_w = np.random.randint(img_w - crop_w + 1)
+
+        end_h = min(img_h, start_h + crop_h)
+        end_w = min(img_w, start_w + crop_w)
+
+        data['img'] = data['img'][start_h:end_h, start_w:end_w]
+        for key in data.get('gt_fields', []):
+            data[key] = data[key][start_h:end_h, start_w:end_w]
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomCrop:
+    """
+    Randomly crop
+
+    Args:
+    crop_size (tuple|list): The size you want to crop from image.
+    """
+
+    def __init__(self, crop_size=((320, 320), (480, 480), (640, 640))):
+        if not isinstance(crop_size[0], (list, tuple)):
+            crop_size = [crop_size]
+        self.crop_size = crop_size
+
+    def __call__(self, data):
+        idex = np.random.randint(low=0, high=len(self.crop_size))
+        crop_w, crop_h = self.crop_size[idex]
+        img_h, img_w = data['img'].shape[0:2]
+
+        start_h = 0
+        start_w = 0
+        if img_h > crop_h:
+            start_h = np.random.randint(img_h - crop_h + 1)
+        if img_w > crop_w:
+            start_w = np.random.randint(img_w - crop_w + 1)
+
+        end_h = min(img_h, start_h + crop_h)
+        end_w = min(img_w, start_w + crop_w)
+
+        data['img'] = data['img'][start_h:end_h, start_w:end_w]
+        for key in data.get('gt_fields', []):
+            data[key] = data[key][start_h:end_h, start_w:end_w]
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class LimitLong:
+    """
+    Limit the long edge of image.
+
+    If the long edge is larger than max_long, resize the long edge
+    to max_long, while scaling the short edge proportionally.
+
+    If the long edge is smaller than min_long, resize the long edge
+    to min_long, while scaling the short edge proportionally.
+
+    Args:
+        max_long (int, optional): If the long edge of image is larger than max_long,
+            it will be resized to max_long. Default: None.
+        min_long (int, optional): If the long edge of image is smaller than min_long,
+            it will be resized to min_long. Default: None.
+
+    Raises:
+        TypeError: When max_long or min_long is given but is not an int.
+        ValueError: When both are given and min_long is larger than max_long.
+    """
+
+    def __init__(self, max_long=None, min_long=None):
+        if max_long is not None:
+            if not isinstance(max_long, int):
+                raise TypeError(
+                    "Type of `max_long` is invalid. It should be int, but it is {}"
+                    .format(type(max_long)))
+        if min_long is not None:
+            if not isinstance(min_long, int):
+                raise TypeError(
+                    "Type of `min_long` is invalid. It should be int, but it is {}"
+                    .format(type(min_long)))
+        if (max_long is not None) and (min_long is not None):
+            if min_long > max_long:
+                raise ValueError(
+                    '`max_long` should not be smaller than `min_long`, but they are {} and {}'
+                    .format(max_long, min_long))
+        self.max_long = max_long
+        self.min_long = min_long
+
+    def __call__(self, data):
+        h, w = data['img'].shape[:2]
+        long_edge = max(h, w)
+        target = long_edge
+        if (self.max_long is not None) and (long_edge > self.max_long):
+            target = self.max_long
+        elif (self.min_long is not None) and (long_edge < self.min_long):
+            target = self.min_long
+
+        # The original (h, w) is recorded on every call, even when no resize
+        # happens below. data['trans_info'] must already exist.
+        data['trans_info'].append(('resize', data['img'].shape[0:2]))
+        if target != long_edge:
+            data['img'] = functional.resize_long(data['img'], target)
+            for key in data.get('gt_fields', []):
+                if key == 'trimap':
+                    # Nearest-neighbour for the trimap, presumably so that its
+                    # discrete label values are not blended - confirm.
+                    data[key] = functional.resize_long(data[key], target,
+                                                       cv2.INTER_NEAREST)
+                else:
+                    data[key] = functional.resize_long(data[key], target)
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class LimitShort:
+    """
+    Limit the short edge of image.
+
+    If the short edge is larger than max_short, resize the short edge
+    to max_short, while scaling the long edge proportionally.
+
+    If the short edge is smaller than min_short, resize the short edge
+    to min_short, while scaling the long edge proportionally.
+
+    Args:
+        max_short (int, optional): If the short edge of image is larger than max_short,
+            it will be resized to max_short. Default: None.
+        min_short (int, optional): If the short edge of image is smaller than min_short,
+            it will be resized to min_short. Default: None.
+
+    Raises:
+        TypeError: When max_short or min_short is given but is not an int.
+        ValueError: When both are given and min_short is larger than max_short.
+    """
+
+    def __init__(self, max_short=None, min_short=None):
+        if max_short is not None:
+            if not isinstance(max_short, int):
+                raise TypeError(
+                    "Type of `max_short` is invalid. It should be int, but it is {}"
+                    .format(type(max_short)))
+        if min_short is not None:
+            if not isinstance(min_short, int):
+                raise TypeError(
+                    "Type of `min_short` is invalid. It should be int, but it is {}"
+                    .format(type(min_short)))
+        if (max_short is not None) and (min_short is not None):
+            if min_short > max_short:
+                raise ValueError(
+                    '`max_short` should not be smaller than `min_short`, but they are {} and {}'
+                    .format(max_short, min_short))
+        self.max_short = max_short
+        self.min_short = min_short
+
+    def __call__(self, data):
+        h, w = data['img'].shape[:2]
+        short_edge = min(h, w)
+        target = short_edge
+        if (self.max_short is not None) and (short_edge > self.max_short):
+            target = self.max_short
+        elif (self.min_short is not None) and (short_edge < self.min_short):
+            target = self.min_short
+
+        # The original (h, w) is recorded on every call, even when no resize
+        # happens below. data['trans_info'] must already exist.
+        data['trans_info'].append(('resize', data['img'].shape[0:2]))
+        if target != short_edge:
+            data['img'] = functional.resize_short(data['img'], target)
+            for key in data.get('gt_fields', []):
+                if key == 'trimap':
+                    # Nearest-neighbour for the trimap, presumably so that its
+                    # discrete label values are not blended - confirm.
+                    data[key] = functional.resize_short(data[key], target,
+                                                        cv2.INTER_NEAREST)
+                else:
+                    data[key] = functional.resize_short(data[key], target)
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomHorizontalFlip:
+    """
+    Mirror the image and all ground-truth fields left-right with a given probability.
+
+    Args:
+        prob (float, optional): Probability of applying the flip. Default: 0.5.
+    """
+
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, data):
+        do_flip = random.random() < self.prob
+        if not do_flip:
+            return data
+
+        # Image and gt fields are flipped together so they stay aligned.
+        data['img'] = functional.horizontal_flip(data['img'])
+        for key in data.get('gt_fields', []):
+            data[key] = functional.horizontal_flip(data[key])
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomBlur:
+    """
+    Apply a Gaussian blur to the image with a certain probability.
+
+    The blur is triggered with a chance of 1 in int(1 / prob), so the
+    effective probability is only approximately `prob`.
+
+    Args:
+        prob (float, optional): A probability of blurring an image. Default: 0.1.
+    """
+
+    def __init__(self, prob=0.1):
+        self.prob = prob
+
+    def __call__(self, data):
+        # A non-positive probability disables the transform entirely.
+        if self.prob <= 0:
+            return data
+
+        one_in = 1 if self.prob >= 1 else int(1.0 / self.prob)
+        if np.random.randint(0, one_in) != 0:
+            return data
+
+        # The kernel size must be odd; it is drawn from 3..9 and capped at 9.
+        ksize = np.random.randint(3, 10)
+        if ksize % 2 != 1:
+            ksize += 1
+        ksize = min(ksize, 9)
+        kernel = (ksize, ksize)
+
+        data['img'] = cv2.GaussianBlur(data['img'], kernel, 0, 0)
+        for key in data.get('gt_fields', []):
+            # The trimap is left untouched; every other gt field is blurred
+            # with the same kernel as the image.
+            if key != 'trimap':
+                data[key] = cv2.GaussianBlur(data[key], kernel, 0, 0)
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomDistort:
+    """
+    Randomly perturb brightness, contrast, saturation and hue of an image.
+
+    The four operations are applied in a random order that is drawn once per
+    call; each operation is applied only if its own probability check passes.
+    Ground-truth fields other than 'alpha' and 'trimap' are distorted as well,
+    using the same operation order but their own independent random draws.
+
+    Args:
+        brightness_range (float, optional): A range of brightness. Default: 0.5.
+        brightness_prob (float, optional): A probability of adjusting brightness. Default: 0.5.
+        contrast_range (float, optional): A range of contrast. Default: 0.5.
+        contrast_prob (float, optional): A probability of adjusting contrast. Default: 0.5.
+        saturation_range (float, optional): A range of saturation. Default: 0.5.
+        saturation_prob (float, optional): A probability of adjusting saturation. Default: 0.5.
+        hue_range (int, optional): A range of hue. Default: 18.
+        hue_prob (float, optional): A probability of adjusting hue. Default: 0.5.
+    """
+
+    def __init__(self,
+                 brightness_range=0.5,
+                 brightness_prob=0.5,
+                 contrast_range=0.5,
+                 contrast_prob=0.5,
+                 saturation_range=0.5,
+                 saturation_prob=0.5,
+                 hue_range=18,
+                 hue_prob=0.5):
+        self.brightness_range = brightness_range
+        self.brightness_prob = brightness_prob
+        self.contrast_range = contrast_range
+        self.contrast_prob = contrast_prob
+        self.saturation_range = saturation_range
+        self.saturation_prob = saturation_prob
+        self.hue_range = hue_range
+        self.hue_prob = hue_prob
+
+    def __call__(self, data):
+        ops = [
+            functional.brightness, functional.contrast, functional.saturation,
+            functional.hue
+        ]
+        random.shuffle(ops)
+
+        # Keyword arguments and probabilities are looked up by each op's
+        # function name, so the keys must match functional.<name>.__name__.
+        params_dict = {
+            'brightness': {
+                'brightness_lower': 1 - self.brightness_range,
+                'brightness_upper': 1 + self.brightness_range
+            },
+            'contrast': {
+                'contrast_lower': 1 - self.contrast_range,
+                'contrast_upper': 1 + self.contrast_range
+            },
+            'saturation': {
+                'saturation_lower': 1 - self.saturation_range,
+                'saturation_upper': 1 + self.saturation_range
+            },
+            'hue': {
+                'hue_lower': -self.hue_range,
+                'hue_upper': self.hue_range
+            }
+        }
+        prob_dict = {
+            'brightness': self.brightness_prob,
+            'contrast': self.contrast_prob,
+            'saturation': self.saturation_prob,
+            'hue': self.hue_prob
+        }
+
+        def distort(array):
+            # Run the shuffled ops on one array via a PIL image round trip.
+            im = Image.fromarray(array.astype('uint8'))
+            for op in ops:
+                params = params_dict[op.__name__]
+                params['im'] = im
+                prob = prob_dict[op.__name__]
+                if np.random.uniform(0, 1) < prob:
+                    im = op(**params)
+            return np.asarray(im)
+
+        data['img'] = distort(data['img'])
+
+        for key in data.get('gt_fields', []):
+            # 'alpha' and 'trimap' are skipped and left unmodified.
+            if key in ['alpha', 'trimap']:
+                continue
+            data[key] = distort(data[key])
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class Padding:
+    """
+    Add bottom-right padding to a raw image and its ground-truth fields.
+
+    Nothing is padded along an axis where the image already reaches the
+    target size; the image is never cropped.
+
+    Args:
+        target_size (list|tuple): The target size after padding, given as
+            (width, height).
+        im_padding_value (list|tuple, optional): The padding value of the raw
+            image. It is also used for every ground-truth field except
+            'trimap' and 'alpha', which are padded with 0.
+            Default: (127.5, 127.5, 127.5).
+
+    Raises:
+        TypeError: When target_size is neither list nor tuple.
+        ValueError: When the length of target_size is not 2.
+    """
+
+    def __init__(self, target_size, im_padding_value=(127.5, 127.5, 127.5)):
+        if not isinstance(target_size, (list, tuple)):
+            raise TypeError(
+                "Type of target_size is invalid. It should be list or tuple, now is {}"
+                .format(type(target_size)))
+        if len(target_size) != 2:
+            raise ValueError(
+                '`target_size` should include 2 elements, but it is {}'.
+                format(target_size))
+
+        self.target_size = target_size
+        self.im_padding_value = im_padding_value
+
+    def __call__(self, data):
+        im_height, im_width = data['img'].shape[0], data['img'].shape[1]
+        # target_size is stored as (width, height).
+        target_width, target_height = self.target_size[0], self.target_size[1]
+        pad_height = max(0, target_height - im_height)
+        pad_width = max(0, target_width - im_width)
+
+        # The pre-padding (h, w) is recorded even when no padding is needed.
+        data['trans_info'].append(('padding', data['img'].shape[0:2]))
+        if (pad_height == 0) and (pad_width == 0):
+            return data
+
+        data['img'] = cv2.copyMakeBorder(
+            data['img'],
+            0,
+            pad_height,
+            0,
+            pad_width,
+            cv2.BORDER_CONSTANT,
+            value=self.im_padding_value)
+        for key in data.get('gt_fields', []):
+            if key in ['trimap', 'alpha']:
+                value = 0
+            else:
+                value = self.im_padding_value
+            data[key] = cv2.copyMakeBorder(
+                data[key],
+                0,
+                pad_height,
+                0,
+                pad_width,
+                cv2.BORDER_CONSTANT,
+                value=value)
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomSharpen:
+    """
+    Sharpen the image with a certain probability by subtracting a blurred copy.
+
+    Args:
+        prob (float, optional): Probability of applying the transform. Values
+            outside [0, 1] are clamped to that interval. Default: 0.1.
+    """
+
+    def __init__(self, prob=0.1):
+        # Clamp to [0, 1].
+        self.prob = min(max(prob, 0), 1)
+
+    def __call__(self, data):
+        if np.random.rand() > self.prob:
+            return data
+
+        ksize = np.random.choice([0, 3, 5, 7, 9])
+        amount = np.random.uniform(0.1, 0.5)
+
+        blurred = cv2.GaussianBlur(data['img'], (ksize, ksize), 5)
+        data['img'] = cv2.addWeighted(data['img'], 1 + amount, blurred,
+                                      -amount, 0)
+
+        for key in data.get('gt_fields', []):
+            if key in ('trimap', 'alpha'):
+                continue
+            # NOTE(review): gt fields use a fixed (0, 0) kernel and fixed
+            # weights 1.5 / -0.5 rather than the random kernel size and amount
+            # drawn for the image above - confirm this is intended.
+            blurred = cv2.GaussianBlur(data[key], (0, 0), 5)
+            data[key] = cv2.addWeighted(data[key], 1.5, blurred, -0.5, 0)
+
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomNoise:
+    """
+    Add Gaussian noise to the image with a certain probability.
+
+    Only data['img'] is modified; ground-truth fields are left untouched.
+
+    Args:
+        prob (float, optional): Probability of applying the transform. Values
+            outside [0, 1] are clamped to that interval. Default: 0.1.
+    """
+
+    def __init__(self, prob=0.1):
+        # Clamp to [0, 1].
+        self.prob = min(max(prob, 0), 1)
+
+    def __call__(self, data):
+        if np.random.rand() > self.prob:
+            return data
+
+        # Mean and variance are drawn on a [0, 1] intensity scale; the noise
+        # is then scaled by 255 before being added to the image.
+        noise_mean = np.random.uniform(0, 0.04)
+        noise_var = np.random.uniform(0, 0.001)
+        noise = np.random.normal(noise_mean, noise_var**0.5,
+                                 data['img'].shape) * 255
+
+        noisy = data['img'] + noise
+        data['img'] = np.clip(noisy, 0, 255)
+        return data
+
+
+@manager.TRANSFORMS.add_component
+class RandomReJpeg:
+    """
+    Re-encode the image as JPEG with a certain probability.
+
+    The image is compressed with a random JPEG quality in [70, 94] and decoded
+    again. Only data['img'] is modified; ground-truth fields are left
+    untouched.
+
+    Args:
+        prob (float, optional): Probability of applying the transform. Values
+            outside [0, 1] are clamped to that interval. Default: 0.1.
+    """
+
+    def __init__(self, prob=0.1):
+        if prob < 0:
+            self.prob = 0
+        elif prob > 1:
+            self.prob = 1
+        else:
+            self.prob = prob
+
+    def __call__(self, data):
+        if np.random.rand() > self.prob:
+            return data
+        q = np.random.randint(70, 95)
+        img = data['img'].astype('uint8')
+
+        # Round-trip through an in-memory JPEG buffer instead of a pid-named
+        # temp file: nothing is left on disk, and threads or processes cannot
+        # collide on a shared file name.
+        ok, encoded = cv2.imencode('.jpg', img,
+                                   [int(cv2.IMWRITE_JPEG_QUALITY), q])
+        if ok:
+            # IMREAD_COLOR is the default flag of cv2.imread, so the decoded
+            # image matches what reading the file back would return.
+            data['img'] = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
+        # If encoding fails the image is left unchanged rather than replaced
+        # by None.
+
+        return data
--- a/Matting/ppmatting/utils/__init__.py
+++ b/Matting/ppmatting/utils/__init__.py
+from .estimate_foreground_ml import estimate_foreground_ml
+from .utils import get_files, get_image_list, mkdir, load_pretrained_model
--- a/Matting/ppmatting/utils/estimate_foreground_ml.py
+++ b/Matting/ppmatting/utils/estimate_foreground_ml.py
+import numpy as np
+from numba import njit, prange
+
+# The foreground estimation is adapted from pymatting [https://github.com/pymatting/pymatting/blob/master/pymatting/foreground/estimate_foreground_ml.py]
+
+
+@njit("void(f4[:, :, :], f4[:, :, :])", cache=True, nogil=True, parallel=True)
+def _resize_nearest_multichannel(dst, src):
+    """
+    Internal method.
+
+    Fill dst with a nearest-neighbor resampling of src. The result is written
+    into dst in place; nothing is returned.
+    Both images must have a channel axis, i.e. :code:`len(shape) == 3`.
+
+    Parameters
+    ----------
+    dst: numpy.ndarray of type np.float32
+        output image
+    src: numpy.ndarray of type np.float32
+        input image
+    """
+    src_h, src_w = src.shape[0], src.shape[1]
+    # The channel count is taken from dst.
+    dst_h, dst_w, channels = dst.shape
+
+    for row in prange(dst_h):
+        # The source row depends only on the destination row.
+        src_row = max(0, min(src_h - 1, row * src_h // dst_h))
+        for col in range(dst_w):
+            src_col = max(0, min(src_w - 1, col * src_w // dst_w))
+
+            for ch in range(channels):
+                dst[row, col, ch] = src[src_row, src_col, ch]
+
+
+@njit("void(f4[:, :], f4[:, :])", cache=True, nogil=True, parallel=True)
+def _resize_nearest(dst, src):
+    """
+    Internal method.
+
+    Resize image src to dst using nearest neighbors filtering. The result is
+    written into dst in place; nothing is returned.
+    Images must be grayscale, i.e. :code:`len(shape) == 2`.
+
+    Parameters
+    ----------
+    dst: numpy.ndarray of type np.float32
+        output image
+    src: numpy.ndarray of type np.float32
+        input image
+    """
+    h_src, w_src = src.shape
+    h_dst, w_dst = dst.shape
+
+    for y_dst in prange(h_dst):
+        # The source row only depends on the destination row, so it is
+        # computed once per row instead of once per pixel.
+        y_src = max(0, min(h_src - 1, y_dst * h_src // h_dst))
+        for x_dst in range(w_dst):
+            x_src = max(0, min(w_src - 1, x_dst * w_src // w_dst))
+
+            dst[y_dst, x_dst] = src[y_src, x_src]
+
+
+# TODO
+# There should be an option to switch @njit(parallel=True) on or off.
+# parallel=True would be faster, but might cause race conditions.
+# User should have the option to turn it on or off.
+@njit(
+    "Tuple((f4[:, :, :], f4[:, :, :]))(f4[:, :, :], f4[:, :], f4, i4, i4, i4, f4)",
+    cache=True,
+    nogil=True)
+def _estimate_fb_ml(
+        input_image,
+        input_alpha,
+        regularization,
+        n_small_iterations,
+        n_big_iterations,
+        small_size,
+        gradient_weight, ):
+    """
+    Internal method.
+
+    Estimate foreground and background colors on a coarse-to-fine sequence of
+    image sizes, starting at 1x1 and ending at the input resolution. At each
+    level the previous level's result is upsampled and then refined by
+    repeatedly solving a 2x2 linear system per pixel.
+
+    Parameters
+    ----------
+    input_image: numpy.ndarray of type np.float32, shape (h, w, depth)
+        input image
+    input_alpha: numpy.ndarray of type np.float32, shape (h, w)
+        alpha matte
+    regularization: float
+        constant term of the neighbor weight
+    n_small_iterations: int
+        iterations per level while both level sides are <= small_size
+    n_big_iterations: int
+        iterations per level otherwise
+    small_size: int
+        size threshold that selects between the two iteration counts
+    gradient_weight: float
+        factor applied to the absolute alpha difference between neighbors
+
+    Returns
+    -------
+    F, B: numpy.ndarray of type np.float32
+        foreground and background of the last level, values clamped to [0, 1]
+    """
+    h0, w0, depth = input_image.shape
+
+    dtype = np.float32
+
+    w_prev = 1
+    h_prev = 1
+
+    # NOTE(review): np.empty leaves these uninitialized, and they are the
+    # source for the first level's F and B - confirm this cannot leak
+    # arbitrary (e.g. NaN) values into the result.
+    F_prev = np.empty((h_prev, w_prev, depth), dtype=dtype)
+    B_prev = np.empty((h_prev, w_prev, depth), dtype=dtype)
+
+    # NOTE(review): for a 1x1 input this is 0 and the exponent below divides
+    # by zero - confirm callers never pass such an image.
+    n_levels = int(np.ceil(np.log2(max(w0, h0))))
+
+    for i_level in range(n_levels + 1):
+        # Level size grows geometrically: exponent 0 gives 1, exponent 1
+        # gives the full input size.
+        w = round(w0**(i_level / n_levels))
+        h = round(h0**(i_level / n_levels))
+
+        image = np.empty((h, w, depth), dtype=dtype)
+        alpha = np.empty((h, w), dtype=dtype)
+
+        # Image and alpha are always resampled from the full-resolution input.
+        _resize_nearest_multichannel(image, input_image)
+        _resize_nearest(alpha, input_alpha)
+
+        F = np.empty((h, w, depth), dtype=dtype)
+        B = np.empty((h, w, depth), dtype=dtype)
+
+        # The current estimate starts as the upsampled previous-level result.
+        _resize_nearest_multichannel(F, F_prev)
+        _resize_nearest_multichannel(B, B_prev)
+
+        if w <= small_size and h <= small_size:
+            n_iter = n_small_iterations
+        else:
+            n_iter = n_big_iterations
+
+        # Scratch right-hand side, reused for every pixel of this level:
+        # row 0 is the foreground equation, row 1 the background equation.
+        b = np.zeros((2, depth), dtype=dtype)
+
+        # Offsets of the 4-connected neighbors.
+        dx = [-1, 1, 0, 0]
+        dy = [0, 0, -1, 1]
+
+        for i_iter in range(n_iter):
+            # F and B are updated in place, so later pixels read neighbors
+            # that were already updated in this iteration.
+            # The decorator above does not pass parallel=True; per the Numba
+            # docs prange should then behave like range - confirm.
+            for y in prange(h):
+                for x in range(w):
+                    a0 = alpha[y, x]
+                    a1 = 1.0 - a0
+
+                    a00 = a0 * a0
+                    a01 = a0 * a1
+                    # a10 = a01 can be omitted due to symmetry of matrix
+                    a11 = a1 * a1
+
+                    for c in range(depth):
+                        b[0, c] = a0 * image[y, x, c]
+                        b[1, c] = a1 * image[y, x, c]
+
+                    for d in range(4):
+                        # Coordinates are clamped, so at the border the
+                        # "neighbor" is the pixel itself.
+                        x2 = max(0, min(w - 1, x + dx[d]))
+                        y2 = max(0, min(h - 1, y + dy[d]))
+
+                        gradient = abs(a0 - alpha[y2, x2])
+
+                        # Neighbor weight: constant term plus a term that
+                        # grows with the local alpha difference.
+                        da = regularization + gradient_weight * gradient
+
+                        a00 += da
+                        a11 += da
+
+                        for c in range(depth):
+                            b[0, c] += da * F[y2, x2, c]
+                            b[1, c] += da * B[y2, x2, c]
+
+                    # Closed-form inverse of the symmetric 2x2 matrix
+                    # [[a00, a01], [a01, a11]].
+                    determinant = a00 * a11 - a01 * a01
+
+                    inv_det = 1.0 / determinant
+
+                    b00 = inv_det * a11
+                    b01 = inv_det * -a01
+                    b11 = inv_det * a00
+
+                    for c in range(depth):
+                        F_c = b00 * b[0, c] + b01 * b[1, c]
+                        B_c = b01 * b[0, c] + b11 * b[1, c]
+
+                        # Keep the colors inside [0, 1].
+                        F_c = max(0.0, min(1.0, F_c))
+                        B_c = max(0.0, min(1.0, B_c))
+
+                        F[y, x, c] = F_c
+                        B[y, x, c] = B_c
+
+        F_prev = F
+        B_prev = B
+
+        # These two are updated but not read again within this function.
+        w_prev = w
+        h_prev = h
+
+    return F, B
+
+
+def estimate_foreground_ml(
+        image,
+        alpha,
+        regularization=1e-5,
+        n_small_iterations=10,
+        n_big_iterations=2,
+        small_size=32,
+        return_background=False,
+        gradient_weight=1.0, ):
+    """Estimate the foreground colors of an image from its alpha matte.
+
+    Both inputs are converted to float32 before the multi-level solver is
+    run. See :cite:`germer2020multilevel` for reference.
+
+    Parameters
+    ----------
+    image: numpy.ndarray
+        Input image with shape :math:`h \\times  w \\times d`
+    alpha: numpy.ndarray
+        Input alpha matte with shape :math:`h \\times  w`
+    regularization: float
+        Regularization strength :math:`\\epsilon`, defaults to :math:`10^{-5}`.
+        Higher regularization results in smoother colors.
+    n_small_iterations: int
+        Number of iterations performed on small scale, defaults to :math:`10`
+    n_big_iterations: int
+        Number of iterations performed on large scale, defaults to :math:`2`
+    small_size: int
+        Threshold that determines at which size `n_small_iterations` should be used
+    return_background: bool
+        Whether to return the estimated background in addition to the foreground
+    gradient_weight: float
+        Larger values enforce smoother foregrounds, defaults to :math:`1`
+
+    Returns
+    -------
+    F: numpy.ndarray
+        Extracted foreground
+    B: numpy.ndarray
+        Extracted background, only returned when `return_background` is True
+
+    Example
+    -------
+    The `load_image` helper below comes from pymatting.
+
+    >>> from pymatting import *
+    >>> image = load_image("data/lemur/lemur.png", "RGB")
+    >>> alpha = load_image("data/lemur/lemur_alpha.png", "GRAY")
+    >>> F = estimate_foreground_ml(image, alpha, return_background=False)
+    >>> F, B = estimate_foreground_ml(image, alpha, return_background=True)
+
+    See Also
+    --------
+    stack_images: This function can be used to place the foreground on a new background.
+    """
+
+    # The compiled solver is declared for float32 arrays.
+    image_f32 = image.astype(np.float32)
+    alpha_f32 = alpha.astype(np.float32)
+
+    foreground, background = _estimate_fb_ml(
+        image_f32, alpha_f32, regularization, n_small_iterations,
+        n_big_iterations, small_size, gradient_weight)
+
+    return (foreground, background) if return_background else foreground